[ { "id": "identity_31047", "image": "PMC-VQA/images/PMC6873490_Fig1_72668.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the findings in the pulmonary CT images? \nChoices:\n(A) Single enhanced nodule in the right upper lobe. \n(B) Bilateral flaky blurred shadows with ill-defined boundary. \n(C) Small round lesions in lower lobe. \n(D) Normal pulmonary CT images. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.65625, "loss": 4.089524745941162 }, { "id": "identity_28941", "image": "PMC-VQA/images/PMC9052692_Fig3_268355.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which side of the stomach exhibits the irregularity in density? \nChoices:\n(A) Greater curve \n(B) Lesser curve \n(C) Anterior wall \n(D) Posterior wall " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.765625, "loss": 4.045445442199707 }, { "id": "identity_228664", "image": "AI2D/images/3438.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the label for the segment of the leg that is closest to the body for each of the three types of legs (foreleg, mid leg, and hind leg) and then determine the label for the hind leg" }, { "from": "gpt", "value": "The answer is BH" } ], "clip_score": 23.40625, "loss": 5.327566146850586 }, { "id": "identity_276744", "image": "FigureQA/images/64340.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Peru the country with the smallest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.453125, "loss": 4.083952903747559 }, { "id": "identity_325815", "image": "ScienceQA/images/10822.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What state of matter is this object?\nChoices:\n(A) a gas\n(B) a solid\n(C) a liquid" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.078125, "loss": 4.013214111328125 }, { "id": "identity_12905", "image": "VQA2.0/images/504608.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the reflection in the sunglasses?" }, { "from": "gpt", "value": "The answer is outside" } ], "clip_score": 25.921875, "loss": 4.575634956359863 }, { "id": "identity_269464", "image": "A-OKVQA/images/92894.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the handedness of the Giants player?\nChoices:\n(A) none\n(B) right\n(C) left\n(D) normal" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.765625, "loss": 4.252073287963867 }, { "id": "identity_318768", "image": "FigureQA/images/73907.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Sky Blue the one with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.40625, "loss": 4.391605377197266 }, { "id": "identity_280675", "image": "FigureQA/images/22574.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Bubblegum the highest possible option?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.15625, "loss": 4.381179332733154 }, { "id": "identity_270095", "image": "ScienceQA/images/18726.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) South Carolina\n(B) North Carolina\n(C) Connecticut\n(D) Georgia" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.921875, "loss": 4.3828277587890625 }, { "id": "identity_109047", "image": "A-OKVQA/images/161719.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many whole bagels can be seen on the table?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.875, "loss": 4.566049098968506 }, { "id": "identity_120018", "image": "Super-CLEVR/images/superCLEVR_new_001261.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object floating above the golden motorcycle?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 25.046875, "loss": 4.152734279632568 }, { "id": "identity_294704", "image": "PMC-VQA/images/PMC6618072_hed25675-fig-0006_6431.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the green arrow in the image signify?\nOptions:\n(A) The reconstructed trachea.\n(B) The FPTAPF.\n(C) The esophagus.\n(D) The heart." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.0, "loss": 5.18976354598999 }, { "id": "identity_252585", "image": "VQA2.0/images/531912.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of trick is the person performing on the skateboard, and what are the key elements that define this trick?" }, { "from": "gpt", "value": "The answer is Nose manual" } ], "clip_score": 32.0, "loss": 5.495214939117432 }, { "id": "identity_7348", "image": "Super-CLEVR/images/superCLEVR_new_000564.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the small rubber scooter have the same color as the big thing that is to the left of the big brown rubber articulated bus?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 26.5625, "loss": 5.280894756317139 }, { "id": "identity_332216", "image": "A-OKVQA/images/12240.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the person using an umbrella?\nChoices:\n(A) snow\n(B) disguise\n(C) sun\n(D) rain" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.921875, "loss": 4.252880096435547 }, { "id": "identity_260362", "image": "FigureQA/images/24319.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined percentage of Dark Turquoise and Light Seafoam, and how does it compare to the percentage of Turquoise?" }, { "from": "gpt", "value": "The answer is They are equal" } ], "clip_score": 39.5, "loss": 4.701560974121094 }, { "id": "identity_223169", "image": "TextVQA/images/c6f6d097f2611b69.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many characters are depicted on the clock face?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 27.625, "loss": 4.516855716705322 }, { "id": "identity_111151", "image": "Super-CLEVR/images/superCLEVR_new_000567.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is to the right of the cyan car and has the same shape as the green bicycle?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 28.515625, "loss": 4.101492881774902 }, { "id": "identity_338274", "image": "A-OKVQA/images/326161.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these lamps trying to help people with?\nChoices:\n(A) sleep\n(B) see\n(C) smell\n(D) hear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.234375, "loss": 4.59862756729126 }, { "id": "identity_277463", "image": "ScienceQA/images/20768.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Kentucky?\nChoices:\n(A) Atlanta\n(B) Frankfort\n(C) Carson City\n(D) Nashville" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.0625, "loss": 4.151946067810059 }, { "id": "identity_277468", "image": "PMC-VQA/images/PMC7684318_Fig3_303791.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Prior to the challenge, which organ exhibited mild congestion?\nChoices:\n(A) Lungs\n(B) Liver\n(C) Kidney\n(D) Heart" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.171875, "loss": 4.464186668395996 }, { "id": "identity_217041", "image": "A-OKVQA/images/451793.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bicycle in the foreground?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 16.921875, "loss": 4.544766902923584 }, { "id": "identity_34872", "image": "PMC-VQA/images/PMC4269088_fig4_345247.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible in the right obturator canal? \nChoices:\n(A) Herniated small bowel loop \n(B) No herniation seen. \n(C) Herniated large bowel loop \n(D) Mesentery herniation. " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.75, "loss": 4.246868133544922 }, { "id": "identity_158185", "image": "TQA/images/abc_question_images/parts_microscope_17193.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label 'L' refer to on this microscope?" }, { "from": "gpt", "value": "The answer is Base" } ], "clip_score": 32.9375, "loss": 4.485598564147949 }, { "id": "identity_35694", "image": "ScienceQA/images/20128.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) New Jersey\n(B) Maine\n(C) Massachusetts\n(D) South Carolina" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.609375, "loss": 4.555117607116699 }, { "id": "identity_232552", "image": "TextVQA/images/4371fcaa74d62805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the brand and model of the mouse in the image?" }, { "from": "gpt", "value": "The answer is AVIA Krypton" } ], "clip_score": 31.953125, "loss": 4.543525695800781 }, { "id": "identity_28066", "image": "IconQA/images/42464.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner less likely to land?\nChoices:\n(A) green\n(B) white" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.15625, "loss": 4.071305751800537 }, { "id": "identity_319274", "image": "PMC-VQA/images/PMC3350148_fig3_137475.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the right side of this show? \nChoices:\n(A) Lymphocele \n(B) Vein \n(C) Bladder \n(D) Ureter" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.71875, "loss": 5.079833507537842 }, { "id": "identity_331597", "image": "ScienceQA/images/3264.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Phoenix\n(B) Cheyenne\n(C) Oklahoma City\n(D) Tucson" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.546875, "loss": 4.474489688873291 }, { "id": "identity_331298", "image": "A-OKVQA/images/158713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on top of the front wheel of this vehicle?\nChoices:\n(A) basket\n(B) fruit\n(C) vegetables\n(D) bird" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.9453125, "loss": 4.280174732208252 }, { "id": "identity_119893", "image": "PMC-VQA/images/PMC4077035_F1_302433.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cross-sectional images are displayed in the series?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.140625, "loss": 4.095141410827637 }, { "id": "identity_29621", "image": "A-OKVQA/images/400592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is in the room?\nChoices:\n(A) dog\n(B) basketball hoop\n(C) messy desk\n(D) cat" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.265625, "loss": 4.289578437805176 }, { "id": "identity_70030", "image": "ScienceQA/images/11569.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states are highlighted in this image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 27.875, "loss": 4.176163673400879 }, { "id": "identity_331192", "image": "A-OKVQA/images/573874.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the multi-color umbrella?\nChoices:\n(A) uv protection\n(B) rain\n(C) celebration\n(D) children" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.203125, "loss": 4.727264881134033 }, { "id": "identity_279659", "image": "AI2D/images/2223.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter in the diagram corresponds to a mature adult salmon? Is it:\n(A) F\n(B) G\n(C) E\n(D) K" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.453125, "loss": 4.462082862854004 }, { "id": "identity_211527", "image": "VQA2.0/images/64619.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of printed material is lying on the table?" }, { "from": "gpt", "value": "The answer is A red book or menu" } ], "clip_score": 12.7734375, "loss": 4.048206329345703 }, { "id": "identity_288665", "image": "AI2D/images/2075.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most likely outcome if the producers in this ecosystem were to die off?\nChoices:\n(A) Decrease in the number of butterflies\n(B) Increase in the number of snakes\n(C) Increase in the number of birds\n(D) Decrease in the number of frogs" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.84375, "loss": 4.1842041015625 }, { "id": "identity_272263", "image": "AI2D/images/2016.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the classification of blackberries in the food chain?\nChoices:\n(A) Producers\n(B) Tertiary Consumer\n(C) Primary Consumer\n(D) Secondary Consumer" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.8125, "loss": 4.163552284240723 }, { "id": "identity_211077", "image": "TextVQA/images/dda14cf415f6110a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows can be seen on the side of the aircraft in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 25.984375, "loss": 4.5850677490234375 }, { "id": "identity_7488", "image": "TextVQA/images/57b18b2387e1a9ff.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is being given away free?" }, { "from": "gpt", "value": "The answer is dog bookmarks" } ], "clip_score": 14.9375, "loss": 4.102612018585205 }, { "id": "identity_279206", "image": "IconQA/images/86593.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the time shown by filling in the blank: It is (_) past six." }, { "from": "gpt", "value": "The answer is quarter" } ], "clip_score": 25.671875, "loss": 5.163320541381836 }, { "id": "identity_304455", "image": "VQA2.0/images/575590.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of vehicles are those?" }, { "from": "gpt", "value": "The answer is school" } ], "clip_score": 21.203125, "loss": 5.119390964508057 }, { "id": "identity_318175", "image": "AI2D/images/1971.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If there were no more of these, which would likely starve first?\nChoices:\n(A) Tertiary consumers\n(B) Primary consumers\n(C) Primary producers\n(D) Quaternary consumers" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.9375, "loss": 4.16061544418335 }, { "id": "identity_316424", "image": "AI2D/images/3283.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the label here?\nChoices:\n(A) lysosome\n(B) golgi apparatus\n(C) ribosomes\n(D) peroxisome" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.859375, "loss": 4.257126808166504 }, { "id": "identity_295989", "image": "A-OKVQA/images/444619.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for individuals sitting on the sidewalk? Is it because they are watching a parade, observing traffic, being punished, or simply resting?\nChoices:\n(A) watching parade\n(B) watching traffic\n(C) as punishment\n(D) resting" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.578125, "loss": 4.510496616363525 }, { "id": "identity_316999", "image": "Super-CLEVR/images/superCLEVR_new_000045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the bicycle the same color as this shiny object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 25.546875, "loss": 4.985276222229004 }, { "id": "identity_32335", "image": "PMC-VQA/images/PMC4766417_f2_5619.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the image show? \nChoices:\n(A) A cube \n(B) An animal \n(C) A crystal particle \n(D) A plant " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.140625, "loss": 4.163578033447266 }, { "id": "identity_34735", "image": "A-OKVQA/images/133680.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is he sitting on the bench?\nChoices:\n(A) scheming\n(B) resting\n(C) playing\n(D) waiting" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.75, "loss": 4.114014625549316 }, { "id": "identity_263075", "image": "DVQA/images/bar_train_00046694.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the bottom element in each stack of bars?" }, { "from": "gpt", "value": "The answer is degree" } ], "clip_score": 24.203125, "loss": 4.0843024253845215 }, { "id": "identity_100136", "image": "IconQA/images/104229.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of object is in the third position?" }, { "from": "gpt", "value": "The answer is A red train" } ], "clip_score": 19.71875, "loss": 4.1544060707092285 }, { "id": "identity_196201", "image": "PMC-VQA/images/PMC8913091_F1_219605.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the abnormality indicated in the MRI image?" }, { "from": "gpt", "value": "The answer is Irregular shape" } ], "clip_score": 34.4375, "loss": 4.094396591186523 }, { "id": "identity_11173", "image": "ScienceQA/images/16136.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these four objects have in common?\nChoices:\n(A) transparent\n(B) sweet\n(C) sour" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.078125, "loss": 4.155655384063721 }, { "id": "identity_311786", "image": "FigureQA/images/51500.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Light Slate?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.96875, "loss": 4.0704545974731445 }, { "id": "identity_289278", "image": "PMC-VQA/images/PMC6376781_Fig2_436488.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which specific location is the tumor mass situated?\nChoices:\n(A) Left lower lobe bronchus\n(B) Right upper lobe bronchus\n(C) Left upper lobe bronchus\n(D) Right middle lobe bronchus" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.796875, "loss": 4.223348140716553 }, { "id": "identity_292768", "image": "AI2D/images/4506.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which category does leaf A fall under?\nChoices:\n(A) lacerate\n(B) undulate\n(C) entire\n(D) crenate" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.859375, "loss": 4.239148139953613 }, { "id": "identity_182358", "image": "A-OKVQA/images/70363.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many platforms are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.453125, "loss": 4.622868061065674 }, { "id": "identity_264937", "image": "VQA2.0/images/324823.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a man present in the photo?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 17.703125, "loss": 4.115905284881592 }, { "id": "identity_203941", "image": "A-OKVQA/images/32773.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many platforms are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.203125, "loss": 4.590097904205322 }, { "id": "identity_10772", "image": "PMC-VQA/images/PMC7885276_f1_368641.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the body is imaged in the accompanying SPECT image?\nChoices:\n(A) Abdomen \n(B) Legs \n(C) Chest \n(D) Head " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.171875, "loss": 4.013297080993652 }, { "id": "identity_328486", "image": "PMC-VQA/images/PMC2940095_F0001_73716.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the names of the arteries that are partially included in this? \nChoices:\n(A) Middle cerebral arteries and branches \n(B) Posterior cerebral arteries and branches \n(C) Anterior cerebral arteries and branches \n(D) Basilar arteries and branches" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.28125, "loss": 4.192167282104492 }, { "id": "identity_336005", "image": "DocVQA/images/jxcn0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the outcome mentioned in this document?" }, { "from": "gpt", "value": "The answer is OK" } ], "clip_score": 22.953125, "loss": 4.868579387664795 }, { "id": "identity_330466", "image": "A-OKVQA/images/160597.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why do safety workers wear this color?\nChoices:\n(A) visibility\n(B) style\n(C) tradition\n(D) cheaper" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.484375, "loss": 4.114753723144531 }, { "id": "identity_109294", "image": "Super-CLEVR/images/superCLEVR_new_001692.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly in front of the purple airplane?" }, { "from": "gpt", "value": "The answer is A car" } ], "clip_score": 28.140625, "loss": 4.257900238037109 }, { "id": "identity_10716", "image": "TextVQA/images/48c0660014cb1412.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: which is the book on ulyesses?" }, { "from": "gpt", "value": "The answer is annotated" } ], "clip_score": 18.96875, "loss": 4.050156593322754 }, { "id": "identity_329424", "image": "ScienceQA/images/1538.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared characteristic of these four items?\nChoices:\n(A) opaque\n(B) flexible\n(C) shiny" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.734375, "loss": 4.407887935638428 }, { "id": "identity_280554", "image": "VQA-AS/images/19384.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the object that the boy is observing?" }, { "from": "gpt", "value": "The answer is birds" } ], "clip_score": 24.984375, "loss": 4.504203796386719 }, { "id": "identity_106730", "image": "VizWiz/images/VizWiz_train_00010283.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the container's lid?" }, { "from": "gpt", "value": "The answer is Round" } ], "clip_score": 23.453125, "loss": 4.17360258102417 }, { "id": "identity_12083", "image": "PMC-VQA/images/PMC6281586_Fig6_408445.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the type of scan used? \nChoices:\n(A) Axial CT \n(B) Sagittal CT \n(C) Coronal CT \n(D) MRI " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.78125, "loss": 4.624857425689697 }, { "id": "identity_38714", "image": "TextVQA/images/72061ebefb7a25db.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the name of the sports team?" }, { "from": "gpt", "value": "The answer is abro" } ], "clip_score": 15.8828125, "loss": 5.068976402282715 }, { "id": "identity_313153", "image": "ScienceQA/images/2180.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option below is a fish?\nChoices:\n(A) keel-billed toucan\n(B) goldfish" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.15625, "loss": 4.001596927642822 }, { "id": "identity_324084", "image": "IconQA/images/83639.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this shape?\nChoices:\n(A) flip\n(B) turn\n(C) slide" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.25, "loss": 4.53560733795166 }, { "id": "identity_193544", "image": "A-OKVQA/images/253262.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 14.703125, "loss": 4.343356609344482 }, { "id": "identity_13250", "image": "AI2D/images/2475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: WHICH STAGE APPEARS IN WINTER?\nChoices:\n(A) QUEEN FOR AGING\n(B) NEST MAKING\n(C) QUEEN HIBERNATES\n(D) QUEEN ANDMALES" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.234375, "loss": 4.60360050201416 }, { "id": "identity_268829", "image": "FigureQA/images/68893.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the smoothest: Dark Blue? \n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.7890625, "loss": 4.249650955200195 }, { "id": "identity_42975", "image": "DocVQA/images/kklg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title associated with the highest dollar amount?" }, { "from": "gpt", "value": "The answer is Res. Asst" } ], "clip_score": 19.3125, "loss": 4.916735649108887 }, { "id": "identity_288498", "image": "A-OKVQA/images/46155.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the player aiming to maintain?\nChoices:\n(A) volley\n(B) net\n(C) complaints\n(D) emotions" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.25, "loss": 4.7414422035217285 }, { "id": "identity_277524", "image": "FigureQA/images/70794.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Orchid or Light Gold?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.984375, "loss": 4.000669002532959 }, { "id": "identity_328872", "image": "TabMWP/images/7342.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is this function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.0, "loss": 4.153103828430176 }, { "id": "identity_36390", "image": "TQA/images/question_images/earth_moon_phases_6012.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which phase is when the moon is least visible on earth?\nChoices:\n(A) new moon\n(B) first quarter\n(C) waning crescent\n(D) waxing crescent" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.453125, "loss": 4.208222389221191 }, { "id": "identity_204237", "image": "VQA2.0/images/486513.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of crossing is marked on the road?" }, { "from": "gpt", "value": "The answer is A cycle crossing" } ], "clip_score": 21.890625, "loss": 4.072235584259033 }, { "id": "identity_299652", "image": "TextVQA/images/f0cb340ffd238456.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify one of the names listed on this paper?" }, { "from": "gpt", "value": "The answer is siegfried" } ], "clip_score": 12.609375, "loss": 4.659870624542236 }, { "id": "identity_268565", "image": "ScienceQA/images/8297.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the specific humidity level measured in the highlighted area shown?\nChoices:\n(A) 12 grams of water vapor per kilogram of air\n(B) 11 grams of water vapor per kilogram of air\n(C) 4 grams of water vapor per kilogram of air" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.7578125, "loss": 4.284097671508789 }, { "id": "identity_117423", "image": "PMC-VQA/images/PMC6092347_Fig5_354068.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the radiograph, what can be inferred about the internal structure of the pearl?" }, { "from": "gpt", "value": "The answer is Solid nacreous composition without visible inclusions" } ], "clip_score": 30.28125, "loss": 4.227646350860596 }, { "id": "identity_119885", "image": "TextVQA/images/1582c8538686d50d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the aircraft's nose?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 24.5625, "loss": 4.04456901550293 }, { "id": "identity_264591", "image": "AI2D/images/1885.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the tertiary consumers in the provided food web.\nChoices:\n(A) carnivores\n(B) beetles, giant ants\n(C) leaves, fruits\n(D) none of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.4375, "loss": 4.74326753616333 }, { "id": "identity_10982", "image": "PMC-VQA/images/PMC5602954_Fig1_216803.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of scan was performed to detect the lung metastasis? \nChoices:\n(A) MRI chest and abdomen \n(B) CT chest only \n(C) PET scan only \n(D) CT chest, abdomen and pelvis. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.328125, "loss": 4.116768836975098 }, { "id": "identity_201325", "image": "PMC-VQA/images/PMC7610243_Fig7_290343.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the image show any signs of lung collapse or pneumothorax?" }, { "from": "gpt", "value": "The answer is No signs visible" } ], "clip_score": 27.015625, "loss": 5.246816158294678 }, { "id": "identity_296832", "image": "PMC-VQA/images/PMC7787872_fig4_335944.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the probable diagnosis of the mass lesion depicted in the image, with the following options: (A) Myxoma, (B) Liposarcoma, (C) Meningioma, (D) Carcinoma?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.578125, "loss": 4.389984130859375 }, { "id": "identity_70921", "image": "ScienceQA/images/20719.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the bird's plumage?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 27.796875, "loss": 4.143771171569824 }, { "id": "identity_311383", "image": "A-OKVQA/images/73093.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person doing?\nChoices:\n(A) pitching\n(B) sleeping\n(C) dancing\n(D) eating" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.1640625, "loss": 4.647042274475098 }, { "id": "identity_2562", "image": "FigureQA/images/64404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Slate the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.984375, "loss": 4.458433628082275 }, { "id": "identity_311974", "image": "FigureQA/images/86307.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the Midnight Blue line the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.453125, "loss": 4.20397424697876 }, { "id": "identity_181505", "image": "CLEVR-Math/images/CLEVR_train_042722.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the sphere that is the smallest in size?" }, { "from": "gpt", "value": "The answer is Silver" } ], "clip_score": 27.203125, "loss": 4.720409393310547 }, { "id": "identity_288619", "image": "VQA2.0/images/492138.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the two women both possess goggles?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 18.34375, "loss": 4.084633827209473 }, { "id": "identity_332167", "image": "PMC-VQA/images/PMC5557509_Fig2_203153.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color pattern of the cervical channel compared to the surrounding tissue in this scenario?\nChoices:\n(A) Blue\n(B) Red\n(C) Green\n(D) Yellow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.171875, "loss": 4.263360023498535 }, { "id": "identity_315922", "image": "ScienceQA/images/15638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which place is highlighted?\nChoices:\n(A) Haiti\n(B) Jamaica\n(C) Saint Kitts and Nevis\n(D) Cuba" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.90625, "loss": 4.349854469299316 }, { "id": "identity_133265", "image": "VQA2.0/images/153397.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many waves are visible in the background?" }, { "from": "gpt", "value": "The answer is Two waves" } ], "clip_score": 18.4375, "loss": 4.266923427581787 }, { "id": "identity_184650", "image": "PMC-VQA/images/PMC6206379_Fig4_387477.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different symbols are used to mark specific features in this histological image?" }, { "from": "gpt", "value": "The answer is Four symbols" } ], "clip_score": 26.75, "loss": 4.309664726257324 }, { "id": "identity_265831", "image": "AI2D/images/3337.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which cell location can the mitochondrion be found?\nChoices:\n(A) M\n(B) K\n(C) F\n(D) E" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.375, "loss": 4.235589027404785 }, { "id": "identity_284503", "image": "PMC-VQA/images/PMC9228668_polymers-14-02360-f001_323007.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging mode is utilized in the SEM micrographs among the following options: BSE mode, SE mode, EDS mode, or CL mode?\nChoices:\n(A) BSE mode \n(B) SE mode \n(C) EDS mode \n(D) CL mode" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.578125, "loss": 4.91498327255249 }, { "id": "identity_294160", "image": "DVQA/images/bar_train_00032325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group has the highest total value when all values are added together?" }, { "from": "gpt", "value": "The answer is disk" } ], "clip_score": 22.328125, "loss": 4.234731197357178 }, { "id": "identity_302349", "image": "A-OKVQA/images/71264.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of building does this animal live in?\nChoices:\n(A) highrise\n(B) duplex\n(C) bungalow\n(D) trailer" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.28125, "loss": 4.416277885437012 }, { "id": "identity_21164", "image": "PMC-VQA/images/PMC2673345_fig03_37795.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible on the reference image?\nChoices:\n(A) Fat suppression. \n(B) Shifted fat signal. \n(C) Bone fractures. \n(D) Tumor. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.796875, "loss": 4.305567264556885 }, { "id": "identity_131287", "image": "VizWiz/images/VizWiz_train_00004743.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many barcodes are visible on the apple sticker?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 27.421875, "loss": 4.427814483642578 }, { "id": "identity_270377", "image": "AI2D/images/3607.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What anatomical structure is located directly beneath the larynx?\nChoices:\n(A) Nasal Cavity\n(B) Alveoli\n(C) Trachea\n(D) Diaohragm" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.5, "loss": 4.158144950866699 }, { "id": "identity_275736", "image": "AI2D/images/1082.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter corresponds to the Vacuole in the diagram? \nChoices:\n(A) B\n(B) J\n(C) A\n(D) H" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.6875, "loss": 4.2710676193237305 }, { "id": "identity_334524", "image": "A-OKVQA/images/433554.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person wearing in red?\nChoices:\n(A) scuba outfit\n(B) lifejacket\n(C) swimsuit\n(D) jeans" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.375, "loss": 4.43585205078125 }, { "id": "identity_29475", "image": "PMC-VQA/images/PMC7815715_Fig3_345484.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of OCT scan is shown in the image?\nChoices:\n(A) Transverse (T) \n(B) En face (E) \n(C) Vertical (V) \n(D) Horizontal (H)" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.546875, "loss": 4.621502876281738 }, { "id": "identity_294769", "image": "DocVQA/images/jqkm0227_15.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the consultation service mentioned in the document?" }, { "from": "gpt", "value": "The answer is Heinz consultation service" } ], "clip_score": 20.984375, "loss": 4.048212051391602 }, { "id": "identity_969", "image": "ScienceQA/images/14827.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Kentucky?\nChoices:\n(A) Frankfort\n(B) Columbia\n(C) Oklahoma City\n(D) Detroit" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.71875, "loss": 4.237910747528076 }, { "id": "identity_56167", "image": "DVQA/images/bar_train_00012223.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest preference for 'today'?" }, { "from": "gpt", "value": "The answer is Flash" } ], "clip_score": 26.515625, "loss": 4.261733055114746 }, { "id": "identity_292669", "image": "A-OKVQA/images/291476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of shot is the man in the image performing?\nChoices:\n(A) double\n(B) forehand\n(C) backhand\n(D) none" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.59375, "loss": 4.173758029937744 }, { "id": "identity_292677", "image": "TextVQA/images/26cb1858072bdfc4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this a bus for school?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 23.234375, "loss": 4.835944652557373 }, { "id": "identity_116935", "image": "VQA2.0/images/262715.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicles are primarily shown in the image?" }, { "from": "gpt", "value": "The answer is Military aircraft" } ], "clip_score": 14.7578125, "loss": 4.534733772277832 }, { "id": "identity_12971", "image": "A-OKVQA/images/61997.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity can you observe here?\nChoices:\n(A) wood carving\n(B) dog grooming\n(C) painting\n(D) skiing" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.1171875, "loss": 4.188385963439941 }, { "id": "identity_305597", "image": "DVQA/images/bar_train_00188728.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first set of bars from the bottom?" }, { "from": "gpt", "value": "The answer is action" } ], "clip_score": 21.53125, "loss": 4.526339054107666 }, { "id": "identity_36541", "image": "PMC-VQA/images/PMC4875637_Fig2_32538.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of images are shown in e, f? \nChoices:\n(A) PET images \n(B) T1-weighted images \n(C) T2-weighted images \n(D) CT images " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.46875, "loss": 4.219843864440918 }, { "id": "identity_111419", "image": "Super-CLEVR/images/superCLEVR_new_001502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is the tiny white object behind the silver motorcycle?" }, { "from": "gpt", "value": "The answer is A minivan" } ], "clip_score": 25.71875, "loss": 4.4206719398498535 }, { "id": "identity_179867", "image": "IconQA/images/89542.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the flower's center?" }, { "from": "gpt", "value": "The answer is Circular" } ], "clip_score": 30.5, "loss": 4.207971572875977 }, { "id": "identity_296868", "image": "A-OKVQA/images/115502.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which neighborhood does the bus route pass through?\nChoices:\n(A) ghetto\n(B) suburbs\n(C) china town\n(D) downtown" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.015625, "loss": 4.079070091247559 }, { "id": "identity_220730", "image": "VQA2.0/images/380733.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats can be seen in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 18.234375, "loss": 4.295773983001709 }, { "id": "identity_32958", "image": "PMC-VQA/images/PMC7107010_F1_143306.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What abnormality is seen in the right hemisphere?\nChoices:\n(A) Open operculum sign. \n(B) Exposed insular cortex. \n(C) Arrested development of superior temporal gyrus and inferior frontal gyrus. \n(D) None of the above." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.953125, "loss": 4.4989471435546875 }, { "id": "identity_21393", "image": "IconQA/images/79194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can you trace a circle with this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.703125, "loss": 4.4474382400512695 }, { "id": "identity_278341", "image": "AI2D/images/3701.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label correctly identifies X among the following choices: (A) liver, (B) anus, (C) small intestine, or (D) large intestine?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.125, "loss": 4.3606181144714355 }, { "id": "identity_12585", "image": "PMC-VQA/images/PMC6908538_Fig1_82803.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the image shown? \nChoices:\n(A) To highlight the difference between the treatment zones. \n(B) To showcase the brain's anatomy. \n(C) To show the effects of radiation therapy on the human brain. \n(D) To display a cross-section of the human brain. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.125, "loss": 4.088624000549316 }, { "id": "identity_301840", "image": "PMC-VQA/images/PMC4709269_fig7_461017.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the dashed lines in the image represent?\nChoices:\n(A) Regions of high activity in the brain \n(B) The boundaries of different brain regions \n(C) The location of identical spots within the brain \n(D) None of the above." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.8125, "loss": 4.278717994689941 }, { "id": "identity_71263", "image": "AI2D/images/2008.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trophic levels are present in this food chain?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 27.28125, "loss": 4.356972694396973 }, { "id": "identity_198718", "image": "PMC-VQA/images/PMC7410867_Fig2_229887.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image?" }, { "from": "gpt", "value": "The answer is Axial" } ], "clip_score": 23.859375, "loss": 4.292855739593506 }, { "id": "identity_265909", "image": "ScienceQA/images/17423.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which state is being indicated by the highlight?\nChoices:\n(A) Alaska\n(B) California\n(C) Nevada\n(D) Colorado" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.078125, "loss": 4.045727729797363 }, { "id": "identity_227027", "image": "Super-CLEVR/images/superCLEVR_new_000511.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many large vehicles are there in the image, and what are their types?" }, { "from": "gpt", "value": "The answer is Three; dirtbikes and car" } ], "clip_score": 25.4375, "loss": 5.242424488067627 }, { "id": "identity_328509", "image": "A-OKVQA/images/313712.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who uses the circular glass object here?\nChoices:\n(A) mothers\n(B) smokers\n(C) vegans\n(D) vegetarians" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.2109375, "loss": 4.659749507904053 }, { "id": "identity_288218", "image": "A-OKVQA/images/405456.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following individuals does this man most closely resemble: Moby, Chris Rock, Jimmy Smits, or Sanjay Gupta?\nChoices:\n(A) moby\n(B) chris rock\n(C) jimmy smits\n(D) sanjay gupta" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.78125, "loss": 4.029398441314697 }, { "id": "identity_319915", "image": "A-OKVQA/images/465835.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has stripes in this image?\nChoices:\n(A) jacket\n(B) zebras\n(C) tshirt\n(D) beer bottle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.015625, "loss": 4.387843132019043 }, { "id": "identity_16863", "image": "VQA2.0/images/351734.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the dog?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 21.015625, "loss": 4.380429744720459 }, { "id": "identity_308913", "image": "Super-CLEVR/images/superCLEVR_new_001341.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What size is the cyan object that is in front of the large metal double bus?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 20.671875, "loss": 4.05585241317749 }, { "id": "identity_297757", "image": "ScienceQA/images/10728.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Rhode Island?\nChoices:\n(A) Albany\n(B) Providence\n(C) Newport\n(D) Concord" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.734375, "loss": 4.0370001792907715 }, { "id": "identity_217267", "image": "TextVQA/images/049d516ccb244077.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the stripes on the cheerleaders' sleeves?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 25.203125, "loss": 4.293675899505615 }, { "id": "identity_34106", "image": "A-OKVQA/images/456756.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What feature is this animal known for?\nChoices:\n(A) spinning webs\n(B) long neck\n(C) gills\n(D) armored shell" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.125, "loss": 4.21564245223999 }, { "id": "identity_296091", "image": "A-OKVQA/images/308892.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which design does the bike seat have among the following choices: leopard print, stripes, zebra print, or plaid?\nChoices:\n(A) leopard print\n(B) stripes\n(C) zebra print\n(D) plaid" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.0, "loss": 4.108325004577637 }, { "id": "identity_19552", "image": "Super-CLEVR/images/superCLEVR_new_001696.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the number of small yellow matte biplanes that are in front of the brown matte dirtbike less than the number of big objects behind the small yellow matte aeroplane?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 29.375, "loss": 4.852500915527344 }, { "id": "identity_20996", "image": "A-OKVQA/images/232546.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type business is the white sign advertising?\nChoices:\n(A) pharmacy\n(B) car shop\n(C) route store\n(D) grocer" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.71875, "loss": 4.0750226974487305 }, { "id": "identity_287792", "image": "IconQA/images/44783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the time shown by completing the sentence: It is (_) past eight. Answer using a time word, not a number." }, { "from": "gpt", "value": "The answer is half" } ], "clip_score": 25.375, "loss": 4.872778415679932 }, { "id": "identity_4295", "image": "A-OKVQA/images/573513.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the red vehicle on the right stopped at the building?\nChoices:\n(A) to race\n(B) refueling\n(C) broke down\n(D) changing tires" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.8046875, "loss": 4.404662609100342 }, { "id": "identity_222801", "image": "VQA2.0/images/161266.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many swans are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 31.265625, "loss": 4.593352794647217 }, { "id": "identity_324222", "image": "A-OKVQA/images/200643.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What country does this flag belong to?\nChoices:\n(A) arab emirates\n(B) italy\n(C) egypt\n(D) romania" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.6875, "loss": 4.284031867980957 }, { "id": "identity_319479", "image": "IconQA/images/89783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on?\nChoices:\n(A) pink\n(B) white" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.015625, "loss": 4.275568962097168 }, { "id": "identity_316538", "image": "DVQA/images/bar_train_00082575.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store is indicated by the crimson color?" }, { "from": "gpt", "value": "The answer is spot" } ], "clip_score": 25.8125, "loss": 4.42681884765625 }, { "id": "identity_308395", "image": "ScienceQA/images/11516.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic do these items share?\nChoices:\n(A) scratchy\n(B) slippery\n(C) bouncy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.546875, "loss": 4.028679847717285 }, { "id": "identity_193358", "image": "VQA2.0/images/296631.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible jars are on the table?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 13.7890625, "loss": 4.538033485412598 }, { "id": "identity_198761", "image": "PMC-VQA/images/PMC6721794_fig2s1_32467.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the brighter spots within the boxes compared to the surrounding areas?" }, { "from": "gpt", "value": "The answer is Higher expression of the target marker" } ], "clip_score": 19.890625, "loss": 4.361948490142822 }, { "id": "identity_20124", "image": "DocVQA/images/rsxw0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the Board of Directors Meeting organized?" }, { "from": "gpt", "value": "The answer is VNA LIBRARY" } ], "clip_score": 25.203125, "loss": 4.008610725402832 }, { "id": "identity_320533", "image": "PMC-VQA/images/PMC7422746_F5_233106.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the blue arrow indicate here?\nChoices:\n(A) Some organs at risk\n(B) A target volume\n(C) A planning dose\n(D) Respect of the organs at risk" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.375, "loss": 4.777847766876221 }, { "id": "identity_137770", "image": "DocVQA/images/rsdw0217_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sections are under the Totelle Family category?" }, { "from": "gpt", "value": "The answer is Four sections" } ], "clip_score": 12.4921875, "loss": 4.611110687255859 }, { "id": "identity_40427", "image": "DVQA/images/bar_train_00196914.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store sold the least number of units for the item \"bunk\"?" }, { "from": "gpt", "value": "The answer is sorry\" store" } ], "clip_score": 30.046875, "loss": 4.500717639923096 }, { "id": "identity_14033", "image": "FigureQA/images/17852.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange Red the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.921875, "loss": 4.159417152404785 }, { "id": "identity_27277", "image": "A-OKVQA/images/3528.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the likely relation of the person pushing the stroller to the child in it?\nChoices:\n(A) grandmother\n(B) mother\n(C) cousin\n(D) aunt" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.203125, "loss": 4.899838447570801 }, { "id": "identity_304627", "image": "DVQA/images/bar_train_00077353.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the lime color represent?" }, { "from": "gpt", "value": "The answer is dozen" } ], "clip_score": 19.796875, "loss": 5.9005446434021 }, { "id": "identity_208177", "image": "A-OKVQA/images/500076.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many police officers are visible in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 23.140625, "loss": 4.487679481506348 }, { "id": "identity_215961", "image": "VizWiz/images/VizWiz_train_00007638.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the shakers' base?" }, { "from": "gpt", "value": "The answer is Rectangular" } ], "clip_score": 22.703125, "loss": 4.040085792541504 }, { "id": "identity_23590", "image": "A-OKVQA/images/325487.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the man in the center of the field want to achieve?\nChoices:\n(A) home run\n(B) walk\n(C) foul\n(D) strike" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.90625, "loss": 4.376583099365234 }, { "id": "identity_329125", "image": "IconQA/images/101795.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a paw. What is the third image?\nChoices:\n(A) paw\n(B) house\n(C) dog" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.828125, "loss": 4.04396915435791 }, { "id": "identity_333564", "image": "A-OKVQA/images/500271.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the person use the toothbrush for?\nChoices:\n(A) grooming\n(B) brushing teeth\n(C) tickling\n(D) attacking" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.4921875, "loss": 4.469220161437988 }, { "id": "identity_138748", "image": "FigureQA/images/10949.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the second highest value on the y-axis?" }, { "from": "gpt", "value": "The answer is Saddle Brown" } ], "clip_score": 25.703125, "loss": 4.21221399307251 }, { "id": "identity_21448", "image": "A-OKVQA/images/480005.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the person taking the vegetables on the boat?\nChoices:\n(A) throwing away\n(B) to market\n(C) home\n(D) church" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.1875, "loss": 4.541459560394287 }, { "id": "identity_21507", "image": "VQA2.0/images/144018.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the umbrellas multicolored?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 17.5625, "loss": 4.037532806396484 }, { "id": "identity_280460", "image": "AI2D/images/3274.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the nucleus located among the following choices? \n(A) A\n(B) D\n(C) G\n(D) E" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.734375, "loss": 4.086581230163574 }, { "id": "identity_326964", "image": "PlotQA/images/120908.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How are the labels in the legend arranged?" }, { "from": "gpt", "value": "The answer is horizontal" } ], "clip_score": 18.03125, "loss": 4.654036998748779 }, { "id": "identity_290598", "image": "PMC-VQA/images/PMC5908617_F1_304248.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: To which structures is the line drawn parallel? \n(A) dorsal surfaces \n(B) lateral surfaces \n(C) ventral surfaces \n(D) anterior surfaces" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.296875, "loss": 4.403717041015625 }, { "id": "identity_267934", "image": "Super-CLEVR/images/superCLEVR_new_000840.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the gray rubber object located to the right of the mountain bike and the small cyan metallic motorbike of equal size?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 25.015625, "loss": 5.613287925720215 }, { "id": "identity_140466", "image": "DocVQA/images/ptfg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the recipient of this letter?" }, { "from": "gpt", "value": "The answer is Ole Salthe" } ], "clip_score": 19.765625, "loss": 6.065055847167969 }, { "id": "identity_271985", "image": "ScienceQA/images/2654.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organism among the following contains matter that was previously part of the bilberry?\nChoices:\n(A) barren-ground caribou\n(B) short-tailed weasel\n(C) lichen\n(D) bear sedge" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.484375, "loss": 4.7534379959106445 }, { "id": "identity_319075", "image": "AI2D/images/3987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the outermost part of this diagram?\nChoices:\n(A) plasmalemma\n(B) cell wall\n(C) rough endoplasmic reticulum\n(D) vacuole" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.21875, "loss": 4.8003082275390625 }, { "id": "identity_128284", "image": "VQA2.0/images/346290.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the lighting in the image suggest?" }, { "from": "gpt", "value": "The answer is Daytime, likely afternoon" } ], "clip_score": 18.421875, "loss": 4.0847697257995605 }, { "id": "identity_17163", "image": "A-OKVQA/images/376921.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person in black about to do?\nChoices:\n(A) move residences\n(B) sales call\n(C) work errand\n(D) cook lunch" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.1796875, "loss": 4.744059085845947 }, { "id": "identity_40221", "image": "DVQA/images/bar_train_00007961.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest overall value?" }, { "from": "gpt", "value": "The answer is sense" } ], "clip_score": 26.03125, "loss": 4.116621971130371 }, { "id": "identity_264907", "image": "PMC-VQA/images/PMC3228743_F2_117897.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the observation on the surgical chest radiograph regarding the cardiac silhouette?\nChoices:\n(A) wider cardiac silhouette \n(B) narrow cardiac silhouette and visualization of main pulmonary artery \n(C) narrow cardiac silhouette without visualization of main pulmonary artery \n(D) no significant finding detected on the cardiac silhouette." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.265625, "loss": 4.5490336418151855 }, { "id": "identity_308318", "image": "VQA-AS/images/8818.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is it?" }, { "from": "gpt", "value": "The answer is on grass" } ], "clip_score": 21.8125, "loss": 4.716636657714844 }, { "id": "identity_105787", "image": "VQA2.0/images/316669.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of drink is in the glass behind the plates?" }, { "from": "gpt", "value": "The answer is Water" } ], "clip_score": 14.1796875, "loss": 4.305182456970215 }, { "id": "identity_325529", "image": "PMC-VQA/images/PMC7394290_fig2_223351.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where can the opacities be found?\nChoices:\n(A) Right lung \n(B) Both lungs \n(C) Left lung \n(D) Abdomen" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.78125, "loss": 4.803165435791016 }, { "id": "identity_272377", "image": "PMC-VQA/images/PMC4404124_Fig1_379483.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the diagrams is highlighted in yellow? \nChoices:\n(A) Mesoderm\n(B) Endoderm\n(C) Ectoderm\n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.40625, "loss": 4.316770553588867 }, { "id": "identity_186761", "image": "VQA2.0/images/128027.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the houseboat with the most windows?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 23.265625, "loss": 4.50661563873291 }, { "id": "identity_314717", "image": "ScienceQA/images/19463.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Phoenix\n(B) Manchester\n(C) Annapolis\n(D) Dover" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.75, "loss": 4.505427837371826 }, { "id": "identity_33159", "image": "A-OKVQA/images/406047.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What level of event is this?\nChoices:\n(A) hobby\n(B) national\n(C) international\n(D) local" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.875, "loss": 4.070474147796631 }, { "id": "identity_30738", "image": "PMC-VQA/images/PMC3034685_F4_86438.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the red arrowheads indicate in the microphotographs?\nChoices:\n(A) Notochord staining \n(B) Anterior end \n(C) Dorsal side \n(D) Posterior end " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.875, "loss": 4.758073806762695 }, { "id": "identity_325382", "image": "A-OKVQA/images/5620.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of roadway are these vehicles traveling on?\nChoices:\n(A) intersection\n(B) highway\n(C) rural road\n(D) traffic circle" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.9140625, "loss": 4.1064677238464355 }, { "id": "identity_3247", "image": "PMC-VQA/images/PMC6452502_ccr32032-fig-0001_462275.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the yellow arrow in the CT? \nChoices:\n(A) Normal tonsil tissue \n(B) A tumor \n(C) Peritonsillar abscess \n(D) Swollen lymph node. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.796875, "loss": 4.612344264984131 }, { "id": "identity_129470", "image": "VizWiz/images/VizWiz_train_00008973.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels does the cart have?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.40625, "loss": 4.893801689147949 }, { "id": "identity_25482", "image": "DVQA/images/bar_train_00132018.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the third bar from the left?" }, { "from": "gpt", "value": "The answer is remark" } ], "clip_score": 18.78125, "loss": 4.884604454040527 }, { "id": "identity_28499", "image": "PMC-VQA/images/PMC4579082_pbio.1002253.g002_425907.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What muscle is being shown in the transverse sections in panel (e,f)? \nChoices:\n(A) Soleus. \n(B) Masseter. \n(C) Biceps brachii. \n(D) Pectoralis minor. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.859375, "loss": 4.124042510986328 }, { "id": "identity_322794", "image": "A-OKVQA/images/235595.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the parent organization of this vehicle?\nChoices:\n(A) ford\n(B) toyota\n(C) honda\n(D) ferrari" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.1484375, "loss": 4.889716148376465 }, { "id": "identity_330156", "image": "PMC-VQA/images/PMC5909365_Fig5_304664.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the white arrow indicate in this image?\nChoices:\n(A) The renal artery\n(B) The left renal artery\n(C) The right renal artery\n(D) The left kidney" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.890625, "loss": 4.404208183288574 }, { "id": "identity_209979", "image": "A-OKVQA/images/28654.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are parked in the background?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 11.46875, "loss": 4.353126049041748 }, { "id": "identity_334592", "image": "TabMWP/images/11825.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: A school administrator who was concerned about grade inflation looked over the number of straight-A students from year to year. According to the table, what was the rate of change between these two years?" }, { "from": "gpt", "value": "The answer is -5" } ], "clip_score": 31.390625, "loss": 4.06841516494751 }, { "id": "identity_202595", "image": "A-OKVQA/images/456886.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the building on the right side of the image?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 23.8125, "loss": 4.919948577880859 }, { "id": "identity_261346", "image": "AI2D/images/1841.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the primary food that pigs consume?\nChoices:\n(A) cows\n(B) slop\n(C) grass\n(D) humans" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.890625, "loss": 4.128437042236328 }, { "id": "identity_224260", "image": "DVQA/images/bar_train_00085669.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold for each item across all shops, and which item has the highest total sales?" }, { "from": "gpt", "value": "The answer is Sport" } ], "clip_score": 33.8125, "loss": 4.370131015777588 }, { "id": "identity_35165", "image": "A-OKVQA/images/39846.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the vent on top of the stove for?\nChoices:\n(A) smoke\n(B) soda\n(C) coffee\n(D) water" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.34375, "loss": 4.038824081420898 }, { "id": "identity_115577", "image": "TextVQA/images/e271d16f9a60e06e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bookshelves are placed outside the bookstore?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 26.203125, "loss": 4.6935811042785645 }, { "id": "identity_188525", "image": "A-OKVQA/images/479078.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people can be seen watching the event?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 10.1953125, "loss": 4.313563346862793 }, { "id": "identity_242006", "image": "TextVQA/images/4748e3595f9984b0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the company on the red banner, and how many people are standing on the track in front of the banner?" }, { "from": "gpt", "value": "The answer is Santander, 27 people" } ], "clip_score": 29.96875, "loss": 4.1153998374938965 }, { "id": "identity_8862", "image": "Super-CLEVR/images/superCLEVR_new_001508.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the large metallic bus?" }, { "from": "gpt", "value": "The answer is cyan" } ], "clip_score": 25.71875, "loss": 4.19890832901001 }, { "id": "identity_220779", "image": "TextVQA/images/8540548a2a0db34f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the common feature of the products with a 'V' symbol on their price tag?" }, { "from": "gpt", "value": "The answer is Quality selection" } ], "clip_score": 20.75, "loss": 5.3939971923828125 }, { "id": "identity_215908", "image": "VQA2.0/images/230712.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the street name mentioned on the ice cream cart?" }, { "from": "gpt", "value": "The answer is Fullerton St" } ], "clip_score": 30.015625, "loss": 4.784041404724121 }, { "id": "identity_68562", "image": "ScienceQA/images/17522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If 2 mL of pink particles were removed from Solution B, how many would remain?" }, { "from": "gpt", "value": "The answer is 5 particles" } ], "clip_score": 30.296875, "loss": 4.299614906311035 }, { "id": "identity_60117", "image": "DocVQA/images/qsjw0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last name of the speaker discussing the transition from Institute to University?" }, { "from": "gpt", "value": "The answer is Hartline" } ], "clip_score": 20.640625, "loss": 5.077178001403809 }, { "id": "identity_215483", "image": "A-OKVQA/images/33813.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many watercrafts with blue covers can be counted in the image?" }, { "from": "gpt", "value": "The answer is YOUR COUNT" } ], "clip_score": 20.1875, "loss": 4.384158611297607 }, { "id": "identity_2793", "image": "AI2D/images/3093.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label is pointing to the cell wall?\nChoices:\n(A) R\n(B) U\n(C) S\n(D) H" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.6875, "loss": 4.1292290687561035 }, { "id": "identity_151108", "image": "ScienceQA/images/9024.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many openings does the hair clip have?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.390625, "loss": 4.71795129776001 }, { "id": "identity_104025", "image": "VQA2.0/images/431363.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the license plate number of the truck?" }, { "from": "gpt", "value": "The answer is HF58 HOA" } ], "clip_score": 19.6875, "loss": 4.0989460945129395 }, { "id": "identity_294091", "image": "A-OKVQA/images/424300.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of environment is depicted in the image? \nOptions:\n(A) field\n(B) jungle\n(C) beach\n(D) forest" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.6484375, "loss": 4.13738489151001 }, { "id": "identity_201025", "image": "PMC-VQA/images/PMC3755418_fig4_227731.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the anatomical location of the mass?" }, { "from": "gpt", "value": "The answer is Left breast" } ], "clip_score": 24.71875, "loss": 4.315824508666992 }, { "id": "identity_314691", "image": "FigureQA/images/84408.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Olive's value the smallest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.109375, "loss": 4.305638313293457 }, { "id": "identity_10442", "image": "PMC-VQA/images/PMC6325806_Fig11_420541.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the appearance of the outer margins of the postabdominal segments?\nChoices:\n(A) Overlap \n(B) Disarticulation \n(C) Telescoping \n(D) Constriction" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.109375, "loss": 4.6677937507629395 }, { "id": "identity_312671", "image": "FigureQA/images/27077.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.3125, "loss": 4.3288469314575195 }, { "id": "identity_285049", "image": "Super-CLEVR/images/superCLEVR_new_001308.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other objects that are the same size as the purple matte motorbike?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 26.953125, "loss": 5.165452003479004 }, { "id": "identity_206516", "image": "PMC-VQA/images/PMC8042902_Fig1_416625.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is predominantly visible in the center of the image?" }, { "from": "gpt", "value": "The answer is The heart" } ], "clip_score": 21.5625, "loss": 4.046217441558838 }, { "id": "identity_261461", "image": "ChartQA/images/two_col_100701.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the volume experience an increase or a decrease?" }, { "from": "gpt", "value": "The answer is increasing" } ], "clip_score": 19.78125, "loss": 4.044604301452637 }, { "id": "identity_185076", "image": "VQA2.0/images/201062.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of device is placed on top of the freezer, to the right?" }, { "from": "gpt", "value": "The answer is A digital kitchen scale" } ], "clip_score": 26.671875, "loss": 4.314985275268555 }, { "id": "identity_277016", "image": "A-OKVQA/images/464149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time does the clock show if it is ahead in the morning? \nChoices:\n(A) ten\n(B) eleven\n(C) four\n(D) three" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 5.17578125, "loss": 4.07059907913208 }, { "id": "identity_35443", "image": "PMC-VQA/images/PMC7073084_ijms-21-01383-f003_134672.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the scale bar indicate in the image? \nChoices:\n(A) Bacterial size \n(B) Human size \n(C) Amount of protein \n(D) Amount of starch granules " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.09375, "loss": 4.28054141998291 }, { "id": "identity_237143", "image": "A-OKVQA/images/572362.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the indicators in the image that suggest the weather condition, and how do they collectively support the reason for using an umbrella?" }, { "from": "gpt", "value": "The answer is rain" } ], "clip_score": 26.09375, "loss": 5.2864089012146 }, { "id": "identity_305386", "image": "TQA/images/abc_question_images/human_system_muscular_16161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is labeled as N in this diagram of the human muscular system?\nChoices:\n(A) FRONTALIS\n(B) GRACILIS\n(C) SARTORIUS\n(D) DELTOID" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.6875, "loss": 4.001673221588135 }, { "id": "identity_313457", "image": "ScienceQA/images/3276.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Nashville\n(B) Orlando\n(C) Tallahassee\n(D) Jefferson City" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.34375, "loss": 4.2033233642578125 }, { "id": "identity_308015", "image": "PMC-VQA/images/PMC3755418_fig1_227730.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the lesion located in the left breast?\nChoices:\n(A) Upper outer quadrant \n(B) Lower middle quadrant \n(C) Upper middle quadrant \n(D) Lower outer quadrant" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.90625, "loss": 4.369964122772217 }, { "id": "identity_115248", "image": "Super-CLEVR/images/superCLEVR_new_000695.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the vehicle in the foreground?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 26.9375, "loss": 4.467745780944824 }, { "id": "identity_328022", "image": "A-OKVQA/images/538609.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of internet device is being used at this workspace?\nChoices:\n(A) desktop computer\n(B) tablet\n(C) smartphone\n(D) laptop computer" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.453125, "loss": 4.769928932189941 }, { "id": "identity_9553", "image": "A-OKVQA/images/569190.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The rightmost set of rails leads to which railway structure?\nChoices:\n(A) workshop\n(B) depot\n(C) turntable\n(D) train station" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.53125, "loss": 4.34492301940918 }, { "id": "identity_13363", "image": "VizWiz/images/VizWiz_train_00006424.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this?" }, { "from": "gpt", "value": "The answer is sweetarts candy tabs" } ], "clip_score": 28.453125, "loss": 4.310611248016357 }, { "id": "identity_283447", "image": "A-OKVQA/images/214213.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of shot is the woman going to hit next? Is it a serve, backhand, forehand, or slice?\nChoices:\n(A) serve\n(B) backhand\n(C) forehand\n(D) slice" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.546875, "loss": 4.11616325378418 }, { "id": "identity_285471", "image": "A-OKVQA/images/526257.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Behind which base is the photographer standing? \n(A) home\n(B) third\n(C) first\n(D) second" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.328125, "loss": 4.074192047119141 }, { "id": "identity_106580", "image": "TextVQA/images/929d7b10f28440dd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trees are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 13.203125, "loss": 4.412050247192383 }, { "id": "identity_215334", "image": "A-OKVQA/images/159601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many ski lifts are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.75, "loss": 4.651060104370117 }, { "id": "identity_108563", "image": "A-OKVQA/images/31923.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the woman on the left carrying?" }, { "from": "gpt", "value": "The answer is A container or box" } ], "clip_score": 17.453125, "loss": 4.623753070831299 }, { "id": "identity_307830", "image": "FigureQA/images/5304.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Deep Pink?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.171875, "loss": 4.009838104248047 }, { "id": "identity_279306", "image": "TQA/images/abc_question_images/cycle_water_14953.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label that represents Evaporation from the given choices: (A) R, (B) J, (C) D, (D) H." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.84375, "loss": 4.214311122894287 }, { "id": "identity_106749", "image": "VQA2.0/images/501698.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the make of the motorcycle?" }, { "from": "gpt", "value": "The answer is Win100" } ], "clip_score": 24.6875, "loss": 4.205399990081787 }, { "id": "identity_121388", "image": "VQA2.0/images/12421.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any people visible in the image?" }, { "from": "gpt", "value": "The answer is No people visible" } ], "clip_score": 16.34375, "loss": 4.243512153625488 }, { "id": "identity_195409", "image": "A-OKVQA/images/204188.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the floor beneath the toilet display?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 24.84375, "loss": 4.150102615356445 }, { "id": "identity_283332", "image": "VQA2.0/images/83858.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shirt worn by the person?" }, { "from": "gpt", "value": "The answer is white with red" } ], "clip_score": 18.078125, "loss": 4.364887714385986 }, { "id": "identity_263006", "image": "A-OKVQA/images/462037.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the objects that the girls' steering wheels control?\nChoices:\n(A) car\n(B) barbie house\n(C) video game\n(D) toy truck" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.25, "loss": 4.098555564880371 }, { "id": "identity_24419", "image": "PMC-VQA/images/PMC5353970_F1_140826.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of image is being described in the caption? \nChoices:\n(A) PET scan \n(B) MRI scan \n(C) CT scan \n(D) X-ray " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.703125, "loss": 4.1147613525390625 }, { "id": "identity_267675", "image": "AI2D/images/327.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does C represent in the diagram? \nChoices:\n(A) Newest Rocks\n(B) Oldest Rocks\n(C) Time\n(D) Oldest Rocks" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.25, "loss": 4.2969865798950195 }, { "id": "identity_313763", "image": "FigureQA/images/20072.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.171875, "loss": 4.023869037628174 }, { "id": "identity_270307", "image": "A-OKVQA/images/305093.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which sound is produced by the animal? \nOptions:\n(A) neigh\n(B) woof\n(C) moo\n(D) meow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 8.09375, "loss": 4.267768383026123 }, { "id": "identity_15150", "image": "A-OKVQA/images/245182.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is required for this activity?\nChoices:\n(A) water\n(B) snow\n(C) sun\n(D) wind" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.7578125, "loss": 4.519001007080078 }, { "id": "identity_48921", "image": "DVQA/images/bar_train_00124889.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item has the second highest number of units sold?" }, { "from": "gpt", "value": "The answer is holder" } ], "clip_score": 23.5625, "loss": 4.03610372543335 }, { "id": "identity_286583", "image": "PMC-VQA/images/PMC8960880_Fig7_237236.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the panel on the right, which areas of the implant are displayed in higher magnification views? \nChoices:\n(A) Head and tail\n(B) Tail and body\n(C) Head and tip\n(D) Body and tip" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.8125, "loss": 4.100050449371338 }, { "id": "identity_336006", "image": "TextVQA/images/0d4f6be234078add.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is this beer?" }, { "from": "gpt", "value": "The answer is war" } ], "clip_score": 23.875, "loss": 6.267763137817383 }, { "id": "identity_306895", "image": "A-OKVQA/images/47481.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person most likely retrieving in front of this?\nChoices:\n(A) water\n(B) soda\n(C) juice\n(D) milk" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.28125, "loss": 4.0389180183410645 }, { "id": "identity_273396", "image": "TQA/images/question_images/waves_9295.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term used to describe the distance between two consecutive crests?\nChoices:\n(A) amplitude\n(B) oscillation\n(C) period\n(D) frequency" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.09375, "loss": 5.163710594177246 }, { "id": "identity_189493", "image": "TextVQA/images/5f0c8fe763efeb62.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What object is the third person from the left holding?" }, { "from": "gpt", "value": "The answer is A gift box" } ], "clip_score": 16.9375, "loss": 4.929039478302002 }, { "id": "identity_273437", "image": "AI2D/images/3633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the correct location of the heart in the human body?\nChoices:\n(A) hand\n(B) leg\n(C) head\n(D) chest" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.609375, "loss": 4.526583671569824 }, { "id": "identity_66983", "image": "AI2D/images/1836.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What role does the owl play in both the food chain and food web?" }, { "from": "gpt", "value": "The answer is Secondary consumer" } ], "clip_score": 36.84375, "loss": 4.465280532836914 }, { "id": "identity_274119", "image": "TextVQA/images/021e8e65743a4599.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which two words are located near the roses?" }, { "from": "gpt", "value": "The answer is birthday rememberanes" } ], "clip_score": 17.921875, "loss": 4.108879089355469 }, { "id": "identity_33096", "image": "PMC-VQA/images/PMC4312691_F1_354340.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What finding is noted in the left maxilla based on the axial plane image?\nChoices:\n(A) Buccal expansion \n(B) Lingual expansion \n(C) Mesial root resorption \n(D) None of the above " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.484375, "loss": 4.291468620300293 }, { "id": "identity_266705", "image": "ScienceQA/images/20958.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of New Jersey?\nChoices:\n(A) Knoxville\n(B) Jersey City\n(C) Trenton\n(D) Dover" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.859375, "loss": 4.006495475769043 }, { "id": "identity_319707", "image": "PMC-VQA/images/PMC7814591_Fig8_345052.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in Panel c of the imaging?\nChoices:\n(A) Left transverse dural sinus \n(B) Absent Enhancement \n(C) Shallow left sigmoid groove \n(D) Small/ hypoplastic left sigmoid dural sinus" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.25, "loss": 4.365026473999023 }, { "id": "identity_5514", "image": "TextVQA/images/17f22d6d1632458b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what time is shown on the watch?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 27.515625, "loss": 4.435394763946533 }, { "id": "identity_269", "image": "Super-CLEVR/images/superCLEVR_new_001457.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the bicycle that is behind the tiny matte biplane made of?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 30.75, "loss": 5.343710422515869 }, { "id": "identity_35701", "image": "A-OKVQA/images/382873.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the chair on top of the elephant called?\nChoices:\n(A) saddle\n(B) gondola\n(C) howdah\n(D) chaise" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.546875, "loss": 4.186124801635742 }, { "id": "identity_35766", "image": "PMC-VQA/images/PMC4592919_fig9_429543.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of MRI image is shown in the figure? \nChoices:\n(A) T1-weighted MRI \n(B) T2-weighted MRI \n(C) Diffusion-weighted MRI \n(D) Magnetic resonance angiography (MRA) " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.734375, "loss": 4.123085021972656 }, { "id": "identity_281107", "image": "ScienceQA/images/10413.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the primary consumer in this food web among the following organisms:\n(A) phytoplankton\n(B) kelp bass\n(C) orca\n(D) sea otter" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.4375, "loss": 4.673397064208984 }, { "id": "identity_106781", "image": "VQA-AS/images/16377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bushes are there in the background?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.15625, "loss": 4.709906578063965 }, { "id": "identity_320159", "image": "PMC-VQA/images/PMC8841589_F1_193678.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the structures indicated by the white arrowheads? \nChoices:\n(A) Right atrium \n(B) Left atrium \n(C) Inferior vena cava \n(D) Middle hepatic vein." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.859375, "loss": 4.01694393157959 }, { "id": "identity_328660", "image": "TQA/images/abc_question_images/human_system_eye_16121.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label corresponds to the fovea?\nChoices:\n(A) T\n(B) F\n(C) J\n(D) E" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.234375, "loss": 4.32763147354126 }, { "id": "identity_274603", "image": "FigureQA/images/27630.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Royal Blue or Green Yellow?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.09375, "loss": 4.475754737854004 }, { "id": "identity_54887", "image": "MapQA/images/map_7014.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states fall into the 31.6%-31.9% range for the percentage of companies?" }, { "from": "gpt", "value": "The answer is an integer number" } ], "clip_score": 30.609375, "loss": 4.2171196937561035 }, { "id": "identity_114360", "image": "TextVQA/images/012858674b02d06a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cars are parked on the right side of the street?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 20.703125, "loss": 4.410872936248779 }, { "id": "identity_294854", "image": "A-OKVQA/images/500271.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose does the lady use the toothbrush?\nChoices:\n(A) grooming\n(B) brushing teeth\n(C) tickling\n(D) attacking" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.1875, "loss": 4.340906143188477 }, { "id": "identity_189367", "image": "PMC-VQA/images/PMC5815711_F1_276716.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image shown?" }, { "from": "gpt", "value": "The answer is Axial orientation" } ], "clip_score": 24.109375, "loss": 4.136218070983887 }, { "id": "identity_188210", "image": "TextVQA/images/a0d2fab6b1602d38.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many rows of keys are there on the typewriter?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 29.421875, "loss": 4.363125801086426 }, { "id": "identity_17382", "image": "DocVQA/images/txpp0227_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what does the olive badge indicates ?" }, { "from": "gpt", "value": "The answer is nonmember" } ], "clip_score": 14.8046875, "loss": 5.195198059082031 }, { "id": "identity_41619", "image": "DVQA/images/bar_train_00030753.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the lowest combined value of 'humor' and 'campus'?" }, { "from": "gpt", "value": "The answer is rice" } ], "clip_score": 26.84375, "loss": 4.550904750823975 }, { "id": "identity_323008", "image": "TabMWP/images/17812.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is this function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.484375, "loss": 4.17563533782959 }, { "id": "identity_12436", "image": "DocVQA/images/shlk0226_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the Hostess for Nutrition and snack foods?" }, { "from": "gpt", "value": "The answer is Connie Bea Hope" } ], "clip_score": 20.59375, "loss": 4.796768665313721 }, { "id": "identity_302640", "image": "FigureQA/images/61808.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Magenta the one with the smallest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.359375, "loss": 4.187957286834717 }, { "id": "identity_209388", "image": "PMC-VQA/images/PMC4062408_pone-0097368-g005_299287.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many images are displayed within the axial localizer image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.828125, "loss": 4.2083210945129395 }, { "id": "identity_320662", "image": "ScienceQA/images/15367.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Dover\n(B) Honolulu\n(C) Hilo\n(D) Cheyenne" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.125, "loss": 4.670780658721924 }, { "id": "identity_34655", "image": "A-OKVQA/images/36548.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this motorcycle designed to do?\nChoices:\n(A) pull trailer\n(B) drive fast\n(C) dirt racing\n(D) jump high" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.953125, "loss": 4.409286975860596 }, { "id": "identity_137333", "image": "DocVQA/images/rlvm0227_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the expected output of the project?" }, { "from": "gpt", "value": "The answer is Report" } ], "clip_score": 17.84375, "loss": 4.89590311050415 }, { "id": "identity_206210", "image": "VQA2.0/images/321588.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any boats visible in the water?" }, { "from": "gpt", "value": "The answer is No boats visible" } ], "clip_score": 17.90625, "loss": 4.731474876403809 }, { "id": "identity_286226", "image": "VQA-AS/images/4678.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the window have a curved top?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 16.0625, "loss": 4.10323429107666 }, { "id": "identity_326870", "image": "VizWiz/images/VizWiz_train_00019938.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the screen display?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 23.125, "loss": 4.205153942108154 }, { "id": "identity_6300", "image": "ScienceQA/images/17338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these two objects have in common?\nChoices:\n(A) hard\n(B) bendable" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.609375, "loss": 5.145754814147949 }, { "id": "identity_116961", "image": "PMC-VQA/images/PMC4028266_pone-0098088-g008_290149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many nuclei are indicated by white arrows in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.40625, "loss": 4.375927925109863 }, { "id": "identity_13822", "image": "PMC-VQA/images/PMC8618346_viruses-13-02109-f003_121105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted as red spheres in the segmented 3D volume? \nChoices:\n(A) Mature virions \n(B) Immature virions \n(C) Cytoplasm \n(D) Nucleus " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.109375, "loss": 4.508704662322998 }, { "id": "identity_288584", "image": "PMC-VQA/images/PMC7301053_Fig2_194514.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What were the findings of the MRI scan without contrast?\nChoices:\n(A) An enlarged brain \n(B) White matter hypointensities \n(C) Generalized hypertrophy \n(D) Patchy hyperintensities." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.34375, "loss": 4.180884838104248 }, { "id": "identity_282648", "image": "IconQA/images/79194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is it possible to draw a circle using this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.34375, "loss": 4.277684211730957 }, { "id": "identity_32887", "image": "PMC-VQA/images/PMC8666393_Fig2_137527.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the arrow in the image? \nChoices:\n(A) to indicate the presence of lymph nodes \n(B) to indicate the presence of a tumor \n(C) to indicate the presence of a foreign body \n(D) to indicate the presence of inflammation " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.421875, "loss": 4.525832176208496 }, { "id": "identity_152004", "image": "AI2D/images/4506.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of leaf margin is depicted in the second leaf from the left?" }, { "from": "gpt", "value": "The answer is Entire" } ], "clip_score": 37.71875, "loss": 4.234760761260986 }, { "id": "identity_254632", "image": "A-OKVQA/images/334400.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Considering the man's yellow shirt is for visibility, identify two other elements in the image that contribute to safety and explain their roles" }, { "from": "gpt", "value": "The answer is Stop sign and crosswalk ensure safety" } ], "clip_score": 22.265625, "loss": 4.0807390213012695 }, { "id": "identity_226655", "image": "PMC-VQA/images/PMC7439464_fig4_238121.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key structural differences observed in the image that indicate it was captured using transmission electron microscopy, and how do these differences compare to images captured using the other microscopy techniques listed?" }, { "from": "gpt", "value": "The answer is Detailed internal structures and high resolution indicate TEM" } ], "clip_score": 27.1875, "loss": 4.271275997161865 }, { "id": "identity_285846", "image": "A-OKVQA/images/452459.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which age group typically uses this room? Older adults, children, teenagers, or young adults?\nChoices:\n(A) older adult\n(B) child\n(C) teenager\n(D) young adult" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.6875, "loss": 4.37416934967041 }, { "id": "identity_199718", "image": "VQA2.0/images/253166.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is advertised on the back wall?" }, { "from": "gpt", "value": "The answer is Olympus" } ], "clip_score": 20.890625, "loss": 4.208255290985107 }, { "id": "identity_133616", "image": "A-OKVQA/images/539041.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many toothbrushes are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.84375, "loss": 4.661287307739258 }, { "id": "identity_38066", "image": "PMC-VQA/images/PMC4600274_Fig1_431743.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the CT image show on the left femoral head? \nChoices:\n(A) partial femoral head collapse \n(B) lesion in the loaded part \n(C) high intensity \n(D) MRI STIR " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.59375, "loss": 5.023211479187012 }, { "id": "identity_261737", "image": "PMC-VQA/images/PMC9323302_jpm-12-01151-f001_355272.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which body part did the distal tibial nail get pulled out? \nChoices:\n(A) Ankle\n(B) Knee\n(C) Hip\n(D) Elbow" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.5, "loss": 4.155779838562012 }, { "id": "identity_17902", "image": "PMC-VQA/images/PMC8539837_Fig5_93605.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How is the virtual reality view represented in the figures? \nChoices:\n(A) Edited \n(B) Not edited \n(C) It doesn't show the virtual view \n(D) Mirrored " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.53125, "loss": 4.463103771209717 }, { "id": "identity_62034", "image": "DocVQA/images/jsbn0226_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What types of agreements or arrangements are disclosed in Part 3?" }, { "from": "gpt", "value": "The answer is Future employment, leave absence, continuing payments, retention disposition" } ], "clip_score": 28.3125, "loss": 4.217215538024902 }, { "id": "identity_125722", "image": "VQA2.0/images/351734.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the surface of the table?" }, { "from": "gpt", "value": "The answer is Brown wood" } ], "clip_score": 18.0625, "loss": 5.029199600219727 }, { "id": "identity_313431", "image": "A-OKVQA/images/487835.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is someone walking?\nChoices:\n(A) river\n(B) forest\n(C) roadway\n(D) subway" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.796875, "loss": 4.25701904296875 }, { "id": "identity_308335", "image": "FigureQA/images/89250.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the area under the curve for Khaki the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.671875, "loss": 4.39789342880249 }, { "id": "identity_319662", "image": "TextVQA/images/87d7fdf7b45162d8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Person vs. what?" }, { "from": "gpt", "value": "The answer is internet" } ], "clip_score": 14.3125, "loss": 5.150575160980225 }, { "id": "identity_34525", "image": "AI2D/images/3820.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which region in the diagram represents the endosperm?\nChoices:\n(A) C\n(B) A\n(C) B\n(D) D" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.3125, "loss": 4.418819427490234 }, { "id": "identity_304399", "image": "PMC-VQA/images/PMC4864780_Fig5_29969.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of organisms are depicted in the images?\nChoices:\n(A) Bacteria\n(B) Zooplankton\n(C) Phytoplankton\n(D) Fungi" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.71875, "loss": 4.454828262329102 }, { "id": "identity_120788", "image": "TextVQA/images/29c74a4c7bc7b60e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are wearing white jerseys?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.890625, "loss": 4.827034950256348 }, { "id": "identity_330204", "image": "PMC-VQA/images/PMC7373644_figure2_217655.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the arrowhead in the image?\nChoices:\n(A) The lesion\n(B) The nipple\n(C) The reconstructed plane\n(D) Unable to determine" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.515625, "loss": 4.333364486694336 }, { "id": "identity_185455", "image": "VQA2.0/images/181084.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the majority of the balloons in the background?" }, { "from": "gpt", "value": "The answer is Blue and white" } ], "clip_score": 20.328125, "loss": 4.016872882843018 }, { "id": "identity_187522", "image": "TextVQA/images/152b1e329b8b71d0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lifebuoys are visible on the ship?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.359375, "loss": 4.377216815948486 }, { "id": "identity_106937", "image": "VQA2.0/images/448609.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the ramp the person is skateboarding on?" }, { "from": "gpt", "value": "The answer is Light green" } ], "clip_score": 25.625, "loss": 4.215970516204834 }, { "id": "identity_282898", "image": "PMC-VQA/images/PMC9393506_f1_377421.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is the lesion situated?\nChoices:\n(A) Within the nasal cavity \n(B) In the brain \n(C) In the sphenoid bone \n(D) Within the sphenoid sinus" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.84375, "loss": 4.732191562652588 }, { "id": "identity_26664", "image": "A-OKVQA/images/498339.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are people here to do?\nChoices:\n(A) worship\n(B) travel\n(C) work\n(D) shop" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.40625, "loss": 4.589674949645996 }, { "id": "identity_322339", "image": "PMC-VQA/images/PMC6156007_F2_371926.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: When was the CT scan in the image taken?\nChoices:\n(A) Before the ankle joint dislocation,\n(B) After the ankle joint dislocation,\n(C) During the ankle joint dislocation,\n(D) None of the above." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.3125, "loss": 4.615845203399658 }, { "id": "identity_293101", "image": "PMC-VQA/images/PMC5110894_fig7_95813.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the indication of the 3D reconstructed MDCT chest coronal section image showing?\nChoices:\n(A) Abnormal pressure in the heart \n(B) Enlargement of the left side of the heart \n(C) Left aneurysm of sinus of Valsalva \n(D) None of the above." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.203125, "loss": 4.2744245529174805 }, { "id": "identity_266459", "image": "VQA2.0/images/169510.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On what is the man seated?" }, { "from": "gpt", "value": "The answer is cooler" } ], "clip_score": 17.734375, "loss": 4.623466491699219 }, { "id": "identity_304856", "image": "VQA-RAD/images/synpic17693.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In what plane is this X-ray?" }, { "from": "gpt", "value": "The answer is PA" } ], "clip_score": 27.796875, "loss": 4.042996406555176 }, { "id": "identity_286987", "image": "PMC-VQA/images/PMC4867686_F1_30691.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the fourth column of the images provided?\nChoices:\n(A) GFP fluorescence \n(B) RFP fluorescence \n(C) Chlorophyll auto fluorescence \n(D) Differential interference contrast image" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.046875, "loss": 4.170255661010742 }, { "id": "identity_310890", "image": "A-OKVQA/images/46077.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does one person lack that the other person has?\nChoices:\n(A) helmet\n(B) shoes\n(C) pants\n(D) shirt" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.84375, "loss": 4.097426414489746 }, { "id": "identity_228186", "image": "IconQA/images/72419.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you select two marbles without looking, how likely is it that both will be black?" }, { "from": "gpt", "value": "The answer is certain" } ], "clip_score": 32.40625, "loss": 4.495301246643066 }, { "id": "identity_228834", "image": "DVQA/images/bar_train_00132474.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset has the highest accuracy for the 'chief' category, and what is that accuracy value?" }, { "from": "gpt", "value": "The answer is Jazz, 80" } ], "clip_score": 28.015625, "loss": 4.045032501220703 }, { "id": "identity_192500", "image": "VizWiz/images/VizWiz_train_00007008.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the object on the left side of the radio?" }, { "from": "gpt", "value": "The answer is Gaming controller" } ], "clip_score": 26.640625, "loss": 4.75766134262085 }, { "id": "identity_27030", "image": "PMC-VQA/images/PMC6186532_Fig1_381772.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of images are shown in the middle and bottom rows? \nChoices:\n(A) coronal fusion slices \n(B) sagittal fusion slices \n(C) transaxial fusion slices \n(D) PET/CT images " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.65625, "loss": 4.830766677856445 }, { "id": "identity_263599", "image": "DocVQA/images/gmbw0217_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group of patients is being analyzed?" }, { "from": "gpt", "value": "The answer is Higher risk patients" } ], "clip_score": 18.265625, "loss": 4.09197473526001 }, { "id": "identity_124785", "image": "A-OKVQA/images/358576.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of fruits are displayed in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 24.828125, "loss": 4.117903709411621 }, { "id": "identity_62990", "image": "ScienceQA/images/11933.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What can be deduced about the temperature of the substance based on its flow in the image?" }, { "from": "gpt", "value": "The answer is Moderate temperature" } ], "clip_score": 23.6875, "loss": 4.070683479309082 }, { "id": "identity_321249", "image": "A-OKVQA/images/74365.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are these individuals most likely located?\nChoices:\n(A) garage\n(B) park\n(C) office\n(D) mall" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 4.953125, "loss": 4.349420070648193 }, { "id": "identity_330038", "image": "A-OKVQA/images/333691.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How old is this person now?\nChoices:\n(A) older adult\n(B) teen\n(C) young adult\n(D) child" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.484375, "loss": 4.063996315002441 }, { "id": "identity_71269", "image": "AI2D/images/1762.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism is at the top of the food web?" }, { "from": "gpt", "value": "The answer is Turnstones" } ], "clip_score": 24.1875, "loss": 4.4711737632751465 }, { "id": "identity_206472", "image": "VQA2.0/images/297607.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many rail tracks are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.125, "loss": 4.340763092041016 }, { "id": "identity_27660", "image": "PMC-VQA/images/PMC6189522_fig4_382355.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the brain did the tissue sample come from? \nChoices:\n(A) Frontal lobe, \n(B) Parietal lobe, \n(C) Temporal lobe, \n(D) Occipital lobe " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.515625, "loss": 4.28811502456665 }, { "id": "identity_154508", "image": "AI2D/images/4221.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does \"rx\" stand for in the labels \"metamorphic rx\" and \"igneous rx\"?" }, { "from": "gpt", "value": "The answer is Rock" } ], "clip_score": 30.734375, "loss": 5.180499076843262 }, { "id": "identity_337954", "image": "PMC-VQA/images/PMC4472651_Fig1_398035.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the abnormality located?\nChoices:\n(A) Right side\n(B) Left side\n(C) Both sides\n(D) Unknown" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.828125, "loss": 4.361739158630371 }, { "id": "identity_303346", "image": "Super-CLEVR/images/superCLEVR_new_000124.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more small rubber vehicles than regular buses?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 25.484375, "loss": 4.937467098236084 }, { "id": "identity_302905", "image": "Super-CLEVR/images/superCLEVR_new_000711.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What size is the metal thing that is the same shape as the big yellow object?" }, { "from": "gpt", "value": "The answer is small" } ], "clip_score": 14.8984375, "loss": 4.037821292877197 }, { "id": "identity_18556", "image": "PMC-VQA/images/PMC7229234_fig0010_171770.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the urethrography x-ray show in the tip of the penis? \nChoices:\n(A) Enlarged prostate gland (arrow) \n(B) Normal urethra with no evidence of stricture (arrow) \n(C) Multiple preputial stones (arrow) \n(D) None of the above. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.6875, "loss": 4.11116886138916 }, { "id": "identity_318062", "image": "TextVQA/images/383c36ab0d36019c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this store?" }, { "from": "gpt", "value": "The answer is goswami " } ], "clip_score": 30.703125, "loss": 4.011177062988281 }, { "id": "identity_11238", "image": "PMC-VQA/images/PMC6510869_Fig3_477973.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which modality of the image is used to measure disability status? \nChoices:\n(A) T2-weighted image \n(B) Proton Density weighted image \n(C) 3D T1-weighted image \n(D) Fluid Attenuated Inversion Recovery (FLAIR) image. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.640625, "loss": 4.377599239349365 }, { "id": "identity_265585", "image": "FigureQA/images/53945.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Yellow Green not the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.125, "loss": 4.120951175689697 }, { "id": "identity_3424", "image": "Super-CLEVR/images/superCLEVR_new_001277.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more big tandem bikes to the left of the wagon than small red double buss?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 24.546875, "loss": 5.141061782836914 }, { "id": "identity_292811", "image": "A-OKVQA/images/527248.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the roof shape of the building? \nChoices:\n(A) pointy roof\n(B) conical roof\n(C) pavilion roof\n(D) tower roof" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 8.734375, "loss": 4.287114143371582 }, { "id": "identity_208009", "image": "PMC-VQA/images/PMC4101714_F1_306697.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image in relation to the patient's body?" }, { "from": "gpt", "value": "The answer is Axial plane" } ], "clip_score": 29.96875, "loss": 4.4597673416137695 }, { "id": "identity_309911", "image": "PMC-VQA/images/PMC4994314_Fig1_65493.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color does the nodule appear as on the image?\nChoices:\n(A) Red \n(B) Blue \n(C) Green \n(D) Yellow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.640625, "loss": 4.509670257568359 }, { "id": "identity_334698", "image": "A-OKVQA/images/133680.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is he doing on the bench?\nChoices:\n(A) scheming\n(B) resting\n(C) playing\n(D) waiting" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.8125, "loss": 4.453558921813965 }, { "id": "identity_262632", "image": "PMC-VQA/images/PMC8309360_plants-10-01442-f001_19466.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the tomato leaf, what is indicated by the blue arrow pointing to?\nChoices:\n(A) Stomata\n(B) Trichomes\n(C) PD\n(D) Cell Wall" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.890625, "loss": 5.472640514373779 }, { "id": "identity_300909", "image": "AI2D/images/3081.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the outer boundary of this structure?\nChoices:\n(A) Cytoplasm\n(B) Mitochondrion\n(C) Nucleus\n(D) Cell membrane" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.859375, "loss": 4.106065273284912 }, { "id": "identity_5944", "image": "AI2D/images/2730.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What stage comes after waning gibbous?\nChoices:\n(A) waxing gibbous\n(B) new moon\n(C) last quarter\n(D) first quater" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.84375, "loss": 4.229691982269287 }, { "id": "identity_282070", "image": "A-OKVQA/images/397665.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: With which device would the word at the top be most closely associated?\nChoices:\n(A) microwaves\n(B) kettle\n(C) cell phones\n(D) microphones" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.578125, "loss": 4.655155181884766 }, { "id": "identity_286044", "image": "PMC-VQA/images/PMC6423182_Fig19_453342.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of probe was utilized to capture the images among the following options: (A) Cardiac probe, (B) Transvaginal probe, (C) Abdominal probe, or (D) High-frequency linear probe?" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.484375, "loss": 4.380343914031982 }, { "id": "identity_11949", "image": "ScienceQA/images/17203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Delaware?\nChoices:\n(A) Providence\n(B) Dover\n(C) Columbus\n(D) Sacramento" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.125, "loss": 4.121868133544922 }, { "id": "identity_279947", "image": "Super-CLEVR/images/superCLEVR_new_000685.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a large motorcycle located behind the yellow motorcycle which is behind the green double-decker bus?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 27.03125, "loss": 5.200562000274658 }, { "id": "identity_285789", "image": "A-OKVQA/images/292697.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term used to describe the arrangement of elements in this picture? \nChoices:\n(A) photography\n(B) finger paint\n(C) square match\n(D) collage" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.1875, "loss": 4.255054950714111 }, { "id": "identity_303926", "image": "DVQA/images/bar_train_00104859.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sets of bars are there?" }, { "from": "gpt", "value": "The answer is seven" } ], "clip_score": 18.09375, "loss": 4.1870832443237305 }, { "id": "identity_217533", "image": "PMC-VQA/images/PMC7294455_F20_192412.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scale bar length in the image?" }, { "from": "gpt", "value": "The answer is The length indicated on the scale bar label" } ], "clip_score": 13.28125, "loss": 4.334733486175537 }, { "id": "identity_213176", "image": "A-OKVQA/images/349193.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are watching the game from behind the net?" }, { "from": "gpt", "value": "The answer is Eleven" } ], "clip_score": 18.078125, "loss": 4.20352029800415 }, { "id": "identity_312635", "image": "AI2D/images/2116.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A food web is shown. Which of the following organisms compete for this as a food source?\nChoices:\n(A) Insects and Spartina\n(B) Spiders and Mudcrabs\n(C) Detritus and Nematodes\n(D) All of the Above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.765625, "loss": 4.312582969665527 }, { "id": "identity_308633", "image": "ScienceQA/images/832.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image?\nChoices:\n(A) Connecticut\n(B) New Jersey\n(C) Kentucky\n(D) Virginia" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.890625, "loss": 4.705416679382324 }, { "id": "identity_311455", "image": "AI2D/images/1197.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the body is labeled C?\nChoices:\n(A) Abdomen\n(B) Legs\n(C) Head\n(D) Eyes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.203125, "loss": 4.093761444091797 }, { "id": "identity_238148", "image": "A-OKVQA/images/166173.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the steps involved in performing a successful skateboard trick, starting from the jump to the landing, and what must make contact with the ground to complete the trick?" }, { "from": "gpt", "value": "The answer is wheels" } ], "clip_score": 31.53125, "loss": 4.802618026733398 }, { "id": "identity_117958", "image": "PMC-VQA/images/PMC5482258_f04_181823.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What part of the body is primarily displayed in the image?" }, { "from": "gpt", "value": "The answer is Head and upper neck" } ], "clip_score": 24.59375, "loss": 4.053849220275879 }, { "id": "identity_45212", "image": "MapQA/images/map_1641.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color code for the states with the highest value range?" }, { "from": "gpt", "value": "The answer is Dark Red" } ], "clip_score": 28.859375, "loss": 4.1340131759643555 }, { "id": "identity_244904", "image": "VQA2.0/images/179586.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many red things are visible in the image, and what are their respective locations?" }, { "from": "gpt", "value": "The answer is Four red things; two on each door" } ], "clip_score": 13.875, "loss": 4.167001724243164 }, { "id": "identity_329803", "image": "PMC-VQA/images/PMC5804004_Fig1_273098.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the lesion located?\nChoices:\n(A) Brainstem\n(B) Cervical spine\n(C) Thoracolumbar spine\n(D) Sacrum" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.875, "loss": 4.297213554382324 }, { "id": "identity_292253", "image": "PMC-VQA/images/PMC3605910_pone-0059740-g006_193981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this image, what is represented by the green fluorescence?\nChoices:\n(A) Necrosis \n(B) Autofluorescence \n(C) Favorable labeling section \n(D) Unfavorable labeling section" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.828125, "loss": 4.577884674072266 }, { "id": "identity_130061", "image": "TextVQA/images/374c57d7b6d53d41.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What city is associated with the brand of beer in the image?" }, { "from": "gpt", "value": "The answer is Barcelona" } ], "clip_score": 22.0625, "loss": 4.189425468444824 }, { "id": "identity_327283", "image": "TabMWP/images/26380.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table displays a function. Is it linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.9375, "loss": 4.247452259063721 }, { "id": "identity_245788", "image": "ScienceQA/images/9313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify all the materials used in the construction of this clothespin and describe their respective functions" }, { "from": "gpt", "value": "The answer is Wood for structure, metal for tension" } ], "clip_score": 31.71875, "loss": 4.102909088134766 }, { "id": "identity_266549", "image": "PMC-VQA/images/PMC6261832_rcr2392-fig-0002_402074.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which lung segments are depicted in the lower section images? \nChoices:\n(A) Right upper\n(B) Right middle\n(C) Right lower\n(D) Left lower" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.75, "loss": 4.413110733032227 }, { "id": "identity_4580", "image": "PMC-VQA/images/PMC9222266_gels-08-00386-f004_319990.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the image (a) show? \nChoices:\n(A) a microstructure of super-swelling PPG before swelling \n(B) a microstructure of super-swelling PPG after swelling \n(C) a chemical structure of super-swelling PPG before swelling \n(D) a chemical structure of super-swelling PPG after swelling " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.484375, "loss": 4.344201564788818 }, { "id": "identity_114356", "image": "TextVQA/images/221dc4ac9ba1c1ce.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many handbags are visible on the shelf in the store?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 22.203125, "loss": 4.747386455535889 }, { "id": "identity_268632", "image": "TQA/images/abc_question_images/photosynthesis_11315.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label that represents the absorption of carbon dioxide by the plant.\nChoices:\n(A) H\n(B) E\n(C) C\n(D) W AND K" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.609375, "loss": 4.265277862548828 }, { "id": "identity_271147", "image": "FigureQA/images/38394.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater in value, Light Green or Dark Gold?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.515625, "loss": 4.628129005432129 }, { "id": "identity_303128", "image": "DVQA/images/bar_train_00013315.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second bar from the bottom in each set?" }, { "from": "gpt", "value": "The answer is vote" } ], "clip_score": 21.84375, "loss": 4.231015205383301 }, { "id": "identity_282738", "image": "A-OKVQA/images/88538.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose are the umbrellas being utilized?\nChoices:\n(A) illuminating\n(B) shielding from the sun\n(C) preventing rain\n(D) ornamentation" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.03125, "loss": 4.412798881530762 }, { "id": "identity_266638", "image": "DocVQA/images/kqxw0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the creator?" }, { "from": "gpt", "value": "The answer is Joe Leinster" } ], "clip_score": 17.25, "loss": 4.729350566864014 }, { "id": "identity_288216", "image": "PMC-VQA/images/PMC5825313_Fig7_280034.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the specific type of fracture that the patient is diagnosed with? \nChoices:\n(A) Growing Parietal Fracture\n(B) Leptomeningeal Cyst\n(C) Posterior Parietal Calvarial Fracture\n(D) Cerebral Palsy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.5625, "loss": 4.697298049926758 }, { "id": "identity_294992", "image": "TabMWP/images/10931.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which day of the week did the subway record the lowest number of passengers? Is it:\n(A) Saturday\n(B) Sunday\n(C) Monday\n(D) Tuesday" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.46875, "loss": 4.181685447692871 }, { "id": "identity_22060", "image": "A-OKVQA/images/65001.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: When stopping what body part did most people use to stop their vehicles?\nChoices:\n(A) foot\n(B) hand\n(C) eye\n(D) hip" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.03125, "loss": 4.532055854797363 }, { "id": "identity_20326", "image": "IconQA/images/107100.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of triangles even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.6875, "loss": 4.123232841491699 }, { "id": "identity_302510", "image": "A-OKVQA/images/301453.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What prevents the motorbike from falling over?\nChoices:\n(A) braces\n(B) bricks\n(C) wall\n(D) person" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.875, "loss": 4.262804985046387 }, { "id": "identity_303660", "image": "VQA2.0/images/324823.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a person in the photo?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 19.171875, "loss": 4.028886318206787 }, { "id": "identity_321811", "image": "A-OKVQA/images/238806.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the green stuff on?\n\nChoices:\n(A) salad\n(B) apple\n(C) pizza\n(D) hot dog" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.375, "loss": 4.510041236877441 }, { "id": "identity_7253", "image": "ScienceQA/images/13411.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following organisms is the producer in this food web?\nChoices:\n(A) pine vole\n(B) bolete fungus\n(C) persimmon tree\n(D) gray fox" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.25, "loss": 4.120248317718506 }, { "id": "identity_314164", "image": "PMC-VQA/images/PMC7601856_materials-13-04498-f004_288003.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the SEM image in (d) depict?\nChoices:\n(A) CrFeNiMn alloy \n(B) SLM-2 \n(C) SE images of equiaxed grains \n(D) XY-plane" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.921875, "loss": 4.302401542663574 }, { "id": "identity_196688", "image": "PMC-VQA/images/PMC8870832_diagnostics-12-00306-f005_203038.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the anatomical structure labeled as 'a' in this image?" }, { "from": "gpt", "value": "The answer is Right lung affected area" } ], "clip_score": 30.8125, "loss": 5.065240383148193 }, { "id": "identity_18198", "image": "A-OKVQA/images/256983.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the smiling woman doing?\nChoices:\n(A) daydreaming\n(B) talking\n(C) listening\n(D) singing" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.140625, "loss": 4.3559770584106445 }, { "id": "identity_325026", "image": "TQA/images/question_images/sun_layers_4062.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the warmest part of this?\n\nChoices:\n(A) Core\n(B) Sunspots\n(C) Convection Zone\n(D) Photosphere" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.796875, "loss": 4.308047294616699 }, { "id": "identity_19184", "image": "FigureQA/images/52984.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Purple the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.96875, "loss": 4.114767074584961 }, { "id": "identity_333634", "image": "PMC-VQA/images/PMC7362644_Fig1_214575.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which limbs exhibit a neurogenic pattern?\nChoices:\n(A) Lower limbs \n(B) Upper limbs \n(C) Both limbs \n(D) No particular limb" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.234375, "loss": 4.155106544494629 }, { "id": "identity_17704", "image": "FigureQA/images/90812.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Turquoise less than Orange Red?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.8125, "loss": 4.045089244842529 }, { "id": "identity_34382", "image": "A-OKVQA/images/385405.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of store is this?\nChoices:\n(A) shoe\n(B) grocery\n(C) pet\n(D) beauty" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.09375, "loss": 4.137665748596191 }, { "id": "identity_218737", "image": "A-OKVQA/images/561037.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.671875, "loss": 4.475627899169922 }, { "id": "identity_19211", "image": "FigureQA/images/61501.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Green have the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.1875, "loss": 4.1485466957092285 }, { "id": "identity_290142", "image": "PMC-VQA/images/PMC7906016_F3_374088.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the red arrow in the image?\nChoices:\n(A) Normal tissue, \n(B) Ischemic infarction in the right temporal lobe, \n(C) Ischemic infarction in the left occipital lobe, \n(D) Ischemic infarction in the cerebellum." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.84375, "loss": 4.094295024871826 }, { "id": "identity_332635", "image": "PMC-VQA/images/PMC7034755_FIG1_122260.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the PE found in this image?\nChoices:\n(A) Left Pulmonary Artery\n(B) Bronchus\n(C) Right Pulmonary Artery\n(D) Trachea" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.515625, "loss": 4.702861785888672 }, { "id": "identity_212735", "image": "VQA-AS/images/11962.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are in the room?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.546875, "loss": 4.047212600708008 }, { "id": "identity_328417", "image": "PMC-VQA/images/PMC8221312_f5_474069.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of cells is being examined in this image?\nChoices:\n(A) cardiomyocytes \n(B) neutrophils \n(C) endothelial cells \n(D) smooth muscle cells" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.09375, "loss": 4.695783615112305 }, { "id": "identity_69467", "image": "AI2D/images/414.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of producers are present in this food web?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 32.84375, "loss": 4.684996604919434 }, { "id": "identity_278876", "image": "DVQA/images/bar_train_00046721.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the bars run horizontally?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.09375, "loss": 4.578306674957275 }, { "id": "identity_261705", "image": "A-OKVQA/images/465424.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most probable food choice for the bikers among the following options: pizza, curry, hamburgers, or fish?\nChoices:\n(A) pizza\n(B) curry\n(C) hamburgers\n(D) fish" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.34375, "loss": 4.273621559143066 }, { "id": "identity_17304", "image": "VizWiz/images/VizWiz_train_00018502.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Yes, I just need to know if there is any dosage information visible on this bottle. Thank you." }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 18.640625, "loss": 4.1906819343566895 }, { "id": "identity_332427", "image": "PMC-VQA/images/PMC7321844_Fig1_200931.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the white arrow heads in this image?\nChoices:\n(A) An area of hemorrhage \n(B) A blood clot within the aneurysm \n(C) The path of the blood flow \n(D) The location of the aneurysm" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.703125, "loss": 4.522315979003906 }, { "id": "identity_337351", "image": "ScienceQA/images/4415.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which place is highlighted?\nChoices:\n(A) Cuba\n(B) the Dominican Republic\n(C) Haiti\n(D) Dominica" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.1875, "loss": 4.2230353355407715 }, { "id": "identity_297042", "image": "A-OKVQA/images/381948.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of transport aircraft is flying overhead? \n(A) cargo\n(B) passenger\n(C) private\n(D) military" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.015625, "loss": 4.183865547180176 }, { "id": "identity_56623", "image": "DocVQA/images/jkkp0227_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the guest who stayed in room 1165?" }, { "from": "gpt", "value": "The answer is William Darby" } ], "clip_score": 19.296875, "loss": 4.221350193023682 }, { "id": "identity_10444", "image": "IconQA/images/63322.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner less likely to land?\nChoices:\n(A) white\n(B) green" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.140625, "loss": 4.0502214431762695 }, { "id": "identity_301206", "image": "A-OKVQA/images/208511.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is there netting behind the batter?\nChoices:\n(A) safety\n(B) practice\n(C) decoration\n(D) style" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.484375, "loss": 4.421437740325928 }, { "id": "identity_63161", "image": "ScienceQA/images/2080.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct parts can be identified in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.515625, "loss": 4.418224334716797 }, { "id": "identity_278135", "image": "TextVQA/images/329087659dd853b8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the advertisement displayed on the wall?" }, { "from": "gpt", "value": "The answer is guycooklandco" } ], "clip_score": 12.9453125, "loss": 5.580101013183594 }, { "id": "identity_283360", "image": "PMC-VQA/images/PMC4689044_Fig6_455701.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which view was the X-ray taken?\nChoices:\n(A) posteroanterior\n(B) anteroposterior\n(C) lateral\n(D) oblique." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.359375, "loss": 4.4138593673706055 }, { "id": "identity_197079", "image": "A-OKVQA/images/500271.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many toys can be seen in the background?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 13.8046875, "loss": 4.503159523010254 }, { "id": "identity_330106", "image": "ScienceQA/images/14346.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Carson City\n(B) Colorado Springs\n(C) Baltimore\n(D) Denver" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.6875, "loss": 4.627587795257568 }, { "id": "identity_322649", "image": "TabMWP/images/33611.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Sean's campground is near two hiking trails with scenic destinations. One trail ends at an underground lake at the bottom of a small cave. The other trail ends at a mountain peak. The elevation of each destination is shown on the campground map. Which trail's destination is at a higher elevation?\n\nChoices:\n(A) underground lake\n(B) mountain peak" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.96875, "loss": 4.172656059265137 }, { "id": "identity_802", "image": "A-OKVQA/images/137230.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the store with the green sign sell?\nChoices:\n(A) food\n(B) books\n(C) insurance\n(D) tires" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.09375, "loss": 4.2880659103393555 }, { "id": "identity_292775", "image": "A-OKVQA/images/4968.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of concrete area is currently inaccessible in this location? \nChoices:\n(A) newly poured\n(B) asphalt\n(C) road turn\n(D) broken" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.421875, "loss": 4.00567626953125 }, { "id": "identity_202591", "image": "A-OKVQA/images/308892.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the pannier?" }, { "from": "gpt", "value": "The answer is Sierra Phantom Pro. Mountaineer Fishing Guide Resort for Wild Trout Eastern Sierra Bishop, CA" } ], "clip_score": 31.1875, "loss": 4.11909818649292 }, { "id": "identity_287455", "image": "PMC-VQA/images/PMC7535690_F2_266133.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the state of the cornea shown in the image?\nChoices:\n(A) Thin and smooth surface \n(B) Thickened and rough surface \n(C) Normal thickness and smooth surface \n(D) Thickened and smooth surface" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.28125, "loss": 4.200796127319336 }, { "id": "identity_255011", "image": "PMC-VQA/images/PMC4304244_F8_352529.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key characteristics of backward deformation, and how can they be identified in the given images?" }, { "from": "gpt", "value": "The answer is Structures appear retracted, indicating backward movement" } ], "clip_score": 26.6875, "loss": 4.324360370635986 }, { "id": "identity_330768", "image": "A-OKVQA/images/210342.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of wet area is close by?\nChoices:\n(A) stream\n(B) pond\n(C) ocean\n(D) swimming pool" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 8.78125, "loss": 4.31076717376709 }, { "id": "identity_330306", "image": "FigureQA/images/72340.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.0625, "loss": 4.119918346405029 }, { "id": "identity_277747", "image": "DVQA/images/bar_train_00143914.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the bars run horizontally?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 18.8125, "loss": 4.192802906036377 }, { "id": "identity_341", "image": "ScienceQA/images/16318.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these three objects have in common?\nChoices:\n(A) sweet\n(B) fuzzy\n(C) opaque" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.546875, "loss": 4.500824928283691 }, { "id": "identity_262095", "image": "IconQA/images/18148.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) turn\n(B) flip\n(C) slide" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.28125, "loss": 4.681211471557617 }, { "id": "identity_315557", "image": "A-OKVQA/images/316438.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is this food item divided into smaller sections?\nChoices:\n(A) looks good\n(B) easier eating\n(C) to trash\n(D) see easier" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.859375, "loss": 4.113243579864502 }, { "id": "identity_8602", "image": "DVQA/images/bar_train_00045256.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 16.03125, "loss": 4.361536979675293 }, { "id": "identity_46472", "image": "DVQA/images/bar_train_00067172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item has the highest total sales across all stores?" }, { "from": "gpt", "value": "The answer is leader" } ], "clip_score": 29.40625, "loss": 4.147995948791504 }, { "id": "identity_187468", "image": "PMC-VQA/images/PMC5684645_F3_239345.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main color used to stain the cells in image B?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 23.515625, "loss": 4.143934726715088 }, { "id": "identity_264560", "image": "FigureQA/images/27638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Turquoise the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.6875, "loss": 4.03001070022583 }, { "id": "identity_26022", "image": "A-OKVQA/images/385492.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this desk setup?\nChoices:\n(A) in hallway\n(B) on train\n(C) at work\n(D) in library" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.71875, "loss": 4.023480415344238 }, { "id": "identity_106391", "image": "VQA-AS/images/5562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trees are there in the background?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 20.28125, "loss": 4.660527229309082 }, { "id": "identity_288888", "image": "PMC-VQA/images/PMC8289867_Fig5_11991.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the position of the image in relation to the viewer?\nChoices:\n(A) anterior view \n(B) posterior view \n(C) medial view \n(D) lateral view" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.7734375, "loss": 4.592563629150391 }, { "id": "identity_305201", "image": "A-OKVQA/images/218482.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the large object in the back right?\nChoices:\n(A) baseball stadium\n(B) statue\n(C) elephant\n(D) van" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.03125, "loss": 4.380415439605713 }, { "id": "identity_281122", "image": "FigureQA/images/91265.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dim Gray the color with the smallest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.109375, "loss": 4.183896541595459 }, { "id": "identity_12028", "image": "A-OKVQA/images/422295.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where does this cow live?\nChoices:\n(A) city\n(B) zoo\n(C) farm\n(D) fair" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.140625, "loss": 4.434928894042969 }, { "id": "identity_28503", "image": "TQA/images/abc_question_images/parts_neuron_17234.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the labeled elements shows the nucleus of the neuron?\nChoices:\n(A) M\n(B) U\n(C) A\n(D) V" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.140625, "loss": 4.2322282791137695 }, { "id": "identity_190967", "image": "VQA2.0/images/364221.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the water?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 14.4375, "loss": 4.10350227355957 }, { "id": "identity_270339", "image": "IconQA/images/56150.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this shape have a flat surface or is it solid?\nChoices:\n(A) flat\n(B) solid" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.234375, "loss": 4.4789533615112305 }, { "id": "identity_113", "image": "FigureQA/images/25657.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Blue Violet the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.671875, "loss": 4.668147087097168 }, { "id": "identity_281947", "image": "A-OKVQA/images/33215.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which word phonetically resembles the first word on the sign located at the top left corner?\nChoices:\n(A) bark\n(B) good\n(C) slop\n(D) toe" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 6.67578125, "loss": 4.079375267028809 }, { "id": "identity_265939", "image": "ScienceQA/images/10295.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic is shared by these two objects?\nChoices:\n(A) salty\n(B) bendable" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.265625, "loss": 4.529509544372559 }, { "id": "identity_24416", "image": "A-OKVQA/images/532704.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of television display technology is being used in the living room?\nChoices:\n(A) oled\n(B) crt\n(C) plasma\n(D) lcd" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.8671875, "loss": 4.3554911613464355 }, { "id": "identity_333496", "image": "PMC-VQA/images/PMC7513504_Fig1_260585.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the enlarged lymph node located?\nChoices:\n(A) Left Axilla\n(B) Right Axilla\n(C) Groin\n(D) Neck" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.015625, "loss": 4.000107765197754 }, { "id": "identity_292117", "image": "A-OKVQA/images/395046.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose are the individuals on the train traveling?\nChoices:\n(A) consuming food\n(B) acquiring driving skills\n(C) commuting\n(D) sightseeing" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.0, "loss": 4.134355068206787 }, { "id": "identity_273795", "image": "AI2D/images/2273.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter is used to represent Conidium in the given options?\n(A) G\n(B) E\n(C) C\n(D) F" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.53125, "loss": 4.8992815017700195 }, { "id": "identity_305306", "image": "A-OKVQA/images/165200.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the middle advertisement on the wall about?\nChoices:\n(A) athletic help\n(B) banking\n(C) suicide hotline\n(D) cellular service" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.484375, "loss": 4.265179634094238 }, { "id": "identity_329530", "image": "PlotQA/images/8562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the legend located in the graph?" }, { "from": "gpt", "value": "The answer is bottom center" } ], "clip_score": 17.828125, "loss": 4.228368759155273 }, { "id": "identity_111487", "image": "A-OKVQA/images/104931.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people can be seen using the moving walkway in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 12.765625, "loss": 4.646424770355225 }, { "id": "identity_245214", "image": "A-OKVQA/images/73837.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary function of the red and white tower, and how does it contribute to the safety of the surrounding area?" }, { "from": "gpt", "value": "The answer is Safely dispose of excess gases to prevent explosions" } ], "clip_score": 21.625, "loss": 4.123125076293945 }, { "id": "identity_29238", "image": "TQA/images/question_images/velocity_time_graphs_8222.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does the blue line increase or decrease on deceleration?\nChoices:\n(A) decrease\n(B) increase\n(C) stay the same\n(D) curves" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.0625, "loss": 4.280600547790527 }, { "id": "identity_287961", "image": "VizWiz/images/VizWiz_train_00016714.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the type of flowers these are?" }, { "from": "gpt", "value": "The answer is pink" } ], "clip_score": 26.265625, "loss": 4.10653829574585 }, { "id": "identity_313169", "image": "ScienceQA/images/7189.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Olympia\n(B) Huntington\n(C) Seattle\n(D) Laramie" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.921875, "loss": 4.1761155128479 }, { "id": "identity_285493", "image": "A-OKVQA/images/450833.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the cones in marking the following:\n(A) holes\n(B) finish\n(C) lanes\n(D) danger" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.546875, "loss": 4.596191883087158 }, { "id": "identity_328473", "image": "ScienceQA/images/10087.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony highlighted?\nChoices:\n(A) North Carolina\n(B) South Carolina\n(C) Connecticut\n(D) New Jersey" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.734375, "loss": 4.414206504821777 }, { "id": "identity_225202", "image": "VQA-AS/images/10277.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What items are on the couch with the boy, and how many items are there in total?" }, { "from": "gpt", "value": "The answer is three" } ], "clip_score": 27.984375, "loss": 4.820486068725586 }, { "id": "identity_327239", "image": "PMC-VQA/images/PMC5432911_Fig8_165195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the structure labeled \"d\" in this image?\nChoices:\n(A) Habit\n(B) Stamen\n(C) Bract\n(D) Leaf" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.390625, "loss": 4.304913520812988 }, { "id": "identity_331466", "image": "PMC-VQA/images/PMC7770742_Fig1_331507.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the initial radiological finding in this scan?\nChoices:\n(A) Consolidation\n(B) Cavitation\n(C) Ground Glass Opacities\n(D) Cavitary nodules" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.28125, "loss": 4.52679967880249 }, { "id": "identity_263727", "image": "DVQA/images/bar_train_00077535.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: For any dataset, which algorithm has the lowest accuracy?" }, { "from": "gpt", "value": "The answer is campus" } ], "clip_score": 31.1875, "loss": 4.185675621032715 }, { "id": "identity_222921", "image": "A-OKVQA/images/305170.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrians are visible on the sidewalk?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 22.140625, "loss": 4.442523002624512 }, { "id": "identity_267705", "image": "FigureQA/images/97795.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Blue the color with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.546875, "loss": 4.062526702880859 }, { "id": "identity_69276", "image": "TQA/images/question_images/human_system_muscular_6154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many muscles are labeled on the posterior side of the leg below the knee?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 29.03125, "loss": 4.419314384460449 }, { "id": "identity_111347", "image": "TextVQA/images/03577bee21c7d19e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many microphones are visible on the podium?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 15.75, "loss": 4.224025726318359 }, { "id": "identity_264478", "image": "AI2D/images/4773.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter in the diagram represents The Spike?\nOptions:\n(A) D\n(B) F\n(C) G\n(D) E" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.2421875, "loss": 4.097078323364258 }, { "id": "identity_199231", "image": "PMC-VQA/images/PMC3576199_f1-ol-05-03-0840_186418.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the lesion observed in the CT scan?" }, { "from": "gpt", "value": "The answer is Round" } ], "clip_score": 30.671875, "loss": 4.349103927612305 }, { "id": "identity_305402", "image": "DocVQA/images/kklg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name listed?" }, { "from": "gpt", "value": "The answer is Irene Karl" } ], "clip_score": 22.859375, "loss": 4.875324726104736 }, { "id": "identity_335516", "image": "PMC-VQA/images/PMC4455327_Fig4_393480.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the orientation of these MR images in this study?\nChoices:\n(A) Horizontal plane \n(B) Sagittal plane \n(C) Oblique plane \n(D) Vertical plane" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.28125, "loss": 4.4780449867248535 }, { "id": "identity_331411", "image": "A-OKVQA/images/60387.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are they doing?\nChoices:\n(A) waiting\n(B) smoking\n(C) drinking\n(D) eating" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.953125, "loss": 4.279044151306152 }, { "id": "identity_297514", "image": "A-OKVQA/images/91105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For which British sport could this ball potentially be utilized?\nChoices:\n(A) rugby\n(B) soccer\n(C) bowls\n(D) golf" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.28125, "loss": 4.076837062835693 }, { "id": "identity_327097", "image": "PMC-VQA/images/PMC7487555_Fig6_252843.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What method was used to capture the images?\nChoices:\n(A) CT scan\n(B) MRI scan\n(C) AFM imaging\n(D) X-ray crystallography" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.0625, "loss": 4.088282585144043 }, { "id": "identity_167951", "image": "IconQA/images/70913.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object appears most frequently in the sequence?" }, { "from": "gpt", "value": "The answer is Blue boats" } ], "clip_score": 26.328125, "loss": 4.440374851226807 }, { "id": "identity_271732", "image": "AI2D/images/3138.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What lies directly beneath the cuticle?\nChoices:\n(A) Epidermis\n(B) Guard cells\n(C) Stomata\n(D) Skin" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.265625, "loss": 4.757987022399902 }, { "id": "identity_112414", "image": "TextVQA/images/99399dddba6cbf90.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many American flags can be seen in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 20.140625, "loss": 4.605907440185547 }, { "id": "identity_293949", "image": "PMC-VQA/images/PMC4604556_f2_433091.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the visualization depict?\nOptions:\n(A) An overhead view of An. sinensis\n(B) An underside view of An. sinensis\n(C) A cross-sectional view of An. sinensis\n(D) A side view of An. sinensis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.21875, "loss": 4.016558647155762 }, { "id": "identity_158669", "image": "TQA/images/question_images/earth_moon_phases_1237.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the phase that appears between the Waxing Crescent and First Quarter phases?" }, { "from": "gpt", "value": "The answer is No phase between" } ], "clip_score": 24.078125, "loss": 4.375845909118652 }, { "id": "identity_109176", "image": "TextVQA/images/23bfdbe38957354f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times is the word 'serotonin' mentioned on the label?" }, { "from": "gpt", "value": "The answer is Once" } ], "clip_score": 18.53125, "loss": 4.209906101226807 }, { "id": "identity_104720", "image": "A-OKVQA/images/528067.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many birds are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 11.828125, "loss": 4.412600517272949 }, { "id": "identity_288185", "image": "A-OKVQA/images/169026.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of bus is depicted in the image? \nOptions:\n(A) double-decker\n(B) school\n(C) shuttle\n(D) passenger" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.625, "loss": 4.381518363952637 }, { "id": "identity_311536", "image": "IconQA/images/99647.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the quantity of hearts even or odd?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.9375, "loss": 4.319629192352295 }, { "id": "identity_261812", "image": "A-OKVQA/images/353231.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color of the end bridges for the boat suspended in the middle of the river? \n(A) blue\n(B) white\n(C) red\n(D) green" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.515625, "loss": 4.209631443023682 }, { "id": "identity_304185", "image": "A-OKVQA/images/406075.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the white object on the person's waist used for?\nChoices:\n(A) floating\n(B) slimming\n(C) dress code\n(D) protecting backbone" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.890625, "loss": 4.581774711608887 }, { "id": "identity_307651", "image": "AI2D/images/3551.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following surrounds the cytoplasm of a cell?\nChoices:\n(A) cell wall\n(B) cell membrane\n(C) vacuolar membrane\n(D) vacuole" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.40625, "loss": 4.193860054016113 }, { "id": "identity_210966", "image": "VizWiz/images/VizWiz_train_00003089.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of play equipment is featured in the foreground of the image?" }, { "from": "gpt", "value": "The answer is Soft play steps" } ], "clip_score": 35.125, "loss": 4.207865238189697 }, { "id": "identity_18951", "image": "DVQA/images/bar_train_00087537.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the chart contain stacked bars?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 23.8125, "loss": 4.466801166534424 }, { "id": "identity_116622", "image": "TextVQA/images/4ddd53485a1b0c32.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand name of the mushrooms on sale?" }, { "from": "gpt", "value": "The answer is CUOCO" } ], "clip_score": 25.703125, "loss": 4.339416980743408 }, { "id": "identity_81936", "image": "GeoQA+/images/632.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, with D, E, and F as midpoints of the sides and the area of triangle ABC being 8 cm², what is the length of segment DE?" }, { "from": "gpt", "value": "The answer is Length of BC]/2" } ], "clip_score": 35.0, "loss": 4.231622219085693 }, { "id": "identity_309106", "image": "Super-CLEVR/images/superCLEVR_new_001855.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a metal jet behind the purple metal bus that is left of the yellow object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 19.171875, "loss": 5.341336727142334 }, { "id": "identity_261838", "image": "DVQA/images/bar_train_00174916.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the bars run horizontally?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.9375, "loss": 4.326048851013184 }, { "id": "identity_186371", "image": "PMC-VQA/images/PMC3389242_Fig6_143829.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the catheter visible in the image?" }, { "from": "gpt", "value": "The answer is To inject contrast agent for vessel visualization" } ], "clip_score": 28.515625, "loss": 4.148494720458984 }, { "id": "identity_264399", "image": "FigureQA/images/85609.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Green Yellow the color with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.078125, "loss": 4.165249824523926 }, { "id": "identity_336801", "image": "ScienceQA/images/8085.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the place shown?\nChoices:\n(A) South Carolina\n(B) Pennsylvania\n(C) Indiana\n(D) Rhode Island" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.421875, "loss": 4.146697044372559 }, { "id": "identity_332501", "image": "PMC-VQA/images/PMC7489781_FIG1_253764.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What method was used to identify calcifications in the brain?\nChoices:\n(A) CT scan\n(B) MRI\n(C) PET scan\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.84375, "loss": 4.19390344619751 }, { "id": "identity_315274", "image": "ScienceQA/images/5192.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What state of matter is a stuffed tiger?\nChoices:\n(A) a gas\n(B) a solid\n(C) a liquid" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.6875, "loss": 4.309311866760254 }, { "id": "identity_64884", "image": "AI2D/images/3250.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many centrioles are depicted in the cell diagram?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.96875, "loss": 4.180969715118408 }, { "id": "identity_20120", "image": "PMC-VQA/images/PMC9102236_cancers-14-02274-f002_280298.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the right panel of the image? \nChoices:\n(A) EUS-guided LA on porcine models after treatment \n(B) EUS-guided LA on pancreatic tissue before treatment \n(C) EUS-guided LA on pancreatic tissue after treatment \n(D) EUS-guided LA on porcine models before treatment " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.90625, "loss": 4.089471340179443 }, { "id": "identity_1676", "image": "PMC-VQA/images/PMC7830484_medicina-57-00073-f001_351577.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the white asterisk in the image indicate? \nChoices:\n(A) Left psoas muscle \n(B) Right psoas muscle \n(C) Fecal content in the colon \n(D) Gas content in the colon " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.703125, "loss": 5.042743682861328 }, { "id": "identity_25", "image": "IconQA/images/16669.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this shape have a triangle as a face?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.78125, "loss": 4.415360927581787 }, { "id": "identity_128178", "image": "A-OKVQA/images/280238.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trees are decorated with lights?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.203125, "loss": 4.579458713531494 }, { "id": "identity_273912", "image": "FigureQA/images/15088.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Green the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.890625, "loss": 4.4576239585876465 }, { "id": "identity_57273", "image": "DocVQA/images/fnxh0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the factory mentioned in the document?" }, { "from": "gpt", "value": "The answer is Longmont Factory" } ], "clip_score": 22.71875, "loss": 5.070233345031738 }, { "id": "identity_130728", "image": "VizWiz/images/VizWiz_train_00016651.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many slices of fruit can be seen in the bowl?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 28.609375, "loss": 4.395426273345947 }, { "id": "identity_73709", "image": "ScienceQA/images/414.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the highest specific humidity level within the outlined area?" }, { "from": "gpt", "value": "The answer is Dark green" } ], "clip_score": 26.65625, "loss": 4.6770853996276855 }, { "id": "identity_331721", "image": "A-OKVQA/images/299764.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the individuals in orange and white having their discussion?\nChoices:\n(A) homeplate\n(B) pitcher's mound\n(C) 2nd base\n(D) outfield" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.34375, "loss": 4.269707202911377 }, { "id": "identity_38697", "image": "PMC-VQA/images/PMC6697975_Fig1_24848.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the metal foreign body located? \nChoices:\n(A) Right chest cavity below the ipsilateral hemidiaphragm. \n(B) Left chest cavity above the ipsilateral hemidiaphragm. \n(C) Right chest cavity above the ipsilateral hemidiaphragm. \n(D) Left chest cavity below the ipsilateral hemidiaphragm. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.96875, "loss": 4.224691867828369 }, { "id": "identity_294059", "image": "PMC-VQA/images/PMC5773675_Fig2_265603.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is represented by the red and black arrows in (c) among the following choices? \n(A) Solid nipple and porous gap \n(B) Porous nipple and solid gap \n(C) Flat surface and nanoporous gap \n(D) Flat surface and solid nipple" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.171875, "loss": 4.3149943351745605 }, { "id": "identity_185078", "image": "VizWiz/images/VizWiz_train_00009445.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the LPG cylinder on the right associated with?" }, { "from": "gpt", "value": "The answer is Origin" } ], "clip_score": 35.875, "loss": 4.9113922119140625 }, { "id": "identity_331381", "image": "PMC-VQA/images/PMC9019117_F4_256692.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique used to evaluate the results of the initial experiment?\nChoices:\n(A) MRI\n(B) CT scan\n(C) X-ray\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.484375, "loss": 4.16350793838501 }, { "id": "identity_13576", "image": "FigureQA/images/60547.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Medium Purple have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.984375, "loss": 4.509807109832764 }, { "id": "identity_330317", "image": "PMC-VQA/images/PMC5241480_fig1_115273.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which image displays the bone lesion identified by MRI?\nChoices:\n(A) (a)~(c) \n(B) (d)~(h) \n(C) (c) Transaxial T2 STIR image \n(D) None of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.265625, "loss": 4.031975746154785 }, { "id": "identity_304842", "image": "VQA-AS/images/12857.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is their left hand?" }, { "from": "gpt", "value": "The answer is on chair" } ], "clip_score": 20.796875, "loss": 4.035735130310059 }, { "id": "identity_33221", "image": "TabMWP/images/36951.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is the function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.078125, "loss": 4.274876117706299 }, { "id": "identity_5693", "image": "ScienceQA/images/20681.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) Samoa\n(B) Papua New Guinea\n(C) New Zealand\n(D) Australia" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.203125, "loss": 4.010313034057617 }, { "id": "identity_319054", "image": "PMC-VQA/images/PMC7912543_nanomaterials-11-00275-f004_376434.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of images are depicted in the caption?\nChoices:\n(A) Fluorescence images.\n(B) Microscopic images of tissues.\n(C) SEM images.\n(D) Bioluminescence images." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.171875, "loss": 4.30131721496582 }, { "id": "identity_120923", "image": "A-OKVQA/images/373266.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number on the door in the background?" }, { "from": "gpt", "value": "The answer is Nine" } ], "clip_score": 9.5546875, "loss": 4.157859802246094 }, { "id": "identity_332016", "image": "A-OKVQA/images/155997.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the animal riding in?\nChoices:\n(A) trolley\n(B) wagon\n(C) bus\n(D) car" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.203125, "loss": 4.242003440856934 }, { "id": "identity_197881", "image": "A-OKVQA/images/176571.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the umbrella closest to the camera?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 23.46875, "loss": 4.360965251922607 }, { "id": "identity_262232", "image": "IconQA/images/103541.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What operation has been performed on this letter? \nChoices:\n(A) flip\n(B) slide\n(C) turn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.796875, "loss": 4.675408840179443 }, { "id": "identity_203476", "image": "A-OKVQA/images/168405.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of citrus fruits are displayed?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 27.703125, "loss": 4.3807373046875 }, { "id": "identity_322357", "image": "PMC-VQA/images/PMC6692028_pone.0220922.g001_22949.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the orientation of the image in relation to the beak?\nChoices:\n(A) Sagittal \n(B) Transverse \n(C) Coronal \n(D) Oblique" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.6875, "loss": 4.691143989562988 }, { "id": "identity_305948", "image": "A-OKVQA/images/560083.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these people showing?\nChoices:\n(A) pilots license\n(B) movie ticket\n(C) entrance ticket\n(D) police badge" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.90625, "loss": 4.408925533294678 }, { "id": "identity_280916", "image": "DocVQA/images/qkxn0226_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What steps should be taken to verify the stability of the implant fixation and ensure that the cup is securely positioned in the bone?" }, { "from": "gpt", "value": "The answer is with finger pressure" } ], "clip_score": 25.609375, "loss": 5.406577110290527 }, { "id": "identity_287358", "image": "ScienceQA/images/18642.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Michigan?\nChoices:\n(A) Little Rock\n(B) Lansing\n(C) Grand Rapids\n(D) Springfield" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.25, "loss": 4.146401882171631 }, { "id": "identity_5193", "image": "VQA2.0/images/169510.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the man sitting on?" }, { "from": "gpt", "value": "The answer is cooler" } ], "clip_score": 17.625, "loss": 4.740399360656738 }, { "id": "identity_4496", "image": "Super-CLEVR/images/superCLEVR_new_000224.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the tiny object behind the tiny gray metallic object and the regular bus have the same material?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 26.1875, "loss": 5.417392730712891 }, { "id": "identity_72082", "image": "AI2D/images/22.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many secondary consumers are depicted in the food web?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.84375, "loss": 4.687590599060059 }, { "id": "identity_35200", "image": "ScienceQA/images/20901.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of New Jersey?\nChoices:\n(A) Trenton\n(B) Providence\n(C) Baton Rouge\n(D) Burlington" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.828125, "loss": 4.1141037940979 }, { "id": "identity_34074", "image": "A-OKVQA/images/440329.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the people standing?\nChoices:\n(A) boxing ring\n(B) junkyard\n(C) pool\n(D) parking lot" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.265625, "loss": 4.376315116882324 }, { "id": "identity_211715", "image": "TextVQA/images/ca75eea8dfd23a29.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of the clock shown in the image?" }, { "from": "gpt", "value": "The answer is Zurich Silence" } ], "clip_score": 31.234375, "loss": 4.982681751251221 }, { "id": "identity_7669", "image": "FigureQA/images/10306.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Gold the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.546875, "loss": 4.168042182922363 }, { "id": "identity_327896", "image": "PMC-VQA/images/PMC4860770_Fig24_28776.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the diagnosis in this situation?\nChoices:\n(A) Ehlers-Danlos syndrome\n(B) Marfan's syndrome\n(C) Turner syndrome\n(D) Down syndrome" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.171875, "loss": 4.112773418426514 }, { "id": "identity_297012", "image": "IconQA/images/5435.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on: pink or white?\nChoices:\n(A) pink\n(B) white" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.84375, "loss": 4.1635661125183105 }, { "id": "identity_35934", "image": "TabMWP/images/30816.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is the function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.03125, "loss": 4.146492004394531 }, { "id": "identity_326985", "image": "PMC-VQA/images/PMC6081847_Fig3_351763.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the small arrow in the image point to?\nChoices:\n(A) Normal anatomy \n(B) Malformed middle ear \n(C) Malformed cochlea \n(D) Hypoplastic mastoid cells" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.6875, "loss": 4.434876441955566 }, { "id": "identity_312587", "image": "VizWiz/images/VizWiz_train_00014543.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you read this text for me?" }, { "from": "gpt", "value": "The answer is feedlot" } ], "clip_score": 27.53125, "loss": 4.70867395401001 }, { "id": "identity_285450", "image": "PMC-VQA/images/PMC8003397_cancers-13-01396-f007_404170.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to visualize the lesion in the anterior bladder? \nOptions:\n(A) X-ray \n(B) MRI \n(C) CT scan \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.28125, "loss": 4.026716709136963 }, { "id": "identity_322593", "image": "TabMWP/images/3474.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which day had the highest number of pumpkins sold?\nChoices:\n(A) Friday\n(B) Saturday\n(C) Sunday\n(D) Monday" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.4375, "loss": 4.022787570953369 }, { "id": "identity_21395", "image": "TQA/images/abc_question_images/nuclear_energy_18120.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label refers to Cs-140?\nChoices:\n(A) K\n(B) S\n(C) W\n(D) J" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.9765625, "loss": 4.002480506896973 }, { "id": "identity_289975", "image": "ScienceQA/images/18636.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What property does this object have that matches one of the following choices: (A) yellow or (B) translucent?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.765625, "loss": 4.0083441734313965 }, { "id": "identity_5924", "image": "AI2D/images/4893.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label is pointing to the crater?\nChoices:\n(A) B\n(B) D\n(C) C\n(D) A" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.015625, "loss": 4.094927787780762 }, { "id": "identity_221074", "image": "TextVQA/images/543fe0036025ce30.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the majority of the building roofs in the image?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 16.375, "loss": 4.158583641052246 }, { "id": "identity_192728", "image": "PMC-VQA/images/PMC6376781_Fig2_436488.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image in relation to the patient's body?" }, { "from": "gpt", "value": "The answer is Transverse or axial orientation, posterior labeled" } ], "clip_score": 28.65625, "loss": 4.889647006988525 }, { "id": "identity_58395", "image": "DocVQA/images/nmwn0226_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the email address of the recipient of the original message?" }, { "from": "gpt", "value": "The answer is jebit@jbjs.jnj.com>" } ], "clip_score": 25.78125, "loss": 4.604672431945801 }, { "id": "identity_292762", "image": "PMC-VQA/images/PMC8212933_F4_470927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which structure is depicted in the scanning electron micrograph? Is it:\n(A) Ribosomes\n(B) Mitochondria\n(C) Tectorial membrane\n(D) Nucleus" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.53125, "loss": 4.411460876464844 }, { "id": "identity_278245", "image": "DVQA/images/bar_train_00021839.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which element is represented by the color mediumpurple?" }, { "from": "gpt", "value": "The answer is gay" } ], "clip_score": 23.046875, "loss": 4.030664443969727 }, { "id": "identity_29784", "image": "ScienceQA/images/17615.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) Saint Lucia\n(B) Haiti\n(C) Saint Vincent and the Grenadines\n(D) Barbados" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.0625, "loss": 4.316557884216309 }, { "id": "identity_32821", "image": "PMC-VQA/images/PMC7909442_brainsci-11-00133-f001_375074.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which orientation is represented by the green color on the right figures? \nChoices:\n(A) Inferior - Superior. \n(B) Right - Left. \n(C) Anterior - Posterior. \n(D) Unknown orientation. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.328125, "loss": 4.477046966552734 }, { "id": "identity_104359", "image": "A-OKVQA/images/100535.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What language is the cafe's name written in?" }, { "from": "gpt", "value": "The answer is Italian" } ], "clip_score": 23.28125, "loss": 4.271673679351807 }, { "id": "identity_318858", "image": "TQA/images/abc_question_images/faults_11701.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the fault plane?\nChoices:\n(A) S\n(B) NONE\n(C) P\n(D) E" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.28125, "loss": 4.456419944763184 }, { "id": "identity_314424", "image": "DVQA/images/bar_train_00144288.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the smallest total value?" }, { "from": "gpt", "value": "The answer is driver" } ], "clip_score": 23.359375, "loss": 4.5405120849609375 }, { "id": "identity_261609", "image": "PMC-VQA/images/PMC7398228_Fig2_224914.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted by the triangle in the 3D RCA development image?\nChoices:\n(A) normal PComA origin \n(B) aneurysm \n(C) normal PCA origin \n(D) RCA system origin" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.28125, "loss": 4.6619133949279785 }, { "id": "identity_317839", "image": "PMC-VQA/images/PMC5075973_F2_87014.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which brain structure is affected by this? \nChoices:\n(A) Cerebellum \n(B) Hippocampus \n(C) Globus Pallidus \n(D) Thalamus" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.765625, "loss": 4.0274529457092285 }, { "id": "identity_16617", "image": "PMC-VQA/images/PMC6918181_polymers-11-01874-f001_85779.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the material of the object shown in the image? \nChoices:\n(A) Low-density polyethylene (LDPE) \n(B) Polystyrene (PS) \n(C) High-density polyethylene (HDPE) \n(D) Polyvinyl chloride (PVC) " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.96875, "loss": 4.2427167892456055 }, { "id": "identity_6546", "image": "ScienceQA/images/5131.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Rhode Island\n(B) Ohio\n(C) New Hampshire\n(D) Vermont" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.9375, "loss": 4.0502166748046875 }, { "id": "identity_25285", "image": "IconQA/images/11944.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you select a marble without looking, how likely is it that you will pick a black one?\nChoices:\n(A) unlikely\n(B) impossible\n(C) probable\n(D) certain" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.890625, "loss": 4.019705295562744 }, { "id": "identity_313744", "image": "Super-CLEVR/images/superCLEVR_new_000194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a yellow airliner behind the brown metal object that is on the right side of the matte object to the left of the tiny gray matte car?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 27.5625, "loss": 5.350833892822266 }, { "id": "identity_324863", "image": "PMC-VQA/images/PMC8870907_diagnostics-12-00384-f001_203185.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which zone of the sacrum is involved in this type of fracture?\nChoices:\n(A) Zone I \n(B) Zone II \n(C) Zone III \n(D) Zone IV" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.65625, "loss": 4.01167106628418 }, { "id": "identity_312596", "image": "DocVQA/images/qtmd0079_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the subject of this document?" }, { "from": "gpt", "value": "The answer is Thermal analysis System for alpha" } ], "clip_score": 22.671875, "loss": 5.457191467285156 }, { "id": "identity_5907", "image": "A-OKVQA/images/458673.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are the pillars green?\nChoices:\n(A) paint\n(B) moss\n(C) mold\n(D) rust" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.703125, "loss": 4.727088451385498 }, { "id": "identity_215964", "image": "A-OKVQA/images/10321.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lanterns are attached to the gate in the background?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 13.71875, "loss": 4.784966468811035 }, { "id": "identity_297596", "image": "A-OKVQA/images/286234.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During her practice session, the tennis player is enclosed by nets because she is playing against whom?\nChoices:\n(A) nobody\n(B) audience\n(C) player\n(D) machine" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.125, "loss": 4.2153120040893555 }, { "id": "identity_278842", "image": "PMC-VQA/images/PMC3535131_Fig5_176625.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the classification of the fracture found in the SE-4 fracture? Is it:\n(A) Transverse fracture\n(B) Comminuted fracture\n(C) Spiral or oblique fracture\n(D) Compression fracture" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.375, "loss": 4.608735084533691 }, { "id": "identity_36045", "image": "DocVQA/images/jxcn0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the result mention in this document ?" }, { "from": "gpt", "value": "The answer is OK" } ], "clip_score": 23.515625, "loss": 4.579174995422363 }, { "id": "identity_109302", "image": "Super-CLEVR/images/superCLEVR_new_000090.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the small car located to the left of the golden car?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 22.625, "loss": 4.282649040222168 }, { "id": "identity_11527", "image": "VQA-AS/images/2301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the rug?" }, { "from": "gpt", "value": "The answer is burgundy and gold" } ], "clip_score": 23.0625, "loss": 4.007752895355225 }, { "id": "identity_18504", "image": "AI2D/images/1352.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the earth orbit?\nChoices:\n(A) Saturn\n(B) Jupiter\n(C) Sun\n(D) Milky Way" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.453125, "loss": 4.0349531173706055 }, { "id": "identity_335178", "image": "Super-CLEVR/images/superCLEVR_new_000737.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the object that is positioned in front of the blue shiny object and behind the large shiny bike?" }, { "from": "gpt", "value": "The answer is double bus" } ], "clip_score": 29.0625, "loss": 5.237339496612549 }, { "id": "identity_196435", "image": "PMC-VQA/images/PMC7564854_F0003_275748.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What panel of the series does this image represent?" }, { "from": "gpt", "value": "The answer is Third panel" } ], "clip_score": 18.40625, "loss": 4.46090841293335 }, { "id": "identity_316928", "image": "FigureQA/images/64448.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.65625, "loss": 4.592308521270752 }, { "id": "identity_324082", "image": "A-OKVQA/images/221328.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the type of garment that the person in blue is wearing?\nChoices:\n(A) raincoat\n(B) chut thai\n(C) kimono\n(D) sari" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.640625, "loss": 4.45120096206665 }, { "id": "identity_228231", "image": "VQA-AS/images/809.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image are green, and what are they?" }, { "from": "gpt", "value": "The answer is Two objects, plant and picture frame" } ], "clip_score": 15.8828125, "loss": 4.323734283447266 }, { "id": "identity_150911", "image": "ScienceQA/images/5562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states are highlighted in this image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 27.5, "loss": 4.288466453552246 }, { "id": "identity_120142", "image": "VQA2.0/images/459537.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many ski tracks are visible in the snow?" }, { "from": "gpt", "value": "The answer is Two tracks" } ], "clip_score": 27.234375, "loss": 4.391188144683838 }, { "id": "identity_138984", "image": "DVQA/images/bar_train_00143920.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the least preferred sub-category in the 'widow' category?" }, { "from": "gpt", "value": "The answer is movie" } ], "clip_score": 22.703125, "loss": 4.150694370269775 }, { "id": "identity_105418", "image": "VQA-AS/images/3634.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheeled vehicles are present in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 20.0, "loss": 4.364820957183838 }, { "id": "identity_316816", "image": "FigureQA/images/42017.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.34375, "loss": 4.661677360534668 }, { "id": "identity_194016", "image": "PMC-VQA/images/PMC4730158_f4_466715.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the majority of the structures in the image?" }, { "from": "gpt", "value": "The answer is Irregular" } ], "clip_score": 20.390625, "loss": 4.356299877166748 }, { "id": "identity_294144", "image": "A-OKVQA/images/443805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is displayed on the clock?\nChoices:\n(A) cat\n(B) cherub statues\n(C) gravy\n(D) hat" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.46875, "loss": 4.221982955932617 }, { "id": "identity_262434", "image": "ScienceQA/images/13060.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the passage about the weather or the climate?\nChoices:\n(A) weather\n(B) climate" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 4.9375, "loss": 4.85079288482666 }, { "id": "identity_216085", "image": "Super-CLEVR/images/superCLEVR_new_000390.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Tan or beige" } ], "clip_score": 23.875, "loss": 4.258483409881592 }, { "id": "identity_11166", "image": "A-OKVQA/images/64961.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The shade held by the teal umbrella pole was crafted in which manner?\nChoices:\n(A) carving\n(B) sewing machine\n(C) weaving\n(D) axe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.40625, "loss": 4.204170227050781 }, { "id": "identity_336507", "image": "PMC-VQA/images/PMC7532002_fig2_265128.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated in the rectangle in image A?\nChoices:\n(A) Brainstem\n(B) Spinal cord\n(C) Fiber tract\n(D) Cerebellum" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.53125, "loss": 4.257781982421875 }, { "id": "identity_112795", "image": "TextVQA/images/32b0497aa73d65c2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many smaller spacecraft are engaged in the scene?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 15.703125, "loss": 4.783541679382324 }, { "id": "identity_290454", "image": "A-OKVQA/images/393845.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color of the collar on the shirt of the man who has his tie tied?\nChoices:\n(A) green\n(B) black\n(C) white\n(D) blue" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.6796875, "loss": 4.091976165771484 }, { "id": "identity_312873", "image": "FigureQA/images/95979.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this color have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.875, "loss": 4.387751579284668 }, { "id": "identity_118796", "image": "TextVQA/images/882dce3f2ea4f1df.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last word on the right page?" }, { "from": "gpt", "value": "The answer is here" } ], "clip_score": 22.4375, "loss": 4.198107719421387 }, { "id": "identity_311008", "image": "VQA-AS/images/19535.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Why is the person so surprised?" }, { "from": "gpt", "value": "The answer is girl has taken all books down and put her animals on bookcase" } ], "clip_score": 23.421875, "loss": 4.209424018859863 }, { "id": "identity_305303", "image": "FigureQA/images/2724.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than that?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.296875, "loss": 4.052279949188232 }, { "id": "identity_35721", "image": "A-OKVQA/images/193663.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this player getting ready to do?\nChoices:\n(A) swing\n(B) dunk\n(C) throw\n(D) dribble" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.734375, "loss": 4.421854496002197 }, { "id": "identity_292148", "image": "A-OKVQA/images/352247.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the specific function of the larger containers among the following options? \n(A) preserving temperature\n(B) storing wine\n(C) containing plants\n(D) transporting goods" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 4.26171875, "loss": 4.629074573516846 }, { "id": "identity_33035", "image": "A-OKVQA/images/546664.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is he standing like that?\nChoices:\n(A) falling\n(B) tired\n(C) slipping\n(D) maintain balance" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.8984375, "loss": 4.193689823150635 }, { "id": "identity_148810", "image": "DocVQA/images/hmvj0223_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the audit firm?" }, { "from": "gpt", "value": "The answer is NARESH" } ], "clip_score": 19.671875, "loss": 4.969577312469482 }, { "id": "identity_277046", "image": "TabMWP/images/18634.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In 2004, which crop had the lowest cost per cwt according to the crop prices monitored by the Idaho farmer?\nChoices:\n(A) canola\n(B) potatoes\n(C) apples\n(D) lentils" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 42.40625, "loss": 4.772095680236816 }, { "id": "identity_300405", "image": "DocVQA/images/hkfg0024_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is being discussed in this meeting?" }, { "from": "gpt", "value": "The answer is Review of Procedure Notes" } ], "clip_score": 23.96875, "loss": 4.634533882141113 }, { "id": "identity_22835", "image": "A-OKVQA/images/387895.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why does this person have a bag with them?\nChoices:\n(A) lunch\n(B) travelling\n(C) shopping\n(D) hiking" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.375, "loss": 4.332245349884033 }, { "id": "identity_266692", "image": "FigureQA/images/32790.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Firebrick or Peru?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.234375, "loss": 4.663617134094238 }, { "id": "identity_10359", "image": "VQA2.0/images/427461.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the people spectating?" }, { "from": "gpt", "value": "The answer is animals" } ], "clip_score": 19.296875, "loss": 4.3289995193481445 }, { "id": "identity_9962", "image": "FigureQA/images/36854.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Pale Green the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.6875, "loss": 4.161093711853027 }, { "id": "identity_283574", "image": "A-OKVQA/images/348790.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purposes are the colorful lights typically utilized?\nChoices:\n(A) parades\n(B) decoration\n(C) controlling traffic\n(D) dancing" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.5, "loss": 4.176748275756836 }, { "id": "identity_295418", "image": "PMC-VQA/images/PMC4559320_Fig13_420629.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the white arrow in the image point to?\nChoices:\n(A) A mitochondria \n(B) A healthy synapse \n(C) An electron dense degenerating synapse \n(D) A pre-synapse" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.359375, "loss": 4.191758632659912 }, { "id": "identity_26670", "image": "PMC-VQA/images/PMC5368167_F1_145882.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of MRI was used to capture the image? \nChoices:\n(A) T1-weighted MRI \n(B) T2-weighted MRI \n(C) CT scan \n(D) PET scan " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.65625, "loss": 4.201043128967285 }, { "id": "identity_324132", "image": "A-OKVQA/images/334125.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the square items showing a sort of necklace?\nChoices:\n(A) snacks\n(B) mail\n(C) flying\n(D) computer chips" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.3125, "loss": 4.636883735656738 }, { "id": "identity_212213", "image": "VQA2.0/images/489107.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are seated at the outdoor cafe?" }, { "from": "gpt", "value": "The answer is Eleven" } ], "clip_score": 18.171875, "loss": 4.2695488929748535 }, { "id": "identity_4805", "image": "IconQA/images/99857.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of rectangles even or odd?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.75, "loss": 4.179866313934326 }, { "id": "identity_302703", "image": "PMC-VQA/images/PMC5072663_FIG1_86034.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What method was used to measure scalp thickness?\nChoices:\n(A) MRI \n(B) X-Ray \n(C) CT scan \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.140625, "loss": 4.0783185958862305 }, { "id": "identity_190236", "image": "VQA2.0/images/535460.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of baked goods are visible on the table?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 24.515625, "loss": 4.0879411697387695 }, { "id": "identity_275622", "image": "PMC-VQA/images/PMC6879599_F5_74476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the boundary indicated by the green colored line? Is it:\n(A) The posterior edge of AB/ML.\n(B) The superior edge of AB/ML.\n(C) The inferior edge of AB/ML.\n(D) The anterior edge of AB/ML." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.828125, "loss": 4.039267539978027 }, { "id": "identity_200490", "image": "A-OKVQA/images/572879.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background of the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.03125, "loss": 4.6946916580200195 }, { "id": "identity_264319", "image": "FigureQA/images/22288.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Tomato or Seafoam?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.40625, "loss": 4.770137310028076 }, { "id": "identity_66926", "image": "ScienceQA/images/11725.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What can be inferred about the environment where the middle fossil was formed?" }, { "from": "gpt", "value": "The answer is Marine or water-rich environment" } ], "clip_score": 22.734375, "loss": 4.359391212463379 }, { "id": "identity_271288", "image": "IconQA/images/29540.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What operation has been performed on this letter? \nChoices:\n(A) flip\n(B) slide\n(C) turn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.40625, "loss": 4.7244462966918945 }, { "id": "identity_112934", "image": "A-OKVQA/images/567572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many limbs of the doll are in contact with the white surface?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.078125, "loss": 4.593077659606934 }, { "id": "identity_315001", "image": "IconQA/images/77062.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this shape?\nChoices:\n(A) turn\n(B) flip\n(C) slide" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.0, "loss": 4.207892894744873 }, { "id": "identity_13058", "image": "Super-CLEVR/images/superCLEVR_new_000296.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any blue bicycles to the right of the large rubber bus?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 30.921875, "loss": 5.357677459716797 }, { "id": "identity_296167", "image": "ScienceQA/images/3384.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What state of matter does a gold bracelet belong to: solid, liquid, or gas?\nChoices:\n(A) a gas\n(B) a solid\n(C) a liquid" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.0, "loss": 4.0184502601623535 }, { "id": "identity_288521", "image": "ScienceQA/images/17200.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is used to make this chair?\nChoices:\n(A) wood\n(B) porcelain" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.6875, "loss": 4.194432258605957 }, { "id": "identity_76584", "image": "AI2D/images/1812.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What role does the grass play in this food chain?" }, { "from": "gpt", "value": "The answer is Primary producer" } ], "clip_score": 25.640625, "loss": 5.132446765899658 }, { "id": "identity_326743", "image": "A-OKVQA/images/238178.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is it called when Hulk Hogan assumes the position they are in?\nChoices:\n(A) leg drop\n(B) headbutt\n(C) flexing\n(D) body slam" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.21875, "loss": 4.429908752441406 }, { "id": "identity_316161", "image": "IconQA/images/66123.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a house. What is the second image?\nChoices:\n(A) paw\n(B) house\n(C) dog" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.640625, "loss": 4.130466461181641 }, { "id": "identity_319827", "image": "A-OKVQA/images/432912.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are the cones orange in color?\nChoices:\n(A) camouflage\n(B) visibility\n(C) design\n(D) random pick" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 7.609375, "loss": 4.260474681854248 }, { "id": "identity_313461", "image": "TQA/images/question_images/human_system_muscular_6155.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is a muscle located above the Achilles tendon?\nChoices:\n(A) Extensors\n(B) Trapezius\n(C) Soleus\n(D) Hamstrings" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.265625, "loss": 4.179835319519043 }, { "id": "identity_34254", "image": "A-OKVQA/images/364522.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the silver object made of that the man is carrying?\nChoices:\n(A) nylon\n(B) wood\n(C) metal\n(D) plastic" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.8125, "loss": 4.401226043701172 }, { "id": "identity_273989", "image": "AI2D/images/2535.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which phase of the moon does it appear darkest? \nOptions:\n(A) new\n(B) waning gibbous\n(C) full\n(D) waning crescent" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.96875, "loss": 4.205798149108887 }, { "id": "identity_121174", "image": "VQA-AS/images/15243.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trees are there in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.5625, "loss": 4.457295894622803 }, { "id": "identity_109981", "image": "Super-CLEVR/images/superCLEVR_new_000365.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the large object to the left of the scooter?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 20.65625, "loss": 4.242794513702393 }, { "id": "identity_253820", "image": "PMC-VQA/images/PMC8585437_materials-14-06539-f007_110082.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the turbine fraction shown in the SEM image and explain the distinguishing features that led to this identification" }, { "from": "gpt", "value": "The answer is Fraction III" } ], "clip_score": 26.40625, "loss": 4.4916839599609375 }, { "id": "identity_281509", "image": "PMC-VQA/images/PMC8546700_fig4_96332.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible in the bright-field image overlay among the following choices? \n(A) Microscopic view of a mosquito \n(B) Microscopic view of a sporozoite \n(C) Microscopic view of a skin cell \n(D) Microscopic view of a hair follicle" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.0625, "loss": 4.161013603210449 }, { "id": "identity_285757", "image": "VizWiz/images/VizWiz_train_00003279.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the type of soda?" }, { "from": "gpt", "value": "The answer is aef" } ], "clip_score": 25.75, "loss": 6.271118640899658 }, { "id": "identity_246292", "image": "VizWiz/images/VizWiz_train_00002901.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the visible letters on the window, and what could they potentially spell out?" }, { "from": "gpt", "value": "The answer is seis" } ], "clip_score": 27.265625, "loss": 7.015589714050293 }, { "id": "identity_31311", "image": "PMC-VQA/images/PMC3996258_F3_283260.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in red in the surface charge image of PvD1r? \nChoices:\n(A) Positive surface charge \n(B) Negative surface charge \n(C) Neutral surface charge \n(D) No surface charge is shown " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.203125, "loss": 4.264476299285889 }, { "id": "identity_293074", "image": "VizWiz/images/VizWiz_train_00010803.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is unknown" } ], "clip_score": 14.7578125, "loss": 4.0168232917785645 }, { "id": "identity_320162", "image": "A-OKVQA/images/16964.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of place is depicted?\nChoices:\n(A) country\n(B) city\n(C) forest\n(D) mountains" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.390625, "loss": 4.6622490882873535 }, { "id": "identity_264578", "image": "AI2D/images/1703.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term used to describe the point on the land or water surface directly above the focus of an earthquake?\nChoices:\n(A) land surface\n(B) epicentre\n(C) water surface\n(D) plate" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.390625, "loss": 4.571078777313232 }, { "id": "identity_133038", "image": "VQA2.0/images/271592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is shown in the image?" }, { "from": "gpt", "value": "The answer is A tourist road train" } ], "clip_score": 27.390625, "loss": 4.1635589599609375 }, { "id": "identity_293279", "image": "PMC-VQA/images/PMC8202118_f1_467793.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the body was scanned in the CT scan? \n(A) Brain \n(B) Chest \n(C) Abdomen \n(D) Legs" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.09375, "loss": 4.364859580993652 }, { "id": "identity_74310", "image": "AI2D/images/1111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many main parts are labeled in the image of the leaf?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 32.375, "loss": 4.637446403503418 }, { "id": "identity_302075", "image": "Super-CLEVR/images/superCLEVR_new_001606.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer tiny rubber biplanes than some small red matte airliners?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 21.40625, "loss": 5.247293472290039 }, { "id": "identity_10162", "image": "ScienceQA/images/14853.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these two objects have in common?\nChoices:\n(A) sour\n(B) salty" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.546875, "loss": 4.519867420196533 }, { "id": "identity_185292", "image": "TextVQA/images/da86ca8b8f4d42a4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flags are visible in the background?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 13.0234375, "loss": 4.510219097137451 }, { "id": "identity_34752", "image": "PMC-VQA/images/PMC6894893_FIG1_79450.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the appearance of the gestational sac seen in the ultrasound image? \nChoices:\n(A) Round \n(B) Oval \n(C) Elongated \n(D) Irregular " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.40625, "loss": 4.356016635894775 }, { "id": "identity_129422", "image": "PMC-VQA/images/PMC5265199_Fig3_118746.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the likely cause of the fracture shown in the image?" }, { "from": "gpt", "value": "The answer is Direct blow or fall on the knee" } ], "clip_score": 26.890625, "loss": 4.281946659088135 }, { "id": "identity_30708", "image": "IconQA/images/32231.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) slide\n(B) flip\n(C) turn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.890625, "loss": 4.409582614898682 }, { "id": "identity_283628", "image": "A-OKVQA/images/571141.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term used to describe a skateboarder who skates with their right foot forward?\nChoices:\n(A) goofy\n(B) rider\n(C) regular\n(D) looser" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.90625, "loss": 4.128259658813477 }, { "id": "identity_147319", "image": "DVQA/images/bar_train_00166800.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has a higher preference for iodine?" }, { "from": "gpt", "value": "The answer is handle" } ], "clip_score": 26.546875, "loss": 4.312668800354004 }, { "id": "identity_128575", "image": "VQA2.0/images/160064.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the shirts of the two people seated on the bench?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 18.328125, "loss": 4.810975074768066 }, { "id": "identity_7692", "image": "TQA/images/abc_question_images/human_system_eye_16135.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the optic nerve?\nChoices:\n(A) N\n(B) H\n(C) A\n(D) U" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.46875, "loss": 4.075877666473389 }, { "id": "identity_203273", "image": "A-OKVQA/images/505469.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the bird likely to be eating the fruit?" }, { "from": "gpt", "value": "The answer is Not likely" } ], "clip_score": 28.484375, "loss": 4.059200763702393 }, { "id": "identity_309372", "image": "FigureQA/images/69628.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.0078125, "loss": 4.045697212219238 }, { "id": "identity_309280", "image": "DocVQA/images/gznk0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the director of food and agriculture?" }, { "from": "gpt", "value": "The answer is t. wallace" } ], "clip_score": 18.796875, "loss": 4.03546142578125 }, { "id": "identity_301724", "image": "FigureQA/images/84581.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the area under the curve for Navy Blue the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.65625, "loss": 4.185359001159668 }, { "id": "identity_318184", "image": "ScienceQA/images/3801.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is shown?\nChoices:\n(A) South America\n(B) Antarctica\n(C) Europe\n(D) Australia" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.53125, "loss": 4.017030715942383 }, { "id": "identity_272107", "image": "FigureQA/images/20844.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Maroon the color with the highest median value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.1875, "loss": 4.7772674560546875 }, { "id": "identity_256006", "image": "PMC-VQA/images/PMC8598322_fig4_114321.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key structural components visible in the image of the human eye, and what is their function?" }, { "from": "gpt", "value": "The answer is Lens and haptics; focusing light and stabilization" } ], "clip_score": 23.234375, "loss": 4.140919208526611 }, { "id": "identity_308259", "image": "AI2D/images/3985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the outermost part of this structure called?\nChoices:\n(A) nuclear membrane\n(B) nucleolus\n(C) nuclear pore\n(D) chromatin" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.671875, "loss": 4.295978546142578 }, { "id": "identity_52489", "image": "DVQA/images/bar_train_00057222.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset has the lowest accuracy across all algorithms?" }, { "from": "gpt", "value": "The answer is strip" } ], "clip_score": 33.5625, "loss": 4.638962268829346 }, { "id": "identity_272978", "image": "A-OKVQA/images/411223.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options is closest to the person: a banana, a barrel, a baby, or a tiger?\nChoices:\n(A) banana\n(B) barrel\n(C) baby\n(D) tiger" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.4609375, "loss": 4.245343208312988 }, { "id": "identity_269311", "image": "TextVQA/images/ff638b94043dd0b3.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the name of this area?" }, { "from": "gpt", "value": "The answer is oracle" } ], "clip_score": 16.828125, "loss": 4.923203468322754 }, { "id": "identity_3126", "image": "IconQA/images/98207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) turn\n(B) flip\n(C) slide" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.421875, "loss": 4.392731666564941 }, { "id": "identity_263", "image": "PMC-VQA/images/PMC8063773_F0001_423415.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the image of the left kidney show? \nChoices:\n(A) A small hypo-echoic mass. \n(B) A large hetero-echoic left renal mass. \n(C) A small avascular mass. \n(D) A large homogeneous left renal mass. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.328125, "loss": 4.485653400421143 }, { "id": "identity_25677", "image": "TextVQA/images/14f625e7b3f9e25b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the label for channel 17?" }, { "from": "gpt", "value": "The answer is california" } ], "clip_score": 20.671875, "loss": 4.133762836456299 }, { "id": "identity_28292", "image": "PMC-VQA/images/PMC6453619_FIG3_462444.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging modality used in this image? \nChoices:\n(A) X-ray \n(B) MRI \n(C) CT scan \n(D) PET scan " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.59375, "loss": 4.165219306945801 }, { "id": "identity_126162", "image": "VQA-RAD/images/synpic52932.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible text indicating the source of this X-ray?" }, { "from": "gpt", "value": "The answer is Yes, the source is Keili D Trent" } ], "clip_score": 28.09375, "loss": 4.484968185424805 }, { "id": "identity_267084", "image": "Super-CLEVR/images/superCLEVR_new_000949.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the tiny brown airplane share the same material as the tiny brown fighter?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 22.671875, "loss": 5.2996721267700195 }, { "id": "identity_317171", "image": "A-OKVQA/images/389498.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is this person extending their arm?\nChoices:\n(A) it hurts\n(B) taking selfie\n(C) holding kite\n(D) waving" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.0625, "loss": 4.174056053161621 }, { "id": "identity_310569", "image": "PMC-VQA/images/PMC3934392_fig2_269369.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the letters P, R, and * represent in the image?\nChoices:\n(A) Different pathologies\n(B) Different imaging techniques\n(C) Different body organs\n(D) Different spaces in the body" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.0625, "loss": 4.574255466461182 }, { "id": "identity_2467", "image": "Super-CLEVR/images/superCLEVR_new_001919.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the number of small cyan matte cars behind the small brown shiny thing the same as the number of small red things?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 23.8125, "loss": 5.253496170043945 }, { "id": "identity_318612", "image": "FigureQA/images/79158.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the area under the curve for Gold the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.421875, "loss": 4.248315334320068 }, { "id": "identity_186484", "image": "PMC-VQA/images/PMC7882688_f3_367840.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many protrusions labeled with 'P' are visible in Image A?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.0, "loss": 4.457640647888184 }, { "id": "identity_290606", "image": "A-OKVQA/images/346924.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what main purpose are the illuminated white circles primarily utilized?\nChoices:\n(A) sports\n(B) showing time\n(C) mounting\n(D) decoration" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.765625, "loss": 4.517902851104736 }, { "id": "identity_308734", "image": "A-OKVQA/images/61175.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who is the person wearing a green shirt?\nChoices:\n(A) audience\n(B) referee\n(C) tennis player\n(D) staff" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.59375, "loss": 4.332545280456543 }, { "id": "identity_25718", "image": "TextVQA/images/378e06f3ea05441c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: which state is the watch on the left featuring?" }, { "from": "gpt", "value": "The answer is chris brown" } ], "clip_score": 27.9375, "loss": 4.728575706481934 }, { "id": "identity_17205", "image": "ScienceQA/images/18223.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the following statement about our solar system true or false?\nThe volume of Saturn is more than ten times the volume of Uranus.\nChoices:\n(A) true\n(B) false" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.140625, "loss": 4.273861408233643 }, { "id": "identity_2355", "image": "A-OKVQA/images/71264.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of building does this dog live in?\nChoices:\n(A) highrise\n(B) duplex\n(C) bungalow\n(D) trailer" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.484375, "loss": 4.532026767730713 }, { "id": "identity_144802", "image": "DocVQA/images/ylcn0226_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sections of the LCSA are mentioned in the second paragraph?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.78125, "loss": 4.590788841247559 }, { "id": "identity_36019", "image": "A-OKVQA/images/369221.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What landscape is this location?\nChoices:\n(A) beach\n(B) plain\n(C) sand dune\n(D) desert" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.640625, "loss": 4.423936367034912 }, { "id": "identity_13425", "image": "ScienceQA/images/5614.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which better describes the Great Barrier Reef ecosystem?\nChoices:\n(A) It has water with not much salt. It also has many different types of organisms.\n(B) It has salty water. It also has many different types of organisms." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.078125, "loss": 4.190053939819336 }, { "id": "identity_258065", "image": "IconQA/images/19853.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you select two marbles without looking, how likely is it that both will be green?" }, { "from": "gpt", "value": "The answer is certain" } ], "clip_score": 31.3125, "loss": 4.447630405426025 }, { "id": "identity_208997", "image": "A-OKVQA/images/66459.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of building is on the right side of the image?" }, { "from": "gpt", "value": "The answer is An office or housing center" } ], "clip_score": 25.109375, "loss": 5.322295188903809 }, { "id": "identity_149315", "image": "DocVQA/images/grpy0080_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the contact person mentioned in the document?" }, { "from": "gpt", "value": "The answer is Mark Loving" } ], "clip_score": 23.078125, "loss": 4.331944465637207 }, { "id": "identity_137273", "image": "DVQA/images/bar_train_00054203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store sold the least number of units for the item week?" }, { "from": "gpt", "value": "The answer is focus" } ], "clip_score": 29.390625, "loss": 4.01900577545166 }, { "id": "identity_200385", "image": "TextVQA/images/558f6df5aa49aaf9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many green-colored books can be seen on the shelves?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.078125, "loss": 4.650191783905029 }, { "id": "identity_118982", "image": "VQA2.0/images/245367.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 15.390625, "loss": 4.379122257232666 }, { "id": "identity_212410", "image": "Super-CLEVR/images/superCLEVR_new_000672.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the vehicle located directly behind the green car?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 18.046875, "loss": 4.842767238616943 }, { "id": "identity_249041", "image": "IconQA/images/29722.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What transformation sequence, including a flip and another transformation, can be applied to the letter on the left to obtain the letter on the right?" }, { "from": "gpt", "value": "The answer is horizontal flip only" } ], "clip_score": 28.53125, "loss": 4.091778755187988 }, { "id": "identity_326889", "image": "ScienceQA/images/9259.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared characteristic of these two items?\nChoices:\n(A) blue\n(B) bouncy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.15625, "loss": 4.482216835021973 }, { "id": "identity_104908", "image": "VQA-AS/images/19149.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of plant is in the pot?" }, { "from": "gpt", "value": "The answer is A bush" } ], "clip_score": 12.453125, "loss": 4.430110931396484 }, { "id": "identity_267484", "image": "ScienceQA/images/3301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the material used to make this basketball jersey?\nChoices:\n(A) polyester\n(B) porcelain" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.375, "loss": 4.63440465927124 }, { "id": "identity_294127", "image": "PMC-VQA/images/PMC4844105_pone.0154174.g006_23944.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the dorsal located in relation to other directions?\nChoices:\n(A) to the bottom and medial to the left \n(B) to the left and medial to the top \n(C) to the top and medial to the right \n(D) to the right and medial to the top" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.09375, "loss": 4.079123497009277 }, { "id": "identity_279474", "image": "Super-CLEVR/images/superCLEVR_new_000434.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape does the small matte object have that matches the color of the metal articulated bus?" }, { "from": "gpt", "value": "The answer is jet" } ], "clip_score": 26.453125, "loss": 4.686108112335205 }, { "id": "identity_330876", "image": "PMC-VQA/images/PMC6974579_F4_104119.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the green staining in this image?\nChoices:\n(A) Cell membrane\n(B) Nucleus\n(C) Capillary PECAM-1\n(D) Mitochondria" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.625, "loss": 4.265242576599121 }, { "id": "identity_200838", "image": "PMC-VQA/images/PMC6287602_Figure1_409994.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of cells are indicated by the black arrows in the image?" }, { "from": "gpt", "value": "The answer is Foam cells" } ], "clip_score": 25.46875, "loss": 4.067046642303467 }, { "id": "identity_284281", "image": "TextVQA/images/6ac9c8857df1241d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which brand of soccer ball are they currently kicking?" }, { "from": "gpt", "value": "The answer is volt" } ], "clip_score": 19.03125, "loss": 5.797677516937256 }, { "id": "identity_280712", "image": "A-OKVQA/images/334645.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which country is the headquarters of the sponsor closest to the camera located?\nChoices:\n(A) Israel\n(B) Ukraine\n(C) Germany\n(D) Poland" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.1484375, "loss": 4.162546157836914 }, { "id": "identity_261578", "image": "ScienceQA/images/10310.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organism among the following options serves as the tertiary consumer in this food web?\nChoices:\n(A) golden algae\n(B) black crappie\n(C) rotifer" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.875, "loss": 4.211778163909912 }, { "id": "identity_8113", "image": "AI2D/images/2303.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram above, which letter/s depict Pupa?\nChoices:\n(A) C & D\n(B) A & C\n(C) B & D\n(D) A & B" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.0625, "loss": 4.0256781578063965 }, { "id": "identity_303622", "image": "ScienceQA/images/1343.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these organisms has matter that was once part of the phytoplankton?\nChoices:\n(A) sea otter\n(B) kelp bass\n(C) kelp" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.015625, "loss": 4.273098945617676 }, { "id": "identity_194308", "image": "PMC-VQA/images/PMC4222113_F2_333315.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image with respect to the patient's body?" }, { "from": "gpt", "value": "The answer is Axial plane" } ], "clip_score": 28.859375, "loss": 4.112521648406982 }, { "id": "identity_325126", "image": "Super-CLEVR/images/superCLEVR_new_001698.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the small object in front of the big green metallic thing shaped like the large metallic object in front of the tiny purple thing?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 23.40625, "loss": 4.855381011962891 }, { "id": "identity_319882", "image": "PMC-VQA/images/PMC5511803_mgg3289-fig-0001_190766.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What structures are depicted in the image?\nChoices:\n(A) liver and gallbladder\n(B) cerebellar hemispheres and pons\n(C) spinal cord and brainstem\n(D) corpus callosum and cerebellar vermis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.625, "loss": 4.001124858856201 }, { "id": "identity_2503", "image": "Super-CLEVR/images/superCLEVR_new_001742.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a metal object that is on the right side of the shiny bus; does it have the same shape as the small green thing?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 26.828125, "loss": 4.965217113494873 }, { "id": "identity_326280", "image": "TextVQA/images/c97ac1efbb3c6273.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the advertisement suggest you do about the event tomorrow?" }, { "from": "gpt", "value": "The answer is watch" } ], "clip_score": 15.4453125, "loss": 5.328585624694824 }, { "id": "identity_151477", "image": "ScienceQA/images/15624.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states border the highlighted state?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 32.28125, "loss": 4.374170780181885 }, { "id": "identity_298244", "image": "A-OKVQA/images/472828.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which types of fruits does the vendor here specialize? \n(A) citrus\n(B) tomatoes\n(C) melons\n(D) pome" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.671875, "loss": 4.13005256652832 }, { "id": "identity_262800", "image": "FigureQA/images/30469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Gold or Steel Blue?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.5625, "loss": 4.613154411315918 }, { "id": "identity_327129", "image": "PMC-VQA/images/PMC4195844_Fig10_327545.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of scoliosis is shown in the images?\nChoices:\n(A) Lumbar scoliosis\n(B) Idiopathic scoliosis\n(C) Congenital scoliosis\n(D) Kyphoscoliosis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.59375, "loss": 4.181933879852295 }, { "id": "identity_330621", "image": "A-OKVQA/images/79869.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the person who uses this area like to collect?\n\nChoices:\n(A) key chains\n(B) trolls\n(C) cleaning supplies\n(D) lap tops" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.03125, "loss": 4.588471412658691 }, { "id": "identity_215327", "image": "TextVQA/images/c8669273e697c321.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pairs of sunglasses are visible on the table?" }, { "from": "gpt", "value": "The answer is One pair" } ], "clip_score": 17.5, "loss": 4.418601989746094 }, { "id": "identity_185765", "image": "TextVQA/images/c45c47d81053869d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main activity taking place in this image?" }, { "from": "gpt", "value": "The answer is Baseball game" } ], "clip_score": 23.859375, "loss": 4.189603805541992 }, { "id": "identity_8533", "image": "AI2D/images/1275.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During photosynthesis, O2 is released, which letter shows this?\nChoices:\n(A) F\n(B) C\n(C) J\n(D) E" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.703125, "loss": 4.458145618438721 }, { "id": "identity_1913", "image": "A-OKVQA/images/404655.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What temperatures are the persons walking experiencing?\nChoices:\n(A) below average\n(B) hot\n(C) freezing\n(D) cold" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.171875, "loss": 4.077520847320557 }, { "id": "identity_297112", "image": "IconQA/images/74168.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture is the third one in the sequence, following the first picture of a fish?\nChoices:\n(A) fish\n(B) sub\n(C) turtle" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.0625, "loss": 4.707602024078369 }, { "id": "identity_105852", "image": "Super-CLEVR/images/superCLEVR_new_001985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the object to the left of the scooter?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 23.890625, "loss": 4.0518598556518555 }, { "id": "identity_307609", "image": "AI2D/images/3759.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is another name for the midriff?\nChoices:\n(A) Pharynx\n(B) Diaphragm\n(C) Bronchi\n(D) Larynx" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.78125, "loss": 4.396842002868652 }, { "id": "identity_334332", "image": "PMC-VQA/images/PMC4436458_fig2_387646.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the heart is depicted in this M-mode tracing?\nChoices:\n(A) Aorta\n(B) Superior vena cava\n(C) Interventricular septum\n(D) Left atrium" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.09375, "loss": 4.875362396240234 }, { "id": "identity_117191", "image": "A-OKVQA/images/100726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.3125, "loss": 4.622819423675537 }, { "id": "identity_188838", "image": "A-OKVQA/images/58335.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 11.0859375, "loss": 4.610590934753418 }, { "id": "identity_217042", "image": "A-OKVQA/images/451793.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flags are visible on the building in the background?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 14.84375, "loss": 4.6945929527282715 }, { "id": "identity_29961", "image": "A-OKVQA/images/423951.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What site is the water shown in here?\nChoices:\n(A) stream\n(B) fish lake\n(C) harbor\n(D) pond" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.5234375, "loss": 4.424386024475098 }, { "id": "identity_107494", "image": "TextVQA/images/05f2620cb094d130.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many subdials are present on the watch face?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.03125, "loss": 4.121572017669678 }, { "id": "identity_315462", "image": "A-OKVQA/images/515612.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What powers the musical instrument in the image?\nChoices:\n(A) solar\n(B) gas\n(C) battery\n(D) oil" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.171875, "loss": 4.060660362243652 }, { "id": "identity_317883", "image": "ScienceQA/images/10605.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the scientific name of this organism?\nChoices:\n(A) crown-of-thorns sea star\n(B) Acanthaster planci" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.71875, "loss": 4.058396816253662 }, { "id": "identity_337870", "image": "A-OKVQA/images/471280.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do people put on these tracks?\nChoices:\n(A) ballast\n(B) asphalt\n(C) concrete\n(D) wires" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.96875, "loss": 4.301609992980957 }, { "id": "identity_205181", "image": "TextVQA/images/69bb92e238e6fa80.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are standing on the platform to the right?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 15.71875, "loss": 4.491930961608887 }, { "id": "identity_191584", "image": "PMC-VQA/images/PMC3667362_Fig1_207823.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individual images are presented in the figure?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 21.234375, "loss": 4.228517532348633 }, { "id": "identity_22369", "image": "A-OKVQA/images/442630.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which area does the man wait?\nChoices:\n(A) rural\n(B) forest\n(C) suburban\n(D) urban" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.640625, "loss": 4.721498012542725 }, { "id": "identity_242868", "image": "ScienceQA/images/572.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the capital of the state highlighted in the image, and then name the state directly to its north" }, { "from": "gpt", "value": "The answer is Harrisburg" } ], "clip_score": 28.96875, "loss": 4.125787734985352 }, { "id": "identity_275915", "image": "FigureQA/images/49439.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Seafoam the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.71875, "loss": 4.406713485717773 }, { "id": "identity_285818", "image": "PMC-VQA/images/PMC5684645_F3_239345.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label is utilized in the images to indicate the location of AIS?\nChoices:\n(A) mCherry\n(B) Axon\n(C) Soma\n(D) Dendrites" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.3125, "loss": 5.030344009399414 }, { "id": "identity_24056", "image": "PMC-VQA/images/PMC5832061_fig6_282327.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of hernia is seen in the L3-L4 level? \nChoices:\n(A) Central discal hernia \n(B) Right foraminal/extreme lateral discal hernia \n(C) Left foraminal/extreme lateral discal hernia \n(D) Posterolateral discal hernia " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.40625, "loss": 4.3060431480407715 }, { "id": "identity_56827", "image": "FigureQA/images/23969.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many colors represent a smaller percentage than Light Green?" }, { "from": "gpt", "value": "The answer is Two colors" } ], "clip_score": 28.546875, "loss": 5.094193458557129 }, { "id": "identity_276393", "image": "A-OKVQA/images/545163.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of traffic is prohibited from going behind the fence in this area?\nChoices:\n(A) cane assisted\n(B) automobile\n(C) foot\n(D) elderly walkers" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.9375, "loss": 4.43678092956543 }, { "id": "identity_21523", "image": "IconQA/images/41131.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a train. Which picture is third?\nChoices:\n(A) bear\n(B) plane\n(C) train" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.21875, "loss": 4.080086708068848 }, { "id": "identity_295730", "image": "A-OKVQA/images/35473.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose is the overhead wire used? \n(A) to power streetcars\n(B) to guide streetcars\n(C) for electric utility\n(D) for phone lines" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.25, "loss": 4.334231376647949 }, { "id": "identity_32396", "image": "IconQA/images/48816.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What shape is this?\nChoices:\n(A) cube\n(B) cylinder\n(C) cone\n(D) sphere" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.078125, "loss": 4.342711448669434 }, { "id": "identity_270187", "image": "IconQA/images/35317.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the quantity of rectangles an even or odd number?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.9375, "loss": 4.128157615661621 }, { "id": "identity_273898", "image": "A-OKVQA/images/162829.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of event is this?\nOptions:\n(A) show\n(B) reception\n(C) presentation\n(D) meeting" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.21875, "loss": 4.390580177307129 }, { "id": "identity_127901", "image": "VQA2.0/images/174952.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible wheels does the skateboard have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 28.0625, "loss": 4.1308794021606445 }, { "id": "identity_269088", "image": "PMC-VQA/images/PMC2492868_F6_26128.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which technique was utilized to acquire the results displayed in the figure? \nOptions:\n(A) Northern blot\n(B) Western blot\n(C) Southern blot\n(D) Eastern blot" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.4453125, "loss": 4.431933403015137 }, { "id": "identity_203247", "image": "A-OKVQA/images/125120.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cups are visible on the table?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 11.4765625, "loss": 4.193922519683838 }, { "id": "identity_116847", "image": "TextVQA/images/ce621548b854c803.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the device in the image?" }, { "from": "gpt", "value": "The answer is Lying flat" } ], "clip_score": 18.59375, "loss": 4.481661796569824 }, { "id": "identity_5466", "image": "ScienceQA/images/19193.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following organisms is the producer in this food web?\nChoices:\n(A) rotifer\n(B) green algae\n(C) shiner" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.078125, "loss": 4.02777624130249 }, { "id": "identity_27514", "image": "ScienceQA/images/1125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Colorado?\nChoices:\n(A) Denver\n(B) Salem\n(C) Olympia\n(D) Charleston" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.5, "loss": 4.241631507873535 }, { "id": "identity_39313", "image": "DVQA/images/bar_train_00149009.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group has the lowest value for the 'close' category?" }, { "from": "gpt", "value": "The answer is duty" } ], "clip_score": 23.25, "loss": 4.790244102478027 }, { "id": "identity_276593", "image": "PMC-VQA/images/PMC5440401_Fig1_167188.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the arrows shown in the image?\nChoices:\n(A) Water flow direction\n(B) AQP4 distribution pattern\n(C) AQP4 expression level\n(D) Relative distribution of AQP4" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.015625, "loss": 4.094853401184082 }, { "id": "identity_128810", "image": "PMC-VQA/images/PMC5899791_F1_301491.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many teeth are visible in the image?" }, { "from": "gpt", "value": "The answer is Four teeth" } ], "clip_score": 27.4375, "loss": 4.6091508865356445 }, { "id": "identity_20512", "image": "TextVQA/images/7a805588525a71e4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the year on the wine?" }, { "from": "gpt", "value": "The answer is colon" } ], "clip_score": 22.046875, "loss": 6.3704328536987305 }, { "id": "identity_326708", "image": "A-OKVQA/images/556278.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is keeping the street lit?\nChoices:\n(A) fire\n(B) sun\n(C) street lights\n(D) flashlights" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.734375, "loss": 4.607152462005615 }, { "id": "identity_207303", "image": "VQA2.0/images/152056.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible street lamps are there in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.5, "loss": 4.605067729949951 }, { "id": "identity_190884", "image": "PMC-VQA/images/PMC5684645_F3_239343.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arrowheads are pointing to structures in figure (A1)?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.4375, "loss": 4.404046058654785 }, { "id": "identity_267474", "image": "AI2D/images/508.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the primary consumer in the diagram provided:\nOptions:\n(A) snake\n(B) mouse\n(C) hawk\n(D) fox" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.125, "loss": 4.257456302642822 }, { "id": "identity_295801", "image": "PMC-VQA/images/PMC7803468_Fig5_341099.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be observed in the area of consolidation?\nChoices:\n(A) Blood vessels \n(B) Sonographic air bronchogram \n(C) Intestinal gas \n(D) Muscle tissue" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.265625, "loss": 4.383386135101318 }, { "id": "identity_35876", "image": "PMC-VQA/images/PMC5472191_f0030_179546.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which lobe was C5 located?\nChoices:\n(A) Frontal lobe \n(B) Parietal lobe \n(C) Occipital lobe \n(D) Temporal lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.390625, "loss": 4.545241355895996 }, { "id": "identity_128513", "image": "A-OKVQA/images/425184.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the primary mode of transportation in the image?" }, { "from": "gpt", "value": "The answer is Silver or gray" } ], "clip_score": 18.15625, "loss": 4.567681312561035 }, { "id": "identity_31730", "image": "A-OKVQA/images/323726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the brown object in the squatting man's hand?\nChoices:\n(A) football\n(B) resin\n(C) glove\n(D) pretzel" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.703125, "loss": 4.715153694152832 }, { "id": "identity_321667", "image": "A-OKVQA/images/91520.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the vehicles doing in the enclosed area with animals?\nChoices:\n(A) hunting\n(B) touring\n(C) racing\n(D) capturing" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.125, "loss": 4.489778518676758 }, { "id": "identity_326253", "image": "A-OKVQA/images/302141.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this place located?\nChoices:\n(A) canada\n(B) spain\n(C) mexico\n(D) france" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.3515625, "loss": 4.182816982269287 }, { "id": "identity_320998", "image": "A-OKVQA/images/522990.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the individual responsible for the horse and carriage?\nChoices:\n(A) horses head\n(B) in market\n(C) running races\n(D) bathroom" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.28125, "loss": 4.277599334716797 }, { "id": "identity_198238", "image": "VQA2.0/images/252244.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.359375, "loss": 4.305290699005127 }, { "id": "identity_283136", "image": "A-OKVQA/images/541493.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following items is connected to the outlet?\n(A) nightlight\n(B) hair dryer\n(C) phone\n(D) electric toothbrush" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.125, "loss": 4.202315807342529 }, { "id": "identity_196512", "image": "VQA2.0/images/511639.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are riding the elephant?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.640625, "loss": 4.214141845703125 }, { "id": "identity_325301", "image": "PMC-VQA/images/PMC8960880_Fig7_237236.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which parts of the implant are shown in higher power views in the panel on the right?\nChoices:\n(A) Head and tail\n(B) Tail and body\n(C) Head and tip\n(D) Body and tip" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.8203125, "loss": 4.076822757720947 }, { "id": "identity_107106", "image": "VQA2.0/images/277439.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individual table settings are visible?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.578125, "loss": 4.810176849365234 }, { "id": "identity_209764", "image": "TextVQA/images/b103e7b42c9d15ea.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the condition of the typewriter as indicated on the tag?" }, { "from": "gpt", "value": "The answer is A1E Backroom" } ], "clip_score": 30.625, "loss": 5.554213523864746 }, { "id": "identity_316025", "image": "A-OKVQA/images/235672.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is this place using covered gazebos?\nChoices:\n(A) too icy\n(B) stay dry\n(C) too sunny\n(D) too windy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.84375, "loss": 4.193076133728027 }, { "id": "identity_307151", "image": "FigureQA/images/17603.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.03125, "loss": 4.105035781860352 }, { "id": "identity_51939", "image": "DocVQA/images/kxcg0023_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of Mike Middleton at Emro Marketing?" }, { "from": "gpt", "value": "The answer is VA Speedee Division Manager" } ], "clip_score": 21.296875, "loss": 5.0900678634643555 }, { "id": "identity_206269", "image": "A-OKVQA/images/429793.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many books can be seen on the desk?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 22.84375, "loss": 4.792141914367676 }, { "id": "identity_334783", "image": "A-OKVQA/images/411417.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of item is hanging on this motorcycle?\nChoices:\n(A) air freshener\n(B) groceries\n(C) parking permit\n(D) necklace" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.109375, "loss": 4.009081840515137 }, { "id": "identity_32938", "image": "A-OKVQA/images/147042.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the hanging item used for?\nChoices:\n(A) seasoning\n(B) light\n(C) mix soup\n(D) swatting flies" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.8125, "loss": 4.096576690673828 }, { "id": "identity_313333", "image": "FigureQA/images/10333.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.21875, "loss": 4.439375400543213 }, { "id": "identity_297280", "image": "A-OKVQA/images/29894.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action is the man prepared to take with the racket in his hand?\nChoices:\n(A) duck\n(B) dip\n(C) dodge\n(D) hit ball" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.453125, "loss": 4.212399482727051 }, { "id": "identity_43552", "image": "DocVQA/images/htmd0079_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the department requesting the authorization?" }, { "from": "gpt", "value": "The answer is R&D Biochemical/Behavioral" } ], "clip_score": 22.265625, "loss": 4.240110397338867 }, { "id": "identity_4811", "image": "FigureQA/images/93375.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Aqua have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.078125, "loss": 4.139158725738525 }, { "id": "identity_28688", "image": "A-OKVQA/images/5123.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of trash goes in these trash cans?\nChoices:\n(A) recycling\n(B) general waste\n(C) horse manure\n(D) greenery" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.078125, "loss": 4.503756046295166 }, { "id": "identity_68794", "image": "ScienceQA/images/19696.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the natural shape of the object in the image?" }, { "from": "gpt", "value": "The answer is Irregular natural shape" } ], "clip_score": 22.984375, "loss": 4.609912395477295 }, { "id": "identity_314182", "image": "A-OKVQA/images/320978.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which vegetable is probably the priciest item by piece or pound?\nChoices:\n(A) artichoke\n(B) celery\n(C) corn\n(D) turnips" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.28125, "loss": 4.038127899169922 }, { "id": "identity_50404", "image": "DocVQA/images/syjf0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main concern expressed in the letter?" }, { "from": "gpt", "value": "The answer is The risk of unfavorable revisions in the Actos labeling" } ], "clip_score": 21.484375, "loss": 4.329476356506348 }, { "id": "identity_1115", "image": "VQA2.0/images/304997.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the letter behind home plate?" }, { "from": "gpt", "value": "The answer is w" } ], "clip_score": 23.875, "loss": 4.06578254699707 }, { "id": "identity_312573", "image": "A-OKVQA/images/296851.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of transport is shown in the image above?\nChoices:\n(A) railway\n(B) air\n(C) water\n(D) road" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.5546875, "loss": 4.43367338180542 }, { "id": "identity_108802", "image": "VQA-AS/images/12857.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.21875, "loss": 4.4214701652526855 }, { "id": "identity_192567", "image": "PMC-VQA/images/PMC6067806_FIG2_347902.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many images are displayed?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.125, "loss": 4.259124279022217 }, { "id": "identity_30154", "image": "A-OKVQA/images/323528.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What area is shown here?\nChoices:\n(A) farm\n(B) urban\n(C) rural\n(D) suburban" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.7109375, "loss": 4.665204048156738 }, { "id": "identity_20239", "image": "PMC-VQA/images/PMC7719498_fig1_315933.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the X-ray show regarding the medial clear space? \nChoices:\n(A) Decreased \n(B) Increased \n(C) Normal \n(D) Irrelevant " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.234375, "loss": 4.237575531005859 }, { "id": "identity_3166", "image": "PMC-VQA/images/PMC7693825_jcm-09-03516-f002_306867.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the grey color in the CT-scan represent? \nChoices:\n(A) cartilage \n(B) postresection area \n(C) native mandible \n(D) plate rupture " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.84375, "loss": 5.229498863220215 }, { "id": "identity_275956", "image": "ScienceQA/images/6895.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repulsive force towards each other?\nChoices:\n(A) repel\n(B) attract" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.015625, "loss": 4.210256576538086 }, { "id": "identity_52729", "image": "DocVQA/images/xzgg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the assistant editor who signed the letter?" }, { "from": "gpt", "value": "The answer is Elizabeth Chamberlin" } ], "clip_score": 24.328125, "loss": 4.389684200286865 }, { "id": "identity_307478", "image": "VizWiz/images/VizWiz_train_00023923.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 16.03125, "loss": 4.122292518615723 }, { "id": "identity_31890", "image": "TextVQA/images/91e4c8b4e33b7aad.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: are these to machines to put coins in and out come different kinds of stickers?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 24.53125, "loss": 4.529354095458984 }, { "id": "identity_37483", "image": "A-OKVQA/images/266168.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is next to the tracks?\nChoices:\n(A) walking couple\n(B) dog\n(C) tall lights\n(D) cat" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.59375, "loss": 4.476614475250244 }, { "id": "identity_304988", "image": "AI2D/images/2357.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is stage B?\nChoices:\n(A) Stage B is the root\n(B) Stage B is the adult\n(C) Stage B is the seed\n(D) Stage B is the seedling" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.875, "loss": 4.035689353942871 }, { "id": "identity_335053", "image": "ScienceQA/images/13629.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Annapolis\n(B) Columbus\n(C) Cheyenne\n(D) Cincinnati" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.25, "loss": 4.309609413146973 }, { "id": "identity_218450", "image": "A-OKVQA/images/186095.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many horses are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.015625, "loss": 4.15737247467041 }, { "id": "identity_112173", "image": "VQA-AS/images/11110.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the curtains in the room?" }, { "from": "gpt", "value": "The answer is White and brown" } ], "clip_score": 27.34375, "loss": 4.094278812408447 }, { "id": "identity_263768", "image": "Super-CLEVR/images/superCLEVR_new_001742.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the metal object on the right side of the shiny bus have the same shape as the small green thing?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 26.765625, "loss": 4.844674110412598 }, { "id": "identity_21934", "image": "A-OKVQA/images/233744.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of water is shown here?\nChoices:\n(A) pool\n(B) ocean\n(C) swamp\n(D) pond" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 5.75, "loss": 4.1816887855529785 }, { "id": "identity_296335", "image": "AI2D/images/2900.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does label B represent in the given choices?\n(A) Right Lung\n(B) Lobe Removed\n(C) Normal Lymph Nodes\n(D) Left Lung" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.3125, "loss": 4.536776065826416 }, { "id": "identity_326182", "image": "A-OKVQA/images/499068.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of trees can be seen near the building?\nChoices:\n(A) maple trees\n(B) pine trees\n(C) elm trees\n(D) palm trees" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 8.6171875, "loss": 4.045807838439941 }, { "id": "identity_284530", "image": "TextVQA/images/005a2a34b7a0e507.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the word written on the man's shirt?" }, { "from": "gpt", "value": "The answer is turbo" } ], "clip_score": 15.8046875, "loss": 4.388216495513916 }, { "id": "identity_289754", "image": "PMC-VQA/images/PMC4579082_pbio.1002253.g002_425907.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which muscle is visible in the transverse sections shown in panels (e,f)? \nOptions:\n(A) Soleus\n(B) Masseter\n(C) Biceps brachii\n(D) Pectoralis minor" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.84375, "loss": 4.217895984649658 }, { "id": "identity_273967", "image": "AI2D/images/647.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which season is represented by the letter B?\nChoices:\n(A) Autumn\n(B) Summer\n(C) Winter\n(D) Spring" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.765625, "loss": 4.237864017486572 }, { "id": "identity_299401", "image": "A-OKVQA/images/465824.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the woman engaged in?\nChoices:\n(A) dancing\n(B) jumping\n(C) sleeping\n(D) yawning" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.1484375, "loss": 4.12064266204834 }, { "id": "identity_291910", "image": "A-OKVQA/images/312182.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity are people currently engaged in? Are they:\n(A) singing\n(B) marching\n(C) swimming\n(D) protesting" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.34375, "loss": 4.163873195648193 }, { "id": "identity_204406", "image": "A-OKVQA/images/254177.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What pattern is on the man's tie?" }, { "from": "gpt", "value": "The answer is A shiny intricate pattern" } ], "clip_score": 22.125, "loss": 4.092831611633301 }, { "id": "identity_121155", "image": "Super-CLEVR/images/superCLEVR_new_001267.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is the blue object on the right side of the image?" }, { "from": "gpt", "value": "The answer is A minivan" } ], "clip_score": 22.5, "loss": 4.196739673614502 }, { "id": "identity_262431", "image": "AI2D/images/4301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter corresponds to the label for sea level in the given choices? \n(A) I\n(B) E\n(C) B\n(D) F" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.8828125, "loss": 4.47440242767334 }, { "id": "identity_146707", "image": "DocVQA/images/hncc0228_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the document?" }, { "from": "gpt", "value": "The answer is Identity Standard; Listing of Papain as an Optional Ingredient" } ], "clip_score": 18.84375, "loss": 4.52413272857666 }, { "id": "identity_312361", "image": "AI2D/images/3761.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the lowest part of the diagram?\nChoices:\n(A) diaphragm\n(B) trachea\n(C) none of the above\n(D) left lung" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.484375, "loss": 4.352478981018066 }, { "id": "identity_332733", "image": "VizWiz/images/VizWiz_train_00015915.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this artwork?" }, { "from": "gpt", "value": "The answer is pom tree" } ], "clip_score": 24.328125, "loss": 5.851312160491943 }, { "id": "identity_262201", "image": "AI2D/images/2648.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At which stage of the solar cycle is the letter A representing?\nChoices:\n(A) Winter Solstice\n(B) Vernal Equinox\n(C) Autumnal Equinox\n(D) Summer Solstice" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.96875, "loss": 4.5113091468811035 }, { "id": "identity_114678", "image": "PMC-VQA/images/PMC9251490_F1_330210.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the horizontal axis in the image?" }, { "from": "gpt", "value": "The answer is Time" } ], "clip_score": 23.8125, "loss": 4.124154090881348 }, { "id": "identity_6587", "image": "TQA/images/question_images/parts_neuron_7219.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is not part of a neuron\nChoices:\n(A) soma\n(B) axon\n(C) fingers\n(D) dentrites" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.59375, "loss": 4.287812232971191 }, { "id": "identity_321903", "image": "PMC-VQA/images/PMC7738134_F1_321312.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the condition of the patient's intraoral mucosa seen in the image?\nChoices:\n(A) Intact\n(B) Swollen\n(C) Blistered\n(D) Ruptured" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.796875, "loss": 4.019886493682861 }, { "id": "identity_205108", "image": "PMC-VQA/images/PMC7364197_fig1_214940.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can the exact location of the white arrowhead be determined in relation to the anatomical structure?" }, { "from": "gpt", "value": "The answer is Hip bone" } ], "clip_score": 26.96875, "loss": 4.030590057373047 }, { "id": "identity_28286", "image": "ScienceQA/images/6496.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these four objects have in common?\nChoices:\n(A) salty\n(B) translucent\n(C) transparent" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.515625, "loss": 4.13137149810791 }, { "id": "identity_248663", "image": "Super-CLEVR/images/superCLEVR_new_001578.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the small object that is on the left side of the car that is on the right side of the matte object behind the blue rubber thing?" }, { "from": "gpt", "value": "The answer is yellow" } ], "clip_score": 30.90625, "loss": 4.019708156585693 }, { "id": "identity_310010", "image": "Super-CLEVR/images/superCLEVR_new_001008.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What size is the red bus to the left of the object behind the bike?" }, { "from": "gpt", "value": "The answer is small" } ], "clip_score": 26.515625, "loss": 4.559618949890137 }, { "id": "identity_191453", "image": "A-OKVQA/images/297645.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible elephants are there in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.40625, "loss": 4.529883861541748 }, { "id": "identity_315536", "image": "A-OKVQA/images/378657.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What sound are people here waiting for?\nChoices:\n(A) clock chime\n(B) prayer\n(C) rodeo band\n(D) silence" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.375, "loss": 4.649081230163574 }, { "id": "identity_131064", "image": "Super-CLEVR/images/superCLEVR_new_000112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is to the left of the purple double bus and has the same number of wheels as the green airplane?" }, { "from": "gpt", "value": "The answer is Silver" } ], "clip_score": 33.9375, "loss": 4.755102634429932 }, { "id": "identity_184886", "image": "PMC-VQA/images/PMC9078808_fig4_274145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many green colored markers are there in the CEUS image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.125, "loss": 4.367787837982178 }, { "id": "identity_8718", "image": "PMC-VQA/images/PMC7319815_ytaa086-F1_200240.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of prosthesis is being shown in the image? \nChoices:\n(A) Biological Prosthesis \n(B) Mechanical Prosthesis \n(C) Tissue-engineered Prosthesis \n(D) Autograft Prosthesis " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.1171875, "loss": 4.3037567138671875 }, { "id": "identity_274998", "image": "Super-CLEVR/images/superCLEVR_new_000384.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the colors of the small rubber motorbike and the big chopper the same?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 27.0, "loss": 5.462634086608887 }, { "id": "identity_25186", "image": "TextVQA/images/5515f3d3b673ed49.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what company is on her jersey?" }, { "from": "gpt", "value": "The answer is uhisport" } ], "clip_score": 24.40625, "loss": 4.053682804107666 }, { "id": "identity_34658", "image": "PMC-VQA/images/PMC3841825_F3_246370.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which muscle type is the soleus in the TEM images? \nChoices:\n(A) skeletal \n(B) cardiac \n(C) smooth \n(D) none of the above " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.46875, "loss": 4.070502281188965 }, { "id": "identity_268314", "image": "FigureQA/images/53633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dodger Blue the highest option?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.234375, "loss": 4.297243595123291 }, { "id": "identity_318280", "image": "FigureQA/images/23478.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.171875, "loss": 4.037376403808594 }, { "id": "identity_313618", "image": "FigureQA/images/80684.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this color have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.859375, "loss": 4.013852119445801 }, { "id": "identity_264372", "image": "VQA2.0/images/336077.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the stadium full of people?" }, { "from": "gpt", "value": "The answer is baseball game" } ], "clip_score": 24.25, "loss": 5.152805805206299 }, { "id": "identity_145288", "image": "DocVQA/images/qnjw0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the folder name mentioned in the separation record?" }, { "from": "gpt", "value": "The answer is CAST Box 113:9" } ], "clip_score": 18.328125, "loss": 4.458085536956787 }, { "id": "identity_335467", "image": "DVQA/images/bar_train_00195380.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bars have values less than 7?" }, { "from": "gpt", "value": "The answer is five" } ], "clip_score": 22.390625, "loss": 4.0504326820373535 }, { "id": "identity_123931", "image": "VQA2.0/images/91841.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is the man in the foreground walking towards?" }, { "from": "gpt", "value": "The answer is A car" } ], "clip_score": 21.109375, "loss": 4.03020715713501 }, { "id": "identity_268867", "image": "FigureQA/images/68914.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Firebrick the color with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.234375, "loss": 4.089420318603516 }, { "id": "identity_22074", "image": "PMC-VQA/images/PMC6067806_FIG1_347887.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which side of the brain is affected in the CT scan?\nChoices:\n(A) Left side \n(B) Right side \n(C) Both sides \n(D) Cannot be determined" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.484375, "loss": 4.179743766784668 }, { "id": "identity_29202", "image": "DocVQA/images/rzcd0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first title in the document?" }, { "from": "gpt", "value": "The answer is program" } ], "clip_score": 18.109375, "loss": 4.069574356079102 }, { "id": "identity_299225", "image": "IconQA/images/47517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture is the third one if the first picture is a tractor, and the choices are a barn, a tractor, and the sun?\nChoices:\n(A) barn\n(B) tractor\n(C) sun" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.0625, "loss": 4.026300430297852 }, { "id": "identity_311869", "image": "TQA/images/question_images/human_system_digestive_6096.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is below this part?\nChoices:\n(A) stomach\n(B) anus\n(C) large intestine\n(D) gullet" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.125, "loss": 4.260595798492432 }, { "id": "identity_288770", "image": "VQA2.0/images/61488.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is located at the top of the rectangular stand that is blue in color?" }, { "from": "gpt", "value": "The answer is towel" } ], "clip_score": 14.3984375, "loss": 4.279076099395752 }, { "id": "identity_59451", "image": "DVQA/images/bar_train_00196053.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset has the highest accuracy value for the 'danger' algorithm?" }, { "from": "gpt", "value": "The answer is park" } ], "clip_score": 33.75, "loss": 4.095647811889648 }, { "id": "identity_27936", "image": "A-OKVQA/images/574140.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of event is he attending?\nChoices:\n(A) concert\n(B) meeting\n(C) game\n(D) reception" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.796875, "loss": 4.300723075866699 }, { "id": "identity_104222", "image": "A-OKVQA/images/2525.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different cupcake toppings can be seen?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 24.140625, "loss": 4.87400484085083 }, { "id": "identity_189406", "image": "PMC-VQA/images/PMC5842610_Fig4_285246.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this MRI image taken with contrast?" }, { "from": "gpt", "value": "The answer is No contrast used" } ], "clip_score": 30.703125, "loss": 4.7054314613342285 }, { "id": "identity_184790", "image": "VizWiz/images/VizWiz_train_00009127.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bowling pins are displayed on the cover?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 27.375, "loss": 4.261315822601318 }, { "id": "identity_42753", "image": "DVQA/images/bar_train_00171335.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest preference for 'summer'?" }, { "from": "gpt", "value": "The answer is reply" } ], "clip_score": 25.0625, "loss": 4.503985404968262 }, { "id": "identity_279337", "image": "VQA2.0/images/44337.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the disposable cups?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 16.015625, "loss": 4.061001300811768 }, { "id": "identity_307498", "image": "AI2D/images/4809.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Composed of more than one part is known as what?\nChoices:\n(A) Compond\n(B) Round\n(C) Helicord\n(D) Flat" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.78125, "loss": 4.108559608459473 }, { "id": "identity_274003", "image": "A-OKVQA/images/256276.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the woman engaged in with the bear? Is she cleaning it, feeding it, smothering it, or fighting it?\nChoices:\n(A) cleaning it\n(B) feeding it\n(C) smothering it\n(D) fighting it" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.0, "loss": 4.289012432098389 }, { "id": "identity_13474", "image": "AI2D/images/4217.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does I represent in the image?\nChoices:\n(A) none of the above\n(B) sediments\n(C) rocks\n(D) magma" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.375, "loss": 4.464236736297607 }, { "id": "identity_338365", "image": "Super-CLEVR/images/superCLEVR_new_000069.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the green rubber object on the right side of the matte bicycle in front of the object behind the yellow rubber object?" }, { "from": "gpt", "value": "The answer is minivan" } ], "clip_score": 33.15625, "loss": 4.525604724884033 }, { "id": "identity_328773", "image": "A-OKVQA/images/393087.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is this old tower made of?\nChoices:\n(A) brick\n(B) wood\n(C) cobblestone\n(D) mud" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.625, "loss": 4.292593479156494 }, { "id": "identity_131213", "image": "Super-CLEVR/images/superCLEVR_new_001703.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the airplane that is to the right of the purple motorcycle?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 32.28125, "loss": 4.451363563537598 }, { "id": "identity_284634", "image": "TextVQA/images/8b44bfa1b5717879.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What program is located at the bottom left?" }, { "from": "gpt", "value": "The answer is tunenet" } ], "clip_score": 20.578125, "loss": 4.437703609466553 }, { "id": "identity_296854", "image": "A-OKVQA/images/319521.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the occupation of the man who is described as black?\nChoices:\n(A) doctor\n(B) salesman\n(C) lifeguard\n(D) officer" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.3515625, "loss": 4.411592483520508 }, { "id": "identity_322830", "image": "A-OKVQA/images/387895.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for this person having a bag with them?\nChoices:\n(A) lunch\n(B) travelling\n(C) shopping\n(D) hiking" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.0546875, "loss": 4.757626533508301 }, { "id": "identity_289358", "image": "ScienceQA/images/19329.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Wyoming?\n(A) Cheyenne\n(B) Laramie\n(C) Boise\n(D) Baltimore" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.34375, "loss": 4.2427778244018555 }, { "id": "identity_308778", "image": "TextVQA/images/122d892264e2094b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is this phone?" }, { "from": "gpt", "value": "The answer is cycn" } ], "clip_score": 26.1875, "loss": 4.141138553619385 }, { "id": "identity_54055", "image": "FigureQA/images/77551.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lines in the graph have a downward trend between the x-axis values of 20 and 40?" }, { "from": "gpt", "value": "The answer is Two lines" } ], "clip_score": 21.71875, "loss": 4.850664138793945 }, { "id": "identity_57110", "image": "MapQA/images/map_10090.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color representing the range 20859-26780 on the map?" }, { "from": "gpt", "value": "The answer is Light green" } ], "clip_score": 24.53125, "loss": 4.485995769500732 }, { "id": "identity_295072", "image": "TabMWP/images/4662.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At what time does the train arrive at Weston according to the schedule provided?\nChoices:\n(A) 2:30 P.M.\n(B) 10:20 A.M.\n(C) 6:25 P.M.\n(D) 7:50 A.M." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.953125, "loss": 4.169605255126953 }, { "id": "identity_215295", "image": "A-OKVQA/images/160740.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many traffic lights can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.4375, "loss": 4.508145332336426 }, { "id": "identity_334385", "image": "IconQA/images/95132.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, how likely is it that you will get a black one?\nChoices:\n(A) unlikely\n(B) impossible\n(C) certain\n(D) probable" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.96875, "loss": 4.499194622039795 }, { "id": "identity_19664", "image": "TextVQA/images/87d7fdf7b45162d8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: man vs. what?" }, { "from": "gpt", "value": "The answer is internet" } ], "clip_score": 16.421875, "loss": 4.776394367218018 }, { "id": "identity_314522", "image": "A-OKVQA/images/298051.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of a band with this many members?\nChoices:\n(A) quartet\n(B) cinqtet\n(C) duet\n(D) sextet" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.8671875, "loss": 4.29373025894165 }, { "id": "identity_5676", "image": "FigureQA/images/75409.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Orange have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.90625, "loss": 4.515298843383789 }, { "id": "identity_331395", "image": "TextVQA/images/534f4ce2ac0805ae.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who produced these jars?" }, { "from": "gpt", "value": "The answer is diesel hpr" } ], "clip_score": 28.734375, "loss": 5.21387243270874 }, { "id": "identity_283480", "image": "PMC-VQA/images/PMC6345940_Fig3_427070.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are all the images available for viewing?\nChoices:\n(A) Main text \n(B) Supplementary Videos SV1 \n(C) Introduction \n(D) Acknowledgments" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.453125, "loss": 4.197642803192139 }, { "id": "identity_332057", "image": "TabMWP/images/16597.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which complex had the most vacant apartments in August?\nChoices:\n(A) Shadyside Apartments\n(B) Alameda Apartments\n(C) Creekside Apartments\n(D) Lindale Apartments" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.21875, "loss": 4.015188694000244 }, { "id": "identity_287819", "image": "ScienceQA/images/15744.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Nevada?\nChoices:\n(A) Carson City\n(B) Helena\n(C) Reno\n(D) Las Vegas" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.203125, "loss": 4.006106376647949 }, { "id": "identity_334207", "image": "Super-CLEVR/images/superCLEVR_new_001886.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the small cyan object behind the big brown object made of?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 22.875, "loss": 4.036317348480225 }, { "id": "identity_201449", "image": "PMC-VQA/images/PMC5394395_fig4_153636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can we determine the specific location of the avulsion fracture within the knee?" }, { "from": "gpt", "value": "The answer is Tibial tuberosity" } ], "clip_score": 28.953125, "loss": 4.101767063140869 }, { "id": "identity_319891", "image": "AI2D/images/3561.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does A represent?\nChoices:\n(A) head\n(B) none of the above\n(C) stomach\n(D) rectum" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.671875, "loss": 4.338000297546387 }, { "id": "identity_184825", "image": "TextVQA/images/39478eed4e70d4f2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible in the dugout behind the catcher?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 26.96875, "loss": 4.661592483520508 }, { "id": "identity_27170", "image": "A-OKVQA/images/138747.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The glare from the sun is distracting for drivers by reflecting off of what surface?\nChoices:\n(A) pocket mirrors\n(B) cell phone\n(C) camera flash\n(D) motorcycle shields" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.28125, "loss": 4.157533645629883 }, { "id": "identity_263782", "image": "A-OKVQA/images/301453.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What prevents the motorbike from tipping over?\nChoices:\n(A) braces\n(B) bricks\n(C) wall\n(D) person" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.46875, "loss": 4.2746148109436035 }, { "id": "identity_31546", "image": "ScienceQA/images/805.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Utah?\nChoices:\n(A) Salem\n(B) Salt Lake City\n(C) Provo\n(D) Helena" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.328125, "loss": 4.214655876159668 }, { "id": "identity_336244", "image": "PMC-VQA/images/PMC4430636_fig1_386105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What technique was used to create this image?\nChoices:\n(A) CT scan\n(B) MRI\n(C) X-ray\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.21875, "loss": 4.919617652893066 }, { "id": "identity_284373", "image": "PMC-VQA/images/PMC9374607_Fig1_371743.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which area of the patient's brain is impacted by the occlusion?\nOptions:\n(A) Right hemisphere\n(B) Left hemisphere\n(C) Cerebellum\n(D) Brainstem" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.46875, "loss": 4.2913618087768555 }, { "id": "identity_198099", "image": "VQA2.0/images/203036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are wearing a patterned garment?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 20.484375, "loss": 4.7081146240234375 }, { "id": "identity_104326", "image": "Super-CLEVR/images/superCLEVR_new_000987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object directly above the green motorcycle?" }, { "from": "gpt", "value": "The answer is Multicolored" } ], "clip_score": 31.015625, "loss": 4.066434383392334 }, { "id": "identity_37871", "image": "DVQA/images/bar_train_00071692.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the fourth group of bars from the bottom?" }, { "from": "gpt", "value": "The answer is bay" } ], "clip_score": 22.203125, "loss": 4.05033016204834 }, { "id": "identity_318127", "image": "PMC-VQA/images/PMC8106835_Fig3_436110.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is observed in the alveolar spaces?\nChoices:\n(A) Edema \n(B) Inflammatory cells \n(C) Enlarged type 2 pneumocytes \n(D) All of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.328125, "loss": 4.226329803466797 }, { "id": "identity_301593", "image": "A-OKVQA/images/520831.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of tag hangs from the person in red's jacket?\nChoices:\n(A) ski pass\n(B) birth certificate\n(C) hall pass\n(D) bank statement" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.953125, "loss": 5.343186378479004 }, { "id": "identity_301367", "image": "TextVQA/images/5129f8041aa92388.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the second section on this page?" }, { "from": "gpt", "value": "The answer is hypocrifis" } ], "clip_score": 11.9375, "loss": 4.62688684463501 }, { "id": "identity_275353", "image": "ScienceQA/images/16462.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the primary rhetorical appeal used in this ad: pathos (emotion), logos (reason), or ethos (character)?\nChoices:\n(A) pathos (emotion)\n(B) logos (reason)\n(C) ethos (character)" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 7.32421875, "loss": 4.824344635009766 }, { "id": "identity_183837", "image": "A-OKVQA/images/16964.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many train tracks are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.421875, "loss": 4.297466278076172 }, { "id": "identity_23251", "image": "PMC-VQA/images/PMC2494588_fig3_26203.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the body was imaged in the given image? \nChoices:\n(A) Stomach \n(B) Knee \n(C) Chest \n(D) Brain " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.890625, "loss": 4.241530418395996 }, { "id": "identity_285502", "image": "A-OKVQA/images/553640.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what event are the individuals currently participating? \nOptions:\n(A) wedding\n(B) birthday\n(C) funeral\n(D) graduation" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.5625, "loss": 4.401375770568848 }, { "id": "identity_34574", "image": "A-OKVQA/images/85225.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What animal would you be most unlikely to see in this setting?\nChoices:\n(A) tiger\n(B) dog\n(C) horse\n(D) cat" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 1.3505859375, "loss": 4.167836666107178 }, { "id": "identity_5941", "image": "ScienceQA/images/6299.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of New Hampshire?\nChoices:\n(A) Concord\n(B) Augusta\n(C) Manchester\n(D) Austin" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.6875, "loss": 4.120919227600098 }, { "id": "identity_217916", "image": "A-OKVQA/images/113977.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant hair color of the audience in the image?" }, { "from": "gpt", "value": "The answer is The most common hair color" } ], "clip_score": 17.984375, "loss": 4.193868160247803 }, { "id": "identity_319286", "image": "DVQA/images/bar_train_00125033.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second group of bars from the end?" }, { "from": "gpt", "value": "The answer is river" } ], "clip_score": 15.9921875, "loss": 4.4169816970825195 }, { "id": "identity_328763", "image": "PMC-VQA/images/PMC5214568_fig3_110055.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which area is affected by more than half of the maxillary sinus involvement according to the CBCT scan?\nChoices:\n(A) Maxillary left back tooth area\n(B) Maxillary right back tooth area\n(C) Front teeth area\n(D) Mandibular right tooth area" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.75, "loss": 4.286648750305176 }, { "id": "identity_31354", "image": "A-OKVQA/images/535692.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where does surfing come from?\nChoices:\n(A) mexico\n(B) hawaii\n(C) polynesia\n(D) argentina" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.5, "loss": 4.025493621826172 }, { "id": "identity_270322", "image": "FigureQA/images/97843.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the two, Salmon or Tan, is greater?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.78125, "loss": 4.300826072692871 }, { "id": "identity_221149", "image": "TextVQA/images/90d475ad016a6c98.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the second menu item listed under \"Piatti con tartufo\"?" }, { "from": "gpt", "value": "The answer is Carema" } ], "clip_score": 33.5, "loss": 4.555910587310791 }, { "id": "identity_142850", "image": "MapQA/images/map_11045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states are not applicable (N/A) according to the map?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 31.71875, "loss": 4.0056939125061035 }, { "id": "identity_326453", "image": "A-OKVQA/images/400123.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on the opposite wall from this? \nChoices:\n(A) picture\n(B) towel bar\n(C) shower\n(D) bathtub" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.9375, "loss": 4.379587650299072 }, { "id": "identity_299368", "image": "A-OKVQA/images/132954.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which year was the Union Jack created?\nChoices:\n(A) 1606\n(B) 1612\n(C) 1672\n(D) 1619" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.890625, "loss": 4.152121067047119 }, { "id": "identity_289123", "image": "PMC-VQA/images/PMC3730215_fig1_221921.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized for the initial examination? \nOptions:\n(A) CT scan\n(B) MRI\n(C) Plain X-ray radiography\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.109375, "loss": 4.134143352508545 }, { "id": "identity_319935", "image": "Super-CLEVR/images/superCLEVR_new_000687.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the size of the articulated bus the same as this red thing?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 19.46875, "loss": 4.820348262786865 }, { "id": "identity_288499", "image": "PMC-VQA/images/PMC3253626_F12_121450.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do images E, J, and K depict?\nChoices:\n(A) gastrostyles\n(B) coenosteal texture\n(C) gastropores\n(D) dactylostyles." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.9375, "loss": 5.111761093139648 }, { "id": "identity_121938", "image": "PMC-VQA/images/PMC6108962_Fig6_358604.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the mass located on the right or left side of the midline?" }, { "from": "gpt", "value": "The answer is Right side" } ], "clip_score": 19.9375, "loss": 4.011301517486572 }, { "id": "identity_262194", "image": "TQA/images/question_images/biomes_6569.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which biome predominantly covers Russia? \n(A) Subtropical Rainforest\n(B) Taiga\n(C) Xeric Shrubland\n(D) Temperate Broadleafs Forest" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.015625, "loss": 4.049761772155762 }, { "id": "identity_275348", "image": "FigureQA/images/96150.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Turquoise or Forest Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.2109375, "loss": 4.607803821563721 }, { "id": "identity_281218", "image": "ScienceQA/images/8177.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) Wisconsin\n(B) South Carolina\n(C) Maryland\n(D) North Carolina" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.96875, "loss": 4.039801120758057 }, { "id": "identity_15674", "image": "DVQA/images/bar_train_00165930.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second stack of bars from the left?" }, { "from": "gpt", "value": "The answer is glass" } ], "clip_score": 23.421875, "loss": 4.079441070556641 }, { "id": "identity_304189", "image": "FigureQA/images/43694.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the amount of Dark Orange smaller than Cornflower?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.765625, "loss": 4.151219367980957 }, { "id": "identity_326096", "image": "A-OKVQA/images/129175.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these called?\nChoices:\n(A) gondola\n(B) tugboat\n(C) rowboat\n(D) putter" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.28125, "loss": 4.0033278465271 }, { "id": "identity_26156", "image": "A-OKVQA/images/491515.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How are bags identified here?\nChoices:\n(A) tracking device\n(B) color\n(C) tags\n(D) they aren't" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.9140625, "loss": 4.48563289642334 }, { "id": "identity_10486", "image": "A-OKVQA/images/386581.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What will she put on the car?\nChoices:\n(A) ticket\n(B) business card\n(C) flyer\n(D) registration" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.3984375, "loss": 4.5026679039001465 }, { "id": "identity_157754", "image": "TQA/images/abc_question_images/nuclear_energy_18103.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many types of particles are involved in the process at E?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 29.953125, "loss": 4.159699440002441 }, { "id": "identity_320096", "image": "IconQA/images/13394.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is displayed? Answer using a time word, not a number. It is (_) after one." }, { "from": "gpt", "value": "The answer is quarter" } ], "clip_score": 25.6875, "loss": 4.896784782409668 }, { "id": "identity_215003", "image": "PMC-VQA/images/PMC3056626_F0006_90025.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is the arrow pointing to in the image?" }, { "from": "gpt", "value": "The answer is Liver" } ], "clip_score": 28.65625, "loss": 4.853034496307373 }, { "id": "identity_273028", "image": "A-OKVQA/images/293895.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which item in the image is responsible for providing shade? \nChoices:\n(A) towers\n(B) towels\n(C) umbrellas\n(D) trees" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.5234375, "loss": 4.263299465179443 }, { "id": "identity_324806", "image": "PMC-VQA/images/PMC5985138_fig1_323495.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this image show?\nChoices:\n(A) Large bowel obstruction \n(B) Small bowel obstruction \n(C) Liver cirrhosis \n(D) Pancreatitis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.625, "loss": 4.430108070373535 }, { "id": "identity_321347", "image": "PMC-VQA/images/PMC1564131_F2_7139.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What types of images are displayed in the MRI?\nChoices:\n(A) Coronal and sagittal \n(B) Axial and coronal \n(C) Oblique and sagittal \n(D) Sagittal and transaxial." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.640625, "loss": 4.061346530914307 }, { "id": "identity_335146", "image": "PMC-VQA/images/PMC3199062_fig4_112726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the colors in this image indicate?\nChoices:\n(A) Intensity levels \n(B) Regions of interest \n(C) P-value thresholds \n(D) None of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.921875, "loss": 5.017683982849121 }, { "id": "identity_336761", "image": "ScienceQA/images/6383.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which place is highlighted?\nChoices:\n(A) the Federated States of Micronesia\n(B) the Marshall Islands\n(C) Tonga\n(D) Kiribati" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.375, "loss": 4.435329914093018 }, { "id": "identity_335021", "image": "ScienceQA/images/4751.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Indianapolis\n(B) Jackson\n(C) Charleston\n(D) Columbia" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.578125, "loss": 4.148608207702637 }, { "id": "identity_332936", "image": "ScienceQA/images/19628.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the place shown?\nChoices:\n(A) Massachusetts\n(B) Connecticut\n(C) Pennsylvania\n(D) New Hampshire" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.171875, "loss": 4.8115692138671875 }, { "id": "identity_261313", "image": "AI2D/images/3235.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the dots in the structure?\nChoices:\n(A) ribosome\n(B) vacuole\n(C) none of the above\n(D) mitochondria" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.234375, "loss": 4.1861443519592285 }, { "id": "identity_332042", "image": "A-OKVQA/images/110251.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is behind this?\nChoices:\n(A) cat\n(B) dog\n(C) antelope\n(D) playground" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.28125, "loss": 4.3473005294799805 }, { "id": "identity_201286", "image": "VQA2.0/images/99004.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tent to the left of the center in the background?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 18.515625, "loss": 4.2899370193481445 }, { "id": "identity_131072", "image": "Super-CLEVR/images/superCLEVR_new_001452.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest vehicle in the image?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 19.34375, "loss": 4.246108055114746 }, { "id": "identity_37931", "image": "A-OKVQA/images/244171.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these people observing?\nChoices:\n(A) festival\n(B) fireworks\n(C) firestorm\n(D) bonfire" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.09375, "loss": 4.400851249694824 }, { "id": "identity_315698", "image": "AI2D/images/3265.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does G represent in this?\n\nChoices:\n(A) mitochondrion\n(B) none of the above\n(C) lysosome\n(D) centriole" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.15625, "loss": 4.17343282699585 }, { "id": "identity_295994", "image": "FigureQA/images/6705.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater in value, Web Purple or Light Salmon?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.46875, "loss": 4.532818794250488 }, { "id": "identity_333735", "image": "A-OKVQA/images/514243.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of shoes is this person wearing?\nChoices:\n(A) sneakers\n(B) heels\n(C) flat\n(D) water shoes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.5, "loss": 4.4267168045043945 }, { "id": "identity_322189", "image": "VQA2.0/images/474069.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is this clock tower?" }, { "from": "gpt", "value": "The answer is tan" } ], "clip_score": 25.109375, "loss": 4.270064353942871 }, { "id": "identity_7604", "image": "Super-CLEVR/images/superCLEVR_new_001499.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the number of small matte motorbikes that are in front of the yellow rubber suv the same as the number of gray cars?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 31.375, "loss": 4.849976539611816 }, { "id": "identity_25616", "image": "A-OKVQA/images/216726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would most likely be found here?\nChoices:\n(A) king cake\n(B) salmon\n(C) horse\n(D) tires" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.4609375, "loss": 4.158231735229492 }, { "id": "identity_35679", "image": "PMC-VQA/images/PMC4802206_f11_13588.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of images were taken? \nChoices:\n(A) Ultrasound images. \n(B) CT scan images. \n(C) X-ray images. \n(D) MRI images. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.59375, "loss": 4.1921563148498535 }, { "id": "identity_27149", "image": "ScienceQA/images/10381.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these three objects have in common?\nChoices:\n(A) rough\n(B) fragile\n(C) flexible" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.546875, "loss": 4.374563694000244 }, { "id": "identity_30476", "image": "VQA2.0/images/165347.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bus?" }, { "from": "gpt", "value": "The answer is white" } ], "clip_score": 27.1875, "loss": 4.091592788696289 }, { "id": "identity_279044", "image": "IconQA/images/56568.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nOptions:\n(A) turn\n(B) flip\n(C) slide" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.359375, "loss": 4.719264984130859 }, { "id": "identity_273853", "image": "A-OKVQA/images/94052.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What item is located at the top of the dining table among the following options: (A) cat, (B) bananas, (C) fish heads, (D) yule log?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.59375, "loss": 4.446314811706543 }, { "id": "identity_45399", "image": "DocVQA/images/nypm0020_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many versions of the solicitation will be mailed?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.3125, "loss": 4.540949821472168 }, { "id": "identity_45889", "image": "DocVQA/images/ypbm0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What industries are listed for specific meetings to be arranged?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 22.0625, "loss": 4.799539089202881 }, { "id": "identity_5702", "image": "TextVQA/images/070d47f646990950.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what brand of alcohol is that?" }, { "from": "gpt", "value": "The answer is barrique" } ], "clip_score": 20.5, "loss": 4.941133499145508 }, { "id": "identity_284513", "image": "FigureQA/images/64842.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Violet the one with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.796875, "loss": 4.760037422180176 }, { "id": "identity_333311", "image": "PMC-VQA/images/PMC3418000_F2_149475.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the body does this image show?\nChoices:\n(A) Hand\n(B) Foot\n(C) Brain\n(D) Spine" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.53125, "loss": 4.685833930969238 }, { "id": "identity_252654", "image": "PMC-VQA/images/PMC2726940_pone-0006738-g006_43617.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the image, what is the purpose of the CT scanning tray, and how does its position relate to the overall analysis of the specimen?" }, { "from": "gpt", "value": "The answer is Ensures accurate, consistent scans for precise measurements and detailed images" } ], "clip_score": 23.9375, "loss": 4.38994836807251 }, { "id": "identity_269368", "image": "VQA2.0/images/119760.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the sofa cushion?" }, { "from": "gpt", "value": "The answer is tan" } ], "clip_score": 21.734375, "loss": 4.091753959655762 }, { "id": "identity_110010", "image": "TextVQA/images/f382dc4def4feb2e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the liquid in the glass?" }, { "from": "gpt", "value": "The answer is Cloudy yellow" } ], "clip_score": 25.5, "loss": 4.255465030670166 }, { "id": "identity_109345", "image": "VizWiz/images/VizWiz_train_00007868.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand name of the product as indicated on the price tag?" }, { "from": "gpt", "value": "The answer is Hall's" } ], "clip_score": 25.15625, "loss": 4.464845180511475 }, { "id": "identity_20356", "image": "DVQA/images/bar_train_00061396.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second bar from the left in each group?" }, { "from": "gpt", "value": "The answer is mode" } ], "clip_score": 18.734375, "loss": 4.133709907531738 }, { "id": "identity_38533", "image": "PMC-VQA/images/PMC5602807_Fig1_216782.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which lobe of the liver is affected by the tumor? \nChoices:\n(A) right lobe \n(B) left lobe \n(C) both lobes \n(D) unknown " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.578125, "loss": 4.297737121582031 }, { "id": "identity_143209", "image": "MapQA/images/map_436.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a percentage of petitions between 74.7% and 74.7%?" }, { "from": "gpt", "value": "The answer is an integer number" } ], "clip_score": 35.84375, "loss": 4.087881088256836 }, { "id": "identity_333463", "image": "PMC-VQA/images/PMC8227570_children-08-00450-f001_476791.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What structure is indicated by this arrow?\nChoices:\n(A) Pituitary gland\n(B) Corpus Callosum\n(C) Spinal cord\n(D) Cerebellum" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.6875, "loss": 5.372104644775391 }, { "id": "identity_124182", "image": "TextVQA/images/d54c3f498e43774e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many characters are visible on the rooftop of the building in the right magazine cover?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 23.59375, "loss": 4.1056294441223145 }, { "id": "identity_110511", "image": "Super-CLEVR/images/superCLEVR_new_001087.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly to the left of the green car?" }, { "from": "gpt", "value": "The answer is An airplane" } ], "clip_score": 22.828125, "loss": 4.038741111755371 }, { "id": "identity_225445", "image": "Super-CLEVR/images/superCLEVR_new_001985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is the same shape as the rubber thing and is the same size as the gray metallic scooter?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 26.953125, "loss": 4.894885063171387 }, { "id": "identity_276360", "image": "A-OKVQA/images/575222.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What animal is depicted on the front of the train? Is it a bear, clown, elephant, or zebra?\nChoices:\n(A) bear\n(B) clown\n(C) elephant\n(D) zebra" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.859375, "loss": 4.130954265594482 }, { "id": "identity_32015", "image": "PMC-VQA/images/PMC7806997_Fig2_342618.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of image is shown in panel A? \nChoices:\n(A) T1 image \n(B) FLAIR image \n(C) DWI image \n(D) T2 image " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.25, "loss": 4.537224292755127 }, { "id": "identity_294423", "image": "ScienceQA/images/8239.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image provided with the following options: (A) New Hampshire, (B) Connecticut, (C) New York, or (D) Rhode Island?" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.0, "loss": 4.1107587814331055 }, { "id": "identity_294749", "image": "PMC-VQA/images/PMC6408898_RSPB20182733F2_447708.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the blue color indicate in the lesion-symptom map?\n(A) Change in GCC thickness in the lower quadrant\n(B) Change in GCC thickness in the upper quadrant\n(C) No change in GCC thickness\n(D) Change in GCC thickness in both the quadrants." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.625, "loss": 4.196417808532715 }, { "id": "identity_284936", "image": "TQA/images/question_images/human_system_excretory_6108.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which structure do the renal artery and vein branch off?\nChoices:\n(A) kidney\n(B) Vena cava\n(C) ureter\n(D) dorsal aorta" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.3125, "loss": 4.3568596839904785 }, { "id": "identity_326162", "image": "A-OKVQA/images/250344.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the blanket wrapped around this person?\nChoices:\n(A) is cold\n(B) is hiding\n(C) is confused\n(D) showing off" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.1875, "loss": 4.0613179206848145 }, { "id": "identity_336329", "image": "PMC-VQA/images/PMC7815743_Fig3_345489.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What method was used to identify the tumor causing osteomalacia?\nChoices:\n(A) Magnetic resonance imaging (MRI)\n(B) Positron Emission Tomography (PET)\n(C) X-ray\n(D) Computed Tomography (CT)" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.765625, "loss": 4.312056541442871 }, { "id": "identity_213633", "image": "A-OKVQA/images/158378.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible shelves are there on the wall unit?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 22.640625, "loss": 4.909575462341309 }, { "id": "identity_265370", "image": "A-OKVQA/images/512838.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which object is located at the center of the chips stand logo among the following choices: ship, boat, wheel, or potato?\nChoices:\n(A) ship\n(B) boat\n(C) wheel\n(D) potato" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 5.55859375, "loss": 4.05821418762207 }, { "id": "identity_49154", "image": "DocVQA/images/jrgv0228_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity is planned after dinner at 6.00 pm?" }, { "from": "gpt", "value": "The answer is Theatre visit" } ], "clip_score": 23.625, "loss": 5.1514363288879395 }, { "id": "identity_106947", "image": "TextVQA/images/91890335c83cbd76.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many red square game pieces can be seen on the paper grid?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 22.484375, "loss": 4.487706184387207 }, { "id": "identity_129550", "image": "TextVQA/images/8ded7f21ae60aff4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible cabinet doors are there in the kitchen?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 21.078125, "loss": 4.65219259262085 }, { "id": "identity_29432", "image": "PMC-VQA/images/PMC7051105_FIG2_126949.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the white arrow in this image? \nChoices:\n(A) Torn muscle \n(B) Eighth arch fracture \n(C) Dislocated joint \n(D) Enlarged intercostal space " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.1875, "loss": 4.171921730041504 }, { "id": "identity_252273", "image": "DocVQA/images/jybx0223_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: According to the report, what are the two main reasons the industry supports the specific duty structure for cigarettes over the ad-valorem structure, and how does this relate to their view on GST?" }, { "from": "gpt", "value": "The answer is Beneficial, superior; GST seen as tax on tax" } ], "clip_score": 20.59375, "loss": 4.755638599395752 }, { "id": "identity_269888", "image": "ScienceQA/images/10862.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) New York\n(B) North Carolina\n(C) New Jersey\n(D) South Carolina" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.796875, "loss": 4.127174377441406 }, { "id": "identity_314923", "image": "TQA/images/question_images/human_system_circulatory_3650.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the part of the heart in between the vena cava and another chamber?\nChoices:\n(A) left atrium\n(B) right atrium\n(C) aorta\n(D) left ventricle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.25, "loss": 4.338261604309082 }, { "id": "identity_29312", "image": "A-OKVQA/images/314993.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do they hope you will do after you rest?\nChoices:\n(A) leave\n(B) go jogging\n(C) buy soda\n(D) help them" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.9375, "loss": 4.177252292633057 }, { "id": "identity_334725", "image": "AI2D/images/2747.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The stages of the season are labeled A, J, F, and L. Which season is indicated at A?\nChoices:\n(A) summer\n(B) autumn\n(C) spring\n(D) winter" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.203125, "loss": 4.158725738525391 }, { "id": "identity_143023", "image": "DVQA/images/bar_train_00050305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the top bar in the \"clerk\" group?" }, { "from": "gpt", "value": "The answer is storm" } ], "clip_score": 21.03125, "loss": 4.914789199829102 }, { "id": "identity_277361", "image": "AI2D/images/3764.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the alternative name for the midriff organ among the following options: (A) carina, (B) pharynx, (C) diaphragm, or (D) larynx?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.96875, "loss": 4.15250301361084 }, { "id": "identity_223740", "image": "VQA2.0/images/170513.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing sunglasses?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 14.125, "loss": 5.022533416748047 }, { "id": "identity_263466", "image": "A-OKVQA/images/325184.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In comparison to the elephants, what is the elevation of those looking at them? Are they at the same elevation, lower, changing, or higher?\nChoices:\n(A) same\n(B) lower\n(C) changing\n(D) higher" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.375, "loss": 4.147111415863037 }, { "id": "identity_7178", "image": "A-OKVQA/images/485788.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: People sit in what area?\nChoices:\n(A) alleyway\n(B) stripper bar\n(C) public park\n(D) mall" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.28125, "loss": 4.281936168670654 }, { "id": "identity_330032", "image": "IconQA/images/20502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, how likely is it that you will get a black one?\nChoices:\n(A) impossible\n(B) probable\n(C) certain\n(D) unlikely" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.4375, "loss": 4.319441795349121 }, { "id": "identity_298567", "image": "A-OKVQA/images/204036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which mode of transportation is depicted in the image? \nOptions:\n(A) road\n(B) air\n(C) rail\n(D) water" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.6875, "loss": 4.723480701446533 }, { "id": "identity_3020", "image": "AI2D/images/3185.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the outermost part of the cell?\nChoices:\n(A) none of the above\n(B) chloroplast\n(C) cell wall\n(D) ribosome" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.984375, "loss": 4.103575229644775 }, { "id": "identity_270043", "image": "DVQA/images/bar_train_00102022.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In how many groups of bars is there at least one bar with a value less than 9?" }, { "from": "gpt", "value": "The answer is six" } ], "clip_score": 23.484375, "loss": 4.061977386474609 }, { "id": "identity_334312", "image": "VizWiz/images/VizWiz_train_00006336.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of drink is this?" }, { "from": "gpt", "value": "The answer is tin soda" } ], "clip_score": 28.234375, "loss": 4.093924045562744 }, { "id": "identity_334722", "image": "A-OKVQA/images/123907.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What emotion is the person most likely feeling?\nChoices:\n(A) hate\n(B) anger\n(C) fear\n(D) anticipation" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 4.9609375, "loss": 4.090123653411865 }, { "id": "identity_9277", "image": "AI2D/images/829.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does A represent?\nChoices:\n(A) Larva\n(B) Adult\n(C) Pupa\n(D) Eggs" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.0, "loss": 4.054705619812012 }, { "id": "identity_327145", "image": "ScienceQA/images/10707.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of this colony?\nChoices:\n(A) New York\n(B) Virginia\n(C) New Jersey\n(D) Vermont" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.21875, "loss": 4.18267297744751 }, { "id": "identity_316539", "image": "TextVQA/images/1abcf3116b521888.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the other text on this say?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 16.0, "loss": 4.063018321990967 }, { "id": "identity_226831", "image": "TextVQA/images/e5b11d8b9ccc3025.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what significant historical event is depicted in this image, and what were the main consequences of this event for the local population?" }, { "from": "gpt", "value": "The answer is Second Fleet arrival, poor convict conditions, colony relief, Indigenous disruption" } ], "clip_score": 21.71875, "loss": 4.241032600402832 }, { "id": "identity_257460", "image": "TextVQA/images/ec2f58fb65734522.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sponsor's name on the team's jerseys, and how many players are wearing jerseys with even numbers?" }, { "from": "gpt", "value": "The answer is ORYX GTL, seven" } ], "clip_score": 22.671875, "loss": 4.469966411590576 }, { "id": "identity_123140", "image": "A-OKVQA/images/216449.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the drink in the glass next to the pizza?" }, { "from": "gpt", "value": "The answer is Clear" } ], "clip_score": 17.953125, "loss": 4.114077568054199 }, { "id": "identity_299192", "image": "PMC-VQA/images/PMC7317310_fig1_199546.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of stain was utilized to observe the lung sections among the following options: (A) Fluorescent stain, (B) HE stain, (C) Iodine stain, or (D) Gram stain?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.53125, "loss": 4.743011951446533 }, { "id": "identity_277877", "image": "PMC-VQA/images/PMC6375371_F1_436076.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the specific location of the cyst observed on the MRI scan among the following options? \n(A) D5-D8 \n(B) L2-L5 \n(C) Cervical spine \n(D) Lower pole." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.25, "loss": 4.7400994300842285 }, { "id": "identity_9125", "image": "Super-CLEVR/images/superCLEVR_new_000127.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any other thing that is the same size as the blue matte motorbike?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 19.265625, "loss": 5.107468605041504 }, { "id": "identity_300590", "image": "ScienceQA/images/2441.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Raleigh\n(B) Burlington\n(C) Montpelier\n(D) Providence" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.96875, "loss": 4.123553276062012 }, { "id": "identity_16180", "image": "IconQA/images/88603.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) slide\n(B) flip\n(C) turn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.96875, "loss": 4.483338832855225 }, { "id": "identity_318493", "image": "DocVQA/images/zmvg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of Mrs. Charlene Moore?" }, { "from": "gpt", "value": "The answer is director" } ], "clip_score": 16.90625, "loss": 4.07531213760376 }, { "id": "identity_14477", "image": "AI2D/images/1082.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label is pointing to Vacuole?\nChoices:\n(A) B\n(B) J\n(C) A\n(D) H" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.046875, "loss": 4.063675880432129 }, { "id": "identity_9138", "image": "DocVQA/images/ysbw0217_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the author of the meeting society of adolescent medicine?" }, { "from": "gpt", "value": "The answer is Davis" } ], "clip_score": 16.8125, "loss": 4.771361827850342 }, { "id": "identity_247450", "image": "TextVQA/images/ed7e1688e00a4f45.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the publication date of this book and its name?" }, { "from": "gpt", "value": "The answer is 13 de Fon Fon" } ], "clip_score": 21.125, "loss": 4.015999794006348 }, { "id": "identity_272185", "image": "ScienceQA/images/5804.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Texas?\n(A) Austin\n(B) Frankfort\n(C) Santa Fe\n(D) Houston" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.40625, "loss": 4.229247570037842 }, { "id": "identity_266192", "image": "TQA/images/abc_question_images/photosynthesis_11312.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the symbol that represents water from the given choices:\n(A) Y\n(B) V\n(C) P\n(D) J" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.21875, "loss": 4.730030059814453 }, { "id": "identity_326642", "image": "Super-CLEVR/images/superCLEVR_new_001481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are any small cyan helicopters visible?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 19.5625, "loss": 5.208756446838379 }, { "id": "identity_197280", "image": "PMC-VQA/images/PMC5147411_F3_103813.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different views of the brain are presented in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 31.40625, "loss": 4.562735080718994 }, { "id": "identity_70767", "image": "AI2D/images/3128.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled as \"STOMA\" in the diagram?" }, { "from": "gpt", "value": "The answer is Gas exchange" } ], "clip_score": 25.65625, "loss": 4.051258563995361 }, { "id": "identity_326865", "image": "A-OKVQA/images/85081.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is this person taking part in?\nChoices:\n(A) hiking\n(B) roller skating\n(C) skiboarding\n(D) skiing" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.6875, "loss": 4.223324775695801 }, { "id": "identity_249992", "image": "Super-CLEVR/images/superCLEVR_new_001753.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image are the same size as the green sedan, and what are their colors?" }, { "from": "gpt", "value": "The answer is Zero" } ], "clip_score": 29.328125, "loss": 4.6252336502075195 }, { "id": "identity_144558", "image": "DocVQA/images/kzhb0228_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the Principal Investigator?" }, { "from": "gpt", "value": "The answer is Willhoft, Donald G" } ], "clip_score": 24.296875, "loss": 4.035315036773682 }, { "id": "identity_311904", "image": "DVQA/images/bar_train_00060914.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many groups of bars have at least one bar with a value greater than 6?" }, { "from": "gpt", "value": "The answer is five" } ], "clip_score": 22.859375, "loss": 4.128641128540039 }, { "id": "identity_335007", "image": "A-OKVQA/images/176008.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this location?\nChoices:\n(A) subway\n(B) plane\n(C) bus\n(D) train" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.125, "loss": 4.172105312347412 }, { "id": "identity_104730", "image": "VQA-AS/images/12955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stools are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.96875, "loss": 4.5043745040893555 }, { "id": "identity_327574", "image": "PMC-VQA/images/PMC2409170_fig3_23603.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of staining is shown in this area?\nChoices:\n(A) PAS staining\n(B) H&E staining\n(C) Immunofluorescence staining\n(D) Giemsa staining" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.03125, "loss": 4.1227216720581055 }, { "id": "identity_33486", "image": "PMC-VQA/images/PMC5907520_fig1_303828.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging modality was used to obtain the image?\nChoices:\n(A) CT scan \n(B) X-ray \n(C) MRI \n(D) Ultrasound " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.078125, "loss": 4.115442276000977 }, { "id": "identity_314407", "image": "Super-CLEVR/images/superCLEVR_new_000350.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the color of the dirtbike to the left of the large purple thing the same as the color of the large motorbike in front of the large yellow metal road bike?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 28.546875, "loss": 5.277094841003418 }, { "id": "identity_290344", "image": "PMC-VQA/images/PMC2575188_F2_29522.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of tumor is being pointed to by the white arrow in the image? Is it:\n(A) Melanoma\n(B) Subcutaneous\n(C) Brain\n(D) Pancreatic" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.03125, "loss": 4.854118824005127 }, { "id": "identity_56598", "image": "DVQA/images/bar_train_00097088.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest number of people preferring the 'police' object?" }, { "from": "gpt", "value": "The answer is size" } ], "clip_score": 31.875, "loss": 4.295183181762695 }, { "id": "identity_21725", "image": "PMC-VQA/images/PMC6060136_Fig2_345899.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 1. What was the orientation of the long axis of the tooth?\nChoices:\n(A) parallel to labial surfaces \n(B) perpendicular to labial surfaces \n(C) parallel to occlusal surfaces \n(D) perpendicular to occlusal surfaces" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.75, "loss": 4.13018274307251 }, { "id": "identity_34022", "image": "IconQA/images/91426.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time does the clock show?\nChoices:\n(A) 8:00\n(B) 9:00" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.046875, "loss": 4.040587902069092 }, { "id": "identity_56567", "image": "DocVQA/images/khwg0023_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the handwritten text written above \"CORE REQUIREMENTS\"?" }, { "from": "gpt", "value": "The answer is All of the above plus checked below" } ], "clip_score": 19.328125, "loss": 4.001678466796875 }, { "id": "identity_324651", "image": "A-OKVQA/images/59367.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where would you most likely see these long orange things?\nChoices:\n(A) classroom\n(B) doctor's office\n(C) pool\n(D) cafe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.671875, "loss": 4.200405120849609 }, { "id": "identity_44833", "image": "MapQA/images/map_1579.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color shade representing the range 2220132-2300234?" }, { "from": "gpt", "value": "The answer is Dark Brown" } ], "clip_score": 21.203125, "loss": 4.695478439331055 }, { "id": "identity_33399", "image": "PMC-VQA/images/PMC2966750_F0001_77672.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the specific type of injury identified in the radiograph? \nChoices:\n(A) Humeral fracture and Type IV AC separation \n(B) Coracoid fracture and Type III AC separation \n(C) Shoulder blade fracture and Type V AC separation \n(D) Clavicle fracture and Type II AC separation " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.59375, "loss": 4.456043243408203 }, { "id": "identity_2000", "image": "A-OKVQA/images/421978.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What sort of product do ad men use these type horses to market?\nChoices:\n(A) horse food\n(B) beer\n(C) pizza\n(D) muffins" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.796875, "loss": 4.0432586669921875 }, { "id": "identity_308119", "image": "PMC-VQA/images/PMC8272411_fig-2_7328.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the blue arrow indicate in the chest CT reconstruction?\nChoices:\n(A) Fistulous connection \n(B) Lung inflammation \n(C) Heart attack \n(D) Pseudoaneurysm" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.96875, "loss": 5.1175127029418945 }, { "id": "identity_20973", "image": "PMC-VQA/images/PMC5415669_fig3_159996.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the red arrow in the image?\nChoices:\n(A) Septal thickening \n(B) Chronic inflammatory infiltrate \n(C) Alveolar destruction \n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.421875, "loss": 4.680727481842041 }, { "id": "identity_296925", "image": "PMC-VQA/images/PMC5496392_Fig4_186289.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the V sign on lateral radiographs signify?\nChoices:\n(A) position of the spine\n(B) location of a fracture\n(C) alignment of the hips\n(D) angle of the knee" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.8125, "loss": 4.636693000793457 }, { "id": "identity_241952", "image": "TextVQA/images/c10cb03d3fa9c173.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which companies are sponsors, and how many different companies are there?" }, { "from": "gpt", "value": "The answer is four" } ], "clip_score": 11.3203125, "loss": 5.413186073303223 }, { "id": "identity_278686", "image": "AI2D/images/3656.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label that indicates the pulmonary artery among the following choices:\n(A) B\n(B) F\n(C) M\n(D) Q" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.71875, "loss": 4.907749176025391 }, { "id": "identity_331255", "image": "A-OKVQA/images/83060.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of bag is hanging from the cupboard?\nChoices:\n(A) grocery bag\n(B) backpack\n(C) purse\n(D) satchel" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.671875, "loss": 4.145591735839844 }, { "id": "identity_301954", "image": "A-OKVQA/images/313201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What food is this place known for?\nChoices:\n(A) pizza\n(B) poutine\n(C) sushi\n(D) borsht" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.8125, "loss": 4.08397912979126 }, { "id": "identity_329928", "image": "PMC-VQA/images/PMC7560692_fig0010_274447.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the goal of this image?\nChoices:\n(A) Show double-label staining of Aβ and Iba1 \n(B) Show double-label staining of HAP1 and NeuN \n(C) Display the structure of neurons \n(D) Show a comparison between NeuN and Iba1." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.265625, "loss": 4.719084739685059 }, { "id": "identity_34317", "image": "A-OKVQA/images/102461.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the officers here observe?\nChoices:\n(A) party\n(B) horse race\n(C) candled cake\n(D) protest" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.5625, "loss": 4.30484676361084 }, { "id": "identity_198274", "image": "TextVQA/images/f98670f6cba969ea.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing white tops in the crowd directly behind the baseball player with jersey number 38?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 31.78125, "loss": 4.548732757568359 }, { "id": "identity_264248", "image": "A-OKVQA/images/121457.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the shoreline, which boat is the most visible among the following options: a sailboat, nothing, a jet ski, or a motorboat?\nChoices:\n(A) sailboat\n(B) nothing\n(C) jet ski\n(D) motorboat" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.046875, "loss": 4.048651695251465 }, { "id": "identity_24725", "image": "A-OKVQA/images/515660.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the yellow substance for?\nChoices:\n(A) sweetening beverage\n(B) cleaning plate\n(C) cleaning hands\n(D) dipping sauce" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 11.5, "loss": 4.496249198913574 }, { "id": "identity_210100", "image": "A-OKVQA/images/36417.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the stapler on the desk?" }, { "from": "gpt", "value": "The answer is Silver" } ], "clip_score": 15.4453125, "loss": 4.016122817993164 }, { "id": "identity_207", "image": "PMC-VQA/images/PMC7668509_F3_298995.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the procedure shown in the radiographs? \nChoices:\n(A) Endodontic therapy \n(B) Implant placement \n(C) Crown lengthening \n(D) Orthodontic treatment " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.3125, "loss": 4.278517246246338 }, { "id": "identity_284759", "image": "ScienceQA/images/16329.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic best describes this object?\nChoices:\n(A) scratchy\n(B) sweet" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.21875, "loss": 4.3739519119262695 }, { "id": "identity_266505", "image": "VQA2.0/images/81188.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you describe the outfit the lady is wearing?" }, { "from": "gpt", "value": "The answer is hat" } ], "clip_score": 18.375, "loss": 4.2604780197143555 }, { "id": "identity_233", "image": "ScienceQA/images/5908.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Florida?\nChoices:\n(A) Tallahassee\n(B) Salem\n(C) Tampa\n(D) Orlando" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.078125, "loss": 4.089571952819824 }, { "id": "identity_216466", "image": "A-OKVQA/images/382873.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 14.640625, "loss": 4.293885231018066 }, { "id": "identity_310875", "image": "AI2D/images/3001.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is at the center of this planet?\nChoices:\n(A) water\n(B) metal\n(C) ice\n(D) none of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.90625, "loss": 4.250414848327637 }, { "id": "identity_237161", "image": "Super-CLEVR/images/superCLEVR_new_001340.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the shiny aeroplane in front of the big purple thing and the metallic aeroplane behind the big aeroplane both larger than the blue aeroplane?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 29.5, "loss": 4.924940586090088 }, { "id": "identity_30318", "image": "A-OKVQA/images/395822.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is inside the tall glasses?\nChoices:\n(A) candles\n(B) beer\n(C) wine\n(D) candy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.359375, "loss": 4.109364986419678 }, { "id": "identity_119206", "image": "VQA2.0/images/344566.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the frisbee in the air?" }, { "from": "gpt", "value": "The answer is within ten words" } ], "clip_score": 23.921875, "loss": 5.042721748352051 }, { "id": "identity_264889", "image": "FigureQA/images/39234.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Medium Purple or Orange?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.71875, "loss": 4.470879077911377 }, { "id": "identity_149854", "image": "DocVQA/images/ztwc0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person who confirmed the appointment?" }, { "from": "gpt", "value": "The answer is Jane Wyatt" } ], "clip_score": 25.015625, "loss": 4.31051778793335 }, { "id": "identity_288692", "image": "IconQA/images/106316.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture comes after the first picture of the moon?\nChoices:\n(A) moon\n(B) star\n(C) rocket" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.71875, "loss": 4.074992656707764 }, { "id": "identity_293516", "image": "A-OKVQA/images/176588.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which activity are the individuals engaged in?\nOptions:\n(A) playing volleyball\n(B) playing frisbee\n(C) dancing\n(D) hopscotch" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.9375, "loss": 4.319750785827637 }, { "id": "identity_275890", "image": "PMC-VQA/images/PMC3921228_pone-0088785-g001_266413.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the images, which side of the brain is depicted?\nChoices:\n(A) right\n(B) left\n(C) both\n(D) none" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.796875, "loss": 4.326470375061035 }, { "id": "identity_201193", "image": "A-OKVQA/images/162753.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are wearing striped uniforms?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 16.25, "loss": 4.7416229248046875 }, { "id": "identity_26035", "image": "A-OKVQA/images/45222.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what city is the company that makes this beverage located?\nChoices:\n(A) philadelphia\n(B) des moines\n(C) newport\n(D) miami" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.3125, "loss": 4.0175580978393555 }, { "id": "identity_204292", "image": "PMC-VQA/images/PMC3942746_toxins-06-00464-f004_271682.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the inclusions within the cells?" }, { "from": "gpt", "value": "The answer is Orange and yellow" } ], "clip_score": 22.265625, "loss": 4.03231954574585 }, { "id": "identity_327729", "image": "A-OKVQA/images/189741.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are most of the people shielded from?\nChoices:\n(A) upcoming rain\n(B) stampeding elephants\n(C) fire\n(D) falling anvils" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.796875, "loss": 4.290071964263916 }, { "id": "identity_264718", "image": "PMC-VQA/images/PMC6799394_F1_52713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the artery depicted in the images:\n(A) Right Vertebral Artery\n(B) Left Vertebral Artery\n(C) Femoral Artery\n(D) Carotid Artery" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.0, "loss": 4.084137916564941 }, { "id": "identity_325369", "image": "A-OKVQA/images/497610.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fun activity is depicted?\nChoices:\n(A) snow boarding\n(B) bumper cars\n(C) skiing\n(D) rock climbing" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.234375, "loss": 4.274295806884766 }, { "id": "identity_302077", "image": "AI2D/images/3513.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does letter F indicate?\nChoices:\n(A) Nucleus\n(B) Chloroplast\n(C) Cell Wall\n(D) Cell Membrane" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.84375, "loss": 4.566053867340088 }, { "id": "identity_326506", "image": "A-OKVQA/images/90103.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of vehicles are not allowed past this pole?\nChoices:\n(A) taxi\n(B) bikes\n(C) van\n(D) bus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.28125, "loss": 4.197659492492676 }, { "id": "identity_264960", "image": "A-OKVQA/images/238448.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is located on both sides of the clock?\nChoices:\n(A) cats\n(B) reptiles\n(C) birds\n(D) statues" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.828125, "loss": 4.147554874420166 }, { "id": "identity_187449", "image": "A-OKVQA/images/181836.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the queue?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 17.53125, "loss": 4.320619106292725 }, { "id": "identity_212551", "image": "TextVQA/images/5298ad00fa6c17a0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many palm trees can be seen on the right side of the street?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 27.578125, "loss": 4.694058418273926 }, { "id": "identity_297408", "image": "FigureQA/images/5062.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Dark Gray or Light Coral?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.546875, "loss": 4.528936862945557 }, { "id": "identity_188015", "image": "A-OKVQA/images/432704.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lifebuoys are visible on the boat?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 26.171875, "loss": 4.368311882019043 }, { "id": "identity_336102", "image": "DocVQA/images/jzbn0226_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the abbreviation for this term?" }, { "from": "gpt", "value": "The answer is BEs" } ], "clip_score": 16.671875, "loss": 4.781012058258057 }, { "id": "identity_11002", "image": "Super-CLEVR/images/superCLEVR_new_000201.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the brown thing that is in front of the tiny brown matte aeroplane and behind the brown motorbike?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 26.421875, "loss": 4.242525100708008 }, { "id": "identity_261287", "image": "A-OKVQA/images/446481.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At what time of day is it permissible to only go straight or turn right?\nChoices:\n(A) late afternoon\n(B) midnight\n(C) morning\n(D) none" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.4453125, "loss": 4.175799369812012 }, { "id": "identity_332848", "image": "Super-CLEVR/images/superCLEVR_new_000602.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other objects of the same shape as this large brown object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 11.859375, "loss": 4.954468727111816 }, { "id": "identity_200467", "image": "A-OKVQA/images/25996.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are seated on the ground?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 16.609375, "loss": 4.691012382507324 }, { "id": "identity_111888", "image": "TextVQA/images/002ed4b2e22b51e6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of years being celebrated as shown on the bus?" }, { "from": "gpt", "value": "The answer is 322 anos" } ], "clip_score": 27.75, "loss": 4.354065418243408 }, { "id": "identity_316698", "image": "ScienceQA/images/15012.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic do these items share?\nChoices:\n(A) slippery\n(B) bouncy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.78125, "loss": 4.1502604484558105 }, { "id": "identity_149968", "image": "DocVQA/images/jzbn0226_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: According to the text, what does ACC suggest EPA develop?" }, { "from": "gpt", "value": "The answer is A neutral name to describe a list of candidates for potential prioritization" } ], "clip_score": 24.453125, "loss": 4.22346305847168 }, { "id": "identity_22764", "image": "PMC-VQA/images/PMC5815729_F1_276744.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What method was used for image registration? \nChoices:\n(A) CT0 \n(B) CT50 \n(C) Rigid \n(D) Manual " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.078125, "loss": 4.111754417419434 }, { "id": "identity_329289", "image": "DocVQA/images/nyjw0228_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of Dr. Darrel Van Campen?" }, { "from": "gpt", "value": "The answer is Director" } ], "clip_score": 18.921875, "loss": 4.237112522125244 }, { "id": "identity_300529", "image": "A-OKVQA/images/340736.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of place is this likely to be?\nChoices:\n(A) buffet restaurant\n(B) market\n(C) commercial kitchen\n(D) domestic kitchen" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.0625, "loss": 4.2195587158203125 }, { "id": "identity_287724", "image": "PMC-VQA/images/PMC4891826_Fig2_37394.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the arrangement of cartilage formation depicted in the images?\nChoices:\n(A) Contiguous \n(B) Noncontiguous \n(C) Regular \n(D) Irregular" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.671875, "loss": 4.495379447937012 }, { "id": "identity_287473", "image": "A-OKVQA/images/315621.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be seen in front of the yellow building among the following options: a monkey, a cow, a motorbike, or a baby?\nChoices:\n(A) monkey\n(B) cow\n(C) motor bike\n(D) baby" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.296875, "loss": 4.106735706329346 }, { "id": "identity_221826", "image": "PMC-VQA/images/PMC6779781_F2_46554.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where are the retinal abnormalities located?" }, { "from": "gpt", "value": "The answer is Not applicable" } ], "clip_score": 25.703125, "loss": 4.850006580352783 }, { "id": "identity_243753", "image": "Super-CLEVR/images/superCLEVR_new_000378.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of objects in the image, and how many of them are not airplanes?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 25.875, "loss": 4.1421074867248535 }, { "id": "identity_277040", "image": "DVQA/images/bar_train_00075232.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm in the dataset has a smaller cost: the algorithm school or the algorithm road in the dataset canvas, based on their accuracy?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 30.890625, "loss": 4.215084552764893 }, { "id": "identity_254366", "image": "IconQA/images/48932.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the shape is not blue, and what is the sum of the numerators and denominators of the fractions representing the blue and non-blue parts?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 32.09375, "loss": 4.597503662109375 }, { "id": "identity_71171", "image": "TQA/images/abc_question_images/faults_11736.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What geological process is indicated by the white arrows?" }, { "from": "gpt", "value": "The answer is Rifting" } ], "clip_score": 30.078125, "loss": 4.4283447265625 }, { "id": "identity_297739", "image": "PMC-VQA/images/PMC5085966_Fig1_89262.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to assess the tumor? \nOptions:\n(A) MRI\n(B) Endoscopic ultrasonography\n(C) X-ray\n(D) CT-scan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.03125, "loss": 4.492770195007324 }, { "id": "identity_130652", "image": "TextVQA/images/f265655b7ede17f7.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many camera lenses are visible on the phone case?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 21.6875, "loss": 4.669543266296387 }, { "id": "identity_265618", "image": "AI2D/images/2860.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, which part is represented by letter A?\nChoices:\n(A) Middle Ear\n(B) Pinna\n(C) Outer Ear\n(D) Stapes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.5625, "loss": 4.3137383460998535 }, { "id": "identity_14380", "image": "A-OKVQA/images/55594.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From what do most of the items sold here come from?\nChoices:\n(A) plants\n(B) people\n(C) factory\n(D) animals" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.1875, "loss": 4.0903849601745605 }, { "id": "identity_266982", "image": "FigureQA/images/59258.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Maroon the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.21875, "loss": 4.5975446701049805 }, { "id": "identity_225530", "image": "Super-CLEVR/images/superCLEVR_new_001757.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the vehicle that is to the right of the green car that is behind the big shiny thing that is left of the bus that is behind the small articulated bus?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 29.421875, "loss": 4.109888553619385 }, { "id": "identity_272679", "image": "PMC-VQA/images/PMC7403371_Fig1_226922.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the abnormality depicted in the image. \nOptions:\n(A) Ventricular septal defect,\n(B) Patent ductus arteriosus,\n(C) Atrial septal defect,\n(D) Tetralogy of Fallot." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.09375, "loss": 4.303154945373535 }, { "id": "identity_302812", "image": "PMC-VQA/images/PMC4336564_fig05_360838.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the white dashed arrows indicate in this image? \nChoices:\n(A) Particle internalisation \n(B) Particle agglomeration \n(C) Cell nuclei staining \n(D) Particles outside from cells" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.453125, "loss": 4.47510290145874 }, { "id": "identity_221892", "image": "VQA2.0/images/357572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the person holding the sign wearing on their head?" }, { "from": "gpt", "value": "The answer is A red hat" } ], "clip_score": 20.8125, "loss": 4.21411657333374 }, { "id": "identity_333636", "image": "A-OKVQA/images/13944.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of work is the person in the lift doing?\nChoices:\n(A) animal remediation\n(B) pothole\n(C) electrical\n(D) road sign" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.28125, "loss": 4.470484256744385 }, { "id": "identity_7797", "image": "PMC-VQA/images/PMC5035136_f0025_76894.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being shown in the right-hand scatterplots? \nChoices:\n(A) PPI correlation \n(B) Contrast of feature \n(C) MDN connectivity \n(D) Performance measurement " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.1875, "loss": 4.48138952255249 }, { "id": "identity_31672", "image": "Super-CLEVR/images/superCLEVR_new_000972.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more large metal tandem bikes than yellow matte regular buss?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 26.65625, "loss": 4.89387845993042 }, { "id": "identity_332379", "image": "PMC-VQA/images/PMC6627094_ijms-20-03047-f007_7646.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was captured in the study?\nChoices:\n(A) HER2 expression\n(B) pancreatic cancer cells\n(C) bone marrow\n(D) lungs" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.21875, "loss": 4.258011817932129 }, { "id": "identity_287972", "image": "A-OKVQA/images/359451.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of products is the person at the rightmost kiosk, wearing a blue apron, selling?\nChoices:\n(A) food\n(B) shoes\n(C) clothes\n(D) souvenirs" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 8.59375, "loss": 4.6523518562316895 }, { "id": "identity_143263", "image": "MapQA/images/map_3325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states fall under the 72.9%-73.7% range?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 27.390625, "loss": 4.035437107086182 }, { "id": "identity_210446", "image": "PMC-VQA/images/PMC6721519_cells-08-00768-f006_32228.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cells are fully visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.359375, "loss": 4.563295841217041 }, { "id": "identity_22825", "image": "PMC-VQA/images/PMC6214924_Fig4_390574.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the red numbers represent in the image? \nChoices:\n(A) Total heart cells quantified by FACS \n(B) Percent of RFP+ quantified by FACS \n(C) Percent of NC-Cms relative to total cardiomyocytes quantified by FACS \n(D) Total cardiomyocytes quantified by FACS " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.34375, "loss": 4.022684574127197 }, { "id": "identity_294296", "image": "FigureQA/images/12596.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Blue the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.09375, "loss": 4.27077579498291 }, { "id": "identity_48675", "image": "DocVQA/images/jmwn0226_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the subject of the entry numbered 12?" }, { "from": "gpt", "value": "The answer is Sticky Note" } ], "clip_score": 16.171875, "loss": 4.25187349319458 }, { "id": "identity_143489", "image": "DocVQA/images/jsbf0004_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the marketing piece?" }, { "from": "gpt", "value": "The answer is Fall '96 Rescreen - \"Colors II\" Original" } ], "clip_score": 20.3125, "loss": 5.177480220794678 }, { "id": "identity_263565", "image": "AI2D/images/4211.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the rock cycle diagram, which letter corresponds to the process of melting?\nChoices:\n(A) E\n(B) A\n(C) B\n(D) G" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.265625, "loss": 4.09353494644165 }, { "id": "identity_150415", "image": "ScienceQA/images/12301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the object that is exerting the push?" }, { "from": "gpt", "value": "The answer is A weight" } ], "clip_score": 21.328125, "loss": 5.4897613525390625 }, { "id": "identity_20518", "image": "A-OKVQA/images/206102.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being recorded?\nChoices:\n(A) music\n(B) movie\n(C) podcast\n(D) youtube video" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.9609375, "loss": 4.053470134735107 }, { "id": "identity_30089", "image": "IconQA/images/27631.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) flip\n(B) turn\n(C) slide" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.96875, "loss": 4.630711078643799 }, { "id": "identity_323117", "image": "A-OKVQA/images/129334.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the small items on the display on the left called?\nChoices:\n(A) icons\n(B) bugs\n(C) snowflakes\n(D) cracks" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.921875, "loss": 4.414568901062012 }, { "id": "identity_291638", "image": "A-OKVQA/images/263359.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Along what do these horseback riders ride?\nChoices:\n(A) desert\n(B) ocean\n(C) city\n(D) streambed" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.75, "loss": 4.094675064086914 }, { "id": "identity_130484", "image": "PMC-VQA/images/PMC5215737_F2_110414.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many regions of interest (ROIs) are marked in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.484375, "loss": 4.521370887756348 }, { "id": "identity_193054", "image": "PMC-VQA/images/PMC6347472_f0010_427498.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color in the background of the image?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 16.53125, "loss": 4.214310169219971 }, { "id": "identity_334026", "image": "PMC-VQA/images/PMC5815779_F1_276828.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the diagnostic test used to identify this condition?\nChoices:\n(A) MRI\n(B) CT scan\n(C) X-ray\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.09375, "loss": 4.495984077453613 }, { "id": "identity_182812", "image": "PMC-VQA/images/PMC3056371_F0004_89892.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the orientation markers in the image?" }, { "from": "gpt", "value": "The answer is R for right side" } ], "clip_score": 18.046875, "loss": 4.524810791015625 }, { "id": "identity_315927", "image": "A-OKVQA/images/442223.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of seating would one expect to find in this place?\nChoices:\n(A) futons\n(B) booths\n(C) pews\n(D) stools" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.578125, "loss": 4.398731231689453 }, { "id": "identity_130217", "image": "TextVQA/images/8c70eac1edc9b8be.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the person doing based on their posture and the context of the image?" }, { "from": "gpt", "value": "The answer is Standing and possibly conversing" } ], "clip_score": 11.875, "loss": 4.231444835662842 }, { "id": "identity_328695", "image": "AI2D/images/1151.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the innermost part of this flower?\nChoices:\n(A) Petal\n(B) Sepal\n(C) Ovule\n(D) Leaf" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.546875, "loss": 4.179971218109131 }, { "id": "identity_203796", "image": "PMC-VQA/images/PMC7410867_Fig1_229883.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is indicated by the arrows?" }, { "from": "gpt", "value": "The answer is Left ventricle wall" } ], "clip_score": 28.109375, "loss": 4.2764105796813965 }, { "id": "identity_202332", "image": "PMC-VQA/images/PMC4898369_Fig1_39149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the lung fields clear or do they show any opacities?" }, { "from": "gpt", "value": "The answer is Clear" } ], "clip_score": 27.671875, "loss": 4.295389652252197 }, { "id": "identity_25983", "image": "PMC-VQA/images/PMC4493102_pone.0129696.g001_403414.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the black arrow indicate in the figure? \nChoices:\n(A) nucleus breaking down \n(B) formation of nucleus \n(C) formation of mitochondria \n(D) formation of starch molecules " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.828125, "loss": 4.360795021057129 }, { "id": "identity_274317", "image": "A-OKVQA/images/114744.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From where is this water sourced?\nChoices:\n(A) hose\n(B) sky\n(C) water nozzle\n(D) fire hydrant" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.4296875, "loss": 4.304112434387207 }, { "id": "identity_313929", "image": "AI2D/images/174.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the point on the earth's surface directly above the earthquake's focus?\nChoices:\n(A) focus\n(B) waves\n(C) epicenter\n(D) seismic" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.46875, "loss": 4.063835620880127 }, { "id": "identity_261419", "image": "TQA/images/abc_question_images/earth_moon_phases_16010.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter corresponds to the full moon?\nChoices:\n(A) W\n(B) M\n(C) H\n(D) X" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.203125, "loss": 4.093865871429443 }, { "id": "identity_222087", "image": "Super-CLEVR/images/superCLEVR_new_000606.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 20.15625, "loss": 4.1169586181640625 }, { "id": "identity_222739", "image": "VizWiz/images/VizWiz_train_00000804.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the condition of the object in the image?" }, { "from": "gpt", "value": "The answer is Not fresh" } ], "clip_score": 17.21875, "loss": 5.0904927253723145 }, { "id": "identity_17802", "image": "FigureQA/images/95138.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Crimson have the minimum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.46875, "loss": 4.1082539558410645 }, { "id": "identity_334242", "image": "IconQA/images/19843.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a boat. What is the second image?\nChoices:\n(A) bucket\n(B) boat\n(C) crab" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.515625, "loss": 4.01298713684082 }, { "id": "identity_101902", "image": "IconQA/images/1521.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the scarf on the snowman in the middle?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 32.9375, "loss": 4.409511566162109 }, { "id": "identity_270233", "image": "PMC-VQA/images/PMC1839090_F1_10209.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location of the scapula is the osteochondroma found?\nChoices:\n(A) superior aspect of the anterior scapula\n(B) inferior aspect of the posterior scapula\n(C) inferior aspect of the anterior scapula\n(D) superior aspect of the posterior scapula" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.921875, "loss": 4.380288600921631 }, { "id": "identity_35891", "image": "PMC-VQA/images/PMC6938471_Fig1_92791.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the bottom row? \nChoices:\n(A) Initial T1 SE pre contrast. \n(B) Initial T1 SE post contrast (a). \n(C) T2 weighted image. \n(D) FLAIR image. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.78125, "loss": 4.2023210525512695 }, { "id": "identity_23471", "image": "A-OKVQA/images/254653.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What group of people are accommodated in this bus?\nChoices:\n(A) handicapped\n(B) elderly\n(C) blind\n(D) pregnant women" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.921875, "loss": 4.01371955871582 }, { "id": "identity_215392", "image": "Super-CLEVR/images/superCLEVR_new_000348.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 18.34375, "loss": 4.072668075561523 }, { "id": "identity_26276", "image": "A-OKVQA/images/235687.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type flag elements appear in a pictured umbrella?\nChoices:\n(A) nazi\n(B) american\n(C) gay rainbow\n(D) chilean" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.0625, "loss": 4.371943473815918 }, { "id": "identity_103978", "image": "VQA2.0/images/304584.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing hats?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 16.09375, "loss": 4.455824375152588 }, { "id": "identity_49394", "image": "DocVQA/images/xkpl0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the email address of the person who signed the form?" }, { "from": "gpt", "value": "The answer is bohanoh@rjrt.com" } ], "clip_score": 22.3125, "loss": 4.0667338371276855 }, { "id": "identity_214916", "image": "A-OKVQA/images/43202.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many street lamps are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 24.96875, "loss": 4.400042533874512 }, { "id": "identity_24200", "image": "A-OKVQA/images/477468.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the green items on top of the tomatoes on the man's sandwich?\nChoices:\n(A) relish\n(B) avocados\n(C) lettuce\n(D) pickles" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.625, "loss": 4.452532768249512 }, { "id": "identity_190922", "image": "PMC-VQA/images/PMC5167692_F1_106251.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color tone of the MRI image?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 34.03125, "loss": 4.142891883850098 }, { "id": "identity_318382", "image": "FigureQA/images/39854.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Sienna the smallest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.453125, "loss": 4.195845603942871 }, { "id": "identity_302086", "image": "IconQA/images/88119.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on?\nChoices:\n(A) white\n(B) green" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.1875, "loss": 4.060420513153076 }, { "id": "identity_337966", "image": "A-OKVQA/images/285296.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the role of the person in the black shirt and dress pants behind the players?\nChoices:\n(A) coach\n(B) judge pitches\n(C) medical staff\n(D) broadcaster" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.15625, "loss": 4.952645301818848 }, { "id": "identity_264723", "image": "PMC-VQA/images/PMC7468463_F2_247268.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to generate the image?\nOptions:\n(A) CT scan\n(B) X-ray\n(C) MRI\n(D) PET scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.546875, "loss": 4.004690647125244 }, { "id": "identity_288720", "image": "ScienceQA/images/6292.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Virginia?\nChoices:\n(A) Nampa\n(B) Richmond\n(C) Arlington\n(D) Norfolk" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.328125, "loss": 4.21945858001709 }, { "id": "identity_17692", "image": "IconQA/images/69253.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a crab. Which picture is third?\nChoices:\n(A) crab\n(B) bucket\n(C) boat" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.9375, "loss": 4.03333044052124 }, { "id": "identity_33361", "image": "A-OKVQA/images/324428.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the large mechanical device on the back of the boats do?\nChoices:\n(A) propel\n(B) hold bait\n(C) anchor\n(D) catch fish" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.46875, "loss": 4.562323093414307 }, { "id": "identity_22046", "image": "ScienceQA/images/17642.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Nevada?\nChoices:\n(A) Buffalo\n(B) Carson City\n(C) Las Vegas\n(D) Reno" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.875, "loss": 4.235033988952637 }, { "id": "identity_299558", "image": "A-OKVQA/images/565869.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be found in the water among the following options: a submarine, a surfboarder, a boat, or a seal?\nChoices:\n(A) submarine\n(B) surfboarder\n(C) boat\n(D) seal" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.734375, "loss": 4.190791606903076 }, { "id": "identity_35108", "image": "IconQA/images/81674.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) slide\n(B) turn\n(C) flip" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.15625, "loss": 4.693498611450195 }, { "id": "identity_325825", "image": "TextVQA/images/b51119bf9d5d95e9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What's the name in the middle at the very bottom?" }, { "from": "gpt", "value": "The answer is elior" } ], "clip_score": 16.609375, "loss": 4.053853511810303 }, { "id": "identity_337086", "image": "ScienceQA/images/1817.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony depicted?\nChoices:\n(A) North Carolina\n(B) Mississippi\n(C) Florida\n(D) South Carolina" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.671875, "loss": 4.173680782318115 }, { "id": "identity_18282", "image": "FigureQA/images/82592.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Forest Green the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.5, "loss": 4.066750526428223 }, { "id": "identity_28472", "image": "A-OKVQA/images/185334.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are the bikes covered with junk?\nChoices:\n(A) for sale\n(B) showing off\n(C) confused\n(D) find easily" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.796875, "loss": 4.300660133361816 }, { "id": "identity_329334", "image": "A-OKVQA/images/6998.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the person riding their bike?\nChoices:\n(A) sidewalk\n(B) street\n(C) boardwalk\n(D) bike lane" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.921875, "loss": 4.014144420623779 }, { "id": "identity_299267", "image": "A-OKVQA/images/532994.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of event is taking place in this scenario?\nChoices:\n(A) olympic contest\n(B) car race\n(C) fair\n(D) cattle call" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.859375, "loss": 4.277256011962891 }, { "id": "identity_28430", "image": "PMC-VQA/images/PMC6613253_Fig2_5287.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of images are shown in the middle portion of the figure? \nChoices:\n(A) 4D-Mode images of the airway \n(B) Actual images of nasopharyngoscopy \n(C) 3D-Mode images of the velopharyngeal insufficiency \n(D) 2D-Mode images of the virtual nasopharyngoscopy-mode " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.703125, "loss": 4.499476909637451 }, { "id": "identity_201507", "image": "A-OKVQA/images/385326.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 13.5703125, "loss": 4.261074542999268 }, { "id": "identity_264844", "image": "FigureQA/images/89384.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly states whether Dark Slate is the smoothest?\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.109375, "loss": 4.178128242492676 }, { "id": "identity_268733", "image": "TextVQA/images/0bd60dc408159a20.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the recommended action to take with the large box?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 19.5, "loss": 4.0208868980407715 }, { "id": "identity_281234", "image": "TextVQA/images/bfcdcb3bf1013ebe.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the title of the book written by Dean Young?" }, { "from": "gpt", "value": "The answer is recklessness" } ], "clip_score": 26.65625, "loss": 5.510993480682373 }, { "id": "identity_338111", "image": "A-OKVQA/images/93378.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this place sell?\nChoices:\n(A) dogs\n(B) fish\n(C) cats\n(D) birds" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.2109375, "loss": 4.307785511016846 }, { "id": "identity_327438", "image": "ScienceQA/images/6292.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Nampa\n(B) Richmond\n(C) Arlington\n(D) Norfolk" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.78125, "loss": 4.208571910858154 }, { "id": "identity_288349", "image": "ScienceQA/images/697.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is used to make this cup?\nChoices:\n(A) plastic\n(B) metal" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.171875, "loss": 4.034592151641846 }, { "id": "identity_15729", "image": "AI2D/images/3265.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does G represent in the diagram?\nChoices:\n(A) mitochondrion\n(B) none of the above\n(C) lysosome\n(D) centriole" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.25, "loss": 4.05430793762207 }, { "id": "identity_206029", "image": "VQA-AS/images/4555.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are on the mantelpiece above the fireplace?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 32.15625, "loss": 4.719627857208252 }, { "id": "identity_10874", "image": "IconQA/images/58136.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a house. Which picture is fourth?\nChoices:\n(A) paw\n(B) house\n(C) dog" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.25, "loss": 4.384588718414307 }, { "id": "identity_278147", "image": "A-OKVQA/images/362198.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the material used to make the sink?\nOptions:\n(A) stainless steel\n(B) wood\n(C) plastic\n(D) porcelain" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.84375, "loss": 4.688902854919434 }, { "id": "identity_302806", "image": "PMC-VQA/images/PMC8138997_Fig4_446783.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do f, g indicate in the caption?\nChoices:\n(A) Antibodies staining results\n(B) 3D reconstruction of the inflammatory infiltrates\n(C) 3D reconstruction of vertebrobasilar system after angiography\n(D) A new technique for detecting TNFa" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.71875, "loss": 4.215322017669678 }, { "id": "identity_248903", "image": "A-OKVQA/images/115145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the possible uses for the cable in the lower right socket, and how does its type influence these uses?" }, { "from": "gpt", "value": "The answer is Transmitting TV signals, internet data, and other data communication" } ], "clip_score": 16.859375, "loss": 4.0069169998168945 }, { "id": "identity_54287", "image": "MapQA/images/map_1551.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a career percentage between 28.3%-29.1%?" }, { "from": "gpt", "value": "The answer is an integer number" } ], "clip_score": 33.625, "loss": 4.122591972351074 }, { "id": "identity_6344", "image": "FigureQA/images/82791.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Saddle Brown the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.140625, "loss": 4.293322563171387 }, { "id": "identity_116117", "image": "VQA2.0/images/521867.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the door frame?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 23.25, "loss": 4.396632194519043 }, { "id": "identity_35338", "image": "PMC-VQA/images/PMC4658336_f0015_447443.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the image show? \nChoices:\n(A) A protein complex \n(B) A microorganism \n(C) A 3D printer \n(D) A chemical compound. " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.96875, "loss": 4.204703330993652 }, { "id": "identity_21907", "image": "A-OKVQA/images/567877.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the man using the device in his hand to do?\nChoices:\n(A) light candle\n(B) eat cake\n(C) cut cake\n(D) serve cake" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.1015625, "loss": 4.197780609130859 }, { "id": "identity_302605", "image": "A-OKVQA/images/507854.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do these vehicles transport?\nChoices:\n(A) cars\n(B) animals\n(C) people\n(D) coal" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.1484375, "loss": 4.2272491455078125 }, { "id": "identity_320991", "image": "A-OKVQA/images/232546.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of business is this white sign advertising?\nChoices:\n(A) pharmacy\n(B) car shop\n(C) route store\n(D) grocer" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.28125, "loss": 4.0281267166137695 }, { "id": "identity_323558", "image": "TextVQA/images/6a2f00b32601f7e7.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is in this bottle with the red top?" }, { "from": "gpt", "value": "The answer is casera" } ], "clip_score": 24.484375, "loss": 4.537456035614014 }, { "id": "identity_233767", "image": "DVQA/images/bar_train_00042069.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest total number of preferences across all objects?" }, { "from": "gpt", "value": "The answer is reform" } ], "clip_score": 32.625, "loss": 4.425812244415283 }, { "id": "identity_264600", "image": "PMC-VQA/images/PMC8727688_f2_156113.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location were the original metastatic lesions found? \nChoices:\n(A) Left lower chest wall\n(B) Right lower chest wall\n(C) Upper chest wall\n(D) Spine" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.421875, "loss": 4.645060062408447 }, { "id": "identity_329466", "image": "PMC-VQA/images/PMC6818735_FIG3_57736.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the white arrow in this image?\nChoices:\n(A) The full extent of the knife's intracranial route\n(B) A tumor in the brain\n(C) A normal brain tissue\n(D) The right lateral intraventricular hemorrhage" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 35.9375, "loss": 4.296839714050293 }, { "id": "identity_33285", "image": "PMC-VQA/images/PMC5360938_fig4_143851.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What artery is marked by the black arrows?\nChoices:\n(A) basilar artery \n(B) anterior cerebral artery \n(C) middle cerebral artery \n(D) posterior communicating artery " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.140625, "loss": 4.037110805511475 }, { "id": "identity_269085", "image": "Super-CLEVR/images/superCLEVR_new_001010.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the tiny airliner have the same color as the small metallic road bike located on the left side of the purple metallic dirtbike?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 31.71875, "loss": 5.358625411987305 }, { "id": "identity_312238", "image": "AI2D/images/1137.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What connects the roots to this part?\nChoices:\n(A) Stem\n(B) Stigma\n(C) Filament\n(D) Anther" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.3125, "loss": 4.453068733215332 }, { "id": "identity_326742", "image": "IconQA/images/79958.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a tractor. What is the fifth image?\nChoices:\n(A) tractor\n(B) barn\n(C) sun" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.0625, "loss": 4.728365421295166 }, { "id": "identity_259804", "image": "ScienceQA/images/15220.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state borders the highlighted state to the west and what is its capital city?" }, { "from": "gpt", "value": "The answer is Lincoln" } ], "clip_score": 29.65625, "loss": 4.785891532897949 }, { "id": "identity_318527", "image": "FigureQA/images/56033.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.890625, "loss": 4.057171821594238 }, { "id": "identity_7654", "image": "AI2D/images/1936.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the given food web, the killer whale is a\nChoices:\n(A) primary consumer\n(B) producer\n(C) tertiary consumer\n(D) secondary consumer" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.90625, "loss": 4.10754919052124 }, { "id": "identity_23850", "image": "PMC-VQA/images/PMC4157140_fig1_318564.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is seen next to the anterior horn of the lateral ventricle on the right? \nChoices:\n(A) numerous small, well-delineated cysts \n(B) signal abnormalities \n(C) nothing abnormal \n(D) additional minor abnormalities " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.8125, "loss": 4.542645454406738 }, { "id": "identity_278332", "image": "FigureQA/images/7575.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Khaki not the low median?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.8125, "loss": 4.234846591949463 }, { "id": "identity_112766", "image": "Super-CLEVR/images/superCLEVR_new_000803.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is present in the image besides airplanes?" }, { "from": "gpt", "value": "The answer is A bus" } ], "clip_score": 24.578125, "loss": 4.0648064613342285 }, { "id": "identity_297253", "image": "PMC-VQA/images/PMC4758959_fig11_3409.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What measurement does the scale bar in the image represent?\nChoices:\n(A) 10 μm\n(B) 1 μm\n(C) 2 μm\n(D) 5 μm" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.453125, "loss": 4.284289360046387 }, { "id": "identity_117014", "image": "Super-CLEVR/images/superCLEVR_new_001554.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have exactly three wheels?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.21875, "loss": 4.341774940490723 }, { "id": "identity_6974", "image": "PMC-VQA/images/PMC6462305_f0006_464917.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the finding on the MRI image? \nChoices:\n(A) No abnormalities detected \n(B) Residual lesions \n(C) Acute bleed \n(D) Cerebral atrophy " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.90625, "loss": 4.251828193664551 }, { "id": "identity_19890", "image": "PMC-VQA/images/PMC7215337_ijms-21-02817-f007_166978.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is seen in the interlobular septum? \nChoices:\n(A) Organized lymphoid tissue \n(B) Degenerated lymphoid tissue \n(C) Adipose tissue \n(D) None of the above " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.09375, "loss": 4.475224494934082 }, { "id": "identity_33023", "image": "PMC-VQA/images/PMC4332747_Fig4_359939.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique was used to evaluate the dislodged bone cement? \nChoices:\n(A) Magnetic Resonance Imaging (MRI). \n(B) Computed Tomography (CT) scan. \n(C) X-ray. \n(D) Ultrasound. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.046875, "loss": 4.08682107925415 }, { "id": "identity_11240", "image": "AI2D/images/4436.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of fin H?\nChoices:\n(A) Anal fin\n(B) Pectoral fin\n(C) Dorsal fin\n(D) Pelvic fin" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.6875, "loss": 4.688403606414795 }, { "id": "identity_188670", "image": "VQA2.0/images/520199.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs are there in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 19.71875, "loss": 4.398173809051514 }, { "id": "identity_307895", "image": "PlotQA/images/112850.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the legend located in the graph?" }, { "from": "gpt", "value": "The answer is center right" } ], "clip_score": 18.296875, "loss": 4.545301914215088 }, { "id": "identity_327675", "image": "PMC-VQA/images/PMC3661911_Fig4_206336.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the open arrows indicate in the perfusion maps?\nChoices:\n(A) The border of the lesion \n(B) The perfusion deficit \n(C) The healthy tissue \n(D) The computer-generated lesion map" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.125, "loss": 4.3577117919921875 }, { "id": "identity_191546", "image": "TextVQA/images/e9d6c10b6c12ee54.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the battery designed for a specific country or region according to the label?" }, { "from": "gpt", "value": "The answer is Made in Korea" } ], "clip_score": 21.828125, "loss": 4.17671537399292 }, { "id": "identity_22902", "image": "A-OKVQA/images/35897.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the SUV moving over?\nChoices:\n(A) lane ending\n(B) wrong way\n(C) lost control\n(D) being silly" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.546875, "loss": 4.258445739746094 }, { "id": "identity_10375", "image": "A-OKVQA/images/176519.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are they wearing wetsuits?\nChoices:\n(A) for fun\n(B) cold water\n(C) easier finding\n(D) showing off" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.046875, "loss": 4.247890472412109 }, { "id": "identity_325187", "image": "PMC-VQA/images/PMC2679713_F1_38145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the imaging findings of a certain condition on FLAIR images?\nChoices:\n(A) Decreased signal intensity in some areas of the brain\n(B) Bright signal in some areas of the brain\n(C) Presence of fluid-filled spaces in the brain\n(D) Thickening of some brain structures" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.15625, "loss": 4.744173526763916 }, { "id": "identity_208906", "image": "TextVQA/images/40a3a8b0139e0259.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrian crossing signs can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.875, "loss": 4.28753137588501 }, { "id": "identity_311568", "image": "ScienceQA/images/243.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which place is highlighted?\nChoices:\n(A) Jamaica\n(B) Haiti\n(C) The Bahamas\n(D) Grenada" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.28125, "loss": 4.076561450958252 }, { "id": "identity_227147", "image": "A-OKVQA/images/512838.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object in the center of the chips stand logo, and how many spokes does it have?" }, { "from": "gpt", "value": "The answer is Blue, eight" } ], "clip_score": 8.4296875, "loss": 4.770071983337402 }, { "id": "identity_26096", "image": "FigureQA/images/7854.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Purple the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.21875, "loss": 4.555152893066406 }, { "id": "identity_305418", "image": "TextVQA/images/8c00cc500f30fe07.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of player #8?" }, { "from": "gpt", "value": "The answer is prust" } ], "clip_score": 25.234375, "loss": 4.322831153869629 }, { "id": "identity_309599", "image": "Super-CLEVR/images/superCLEVR_new_000319.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the blue object to the right of the blue matte bicycle that is to the left of the motorbike in front of the cyan rubber object?" }, { "from": "gpt", "value": "The answer is mountain bike" } ], "clip_score": 29.234375, "loss": 4.292841911315918 }, { "id": "identity_219810", "image": "A-OKVQA/images/431954.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of doll is the person holding?" }, { "from": "gpt", "value": "The answer is Uncle Sam doll" } ], "clip_score": 26.453125, "loss": 4.411981105804443 }, { "id": "identity_13335", "image": "A-OKVQA/images/386908.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of bike is this?\nChoices:\n(A) chopper\n(B) tandem\n(C) mountain\n(D) electric" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.28125, "loss": 4.234556198120117 }, { "id": "identity_277927", "image": "DocVQA/images/nmwn0226_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is Amber Howard currently located?" }, { "from": "gpt", "value": "The answer is advertising director" } ], "clip_score": 19.921875, "loss": 4.277486324310303 }, { "id": "identity_8787", "image": "VizWiz/images/VizWiz_train_00010982.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: record: can you tell me what kind of box this is. What is the name of it and the brand, please." }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 19.5, "loss": 4.2581257820129395 }, { "id": "identity_306319", "image": "TQA/images/abc_question_images/human_system_ear_12880.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label corresponds to the ear drum?\nChoices:\n(A) R\n(B) D\n(C) N\n(D) L" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.484375, "loss": 4.20062255859375 }, { "id": "identity_321646", "image": "VizWiz/images/VizWiz_train_00016462.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this a twelve volt transformer? It should indicate twelve v. One two v." }, { "from": "gpt", "value": "The answer is bc" } ], "clip_score": 23.078125, "loss": 5.421741485595703 }, { "id": "identity_319901", "image": "AI2D/images/2137.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At which stage is the star located at D?\nChoices:\n(A) stellar nebula\n(B) supernova\n(C) planetary nebula\n(D) black hole" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.796875, "loss": 4.019795894622803 }, { "id": "identity_76356", "image": "ScienceQA/images/7351.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states share a border with the highlighted state?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 31.796875, "loss": 4.304780006408691 }, { "id": "identity_117909", "image": "A-OKVQA/images/442322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible on the field in this image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.21875, "loss": 4.273869514465332 }, { "id": "identity_31359", "image": "PMC-VQA/images/PMC6636264_f3_10016.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the image labeled as probable UIP? \nChoices:\n(A) Due to the presence of honeycombing \n(B) Due to the absence of honeycombing \n(C) Due to the presence of other pulmonary pattern \n(D) Due to the quality of the image. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.46875, "loss": 5.352697372436523 }, { "id": "identity_140919", "image": "DocVQA/images/mghk0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person who incurred the expenses?" }, { "from": "gpt", "value": "The answer is Roger Adams" } ], "clip_score": 20.828125, "loss": 4.672159671783447 }, { "id": "identity_299318", "image": "PMC-VQA/images/PMC7797455_FIG3_339775.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the yellow arrows in the CT images?\nChoices:\n(A) pleural effusion \n(B) consolidation in lower lobes \n(C) air bronchogram sign \n(D) none of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.65625, "loss": 4.199374198913574 }, { "id": "identity_60362", "image": "DocVQA/images/jpyv0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the memorandum addressed to?" }, { "from": "gpt", "value": "The answer is Claud Fleet" } ], "clip_score": 28.796875, "loss": 4.068291664123535 }, { "id": "identity_290849", "image": "FigureQA/images/7947.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater in value, Medium Seafoam or Forest Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.34375, "loss": 4.434357166290283 }, { "id": "identity_203137", "image": "TextVQA/images/0161ab1145014926.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bottles have a portrait on their label?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.6875, "loss": 4.733870506286621 }, { "id": "identity_290199", "image": "PMC-VQA/images/PMC3411310_Fig1_148091.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was observed in the lymphadenopathy in terms of calcification? \nChoices:\n(A) No calcification\n(B) Moderate calcification\n(C) Fine specks of calcification\n(D) Coarse calcification" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.625, "loss": 4.03198766708374 }, { "id": "identity_35311", "image": "A-OKVQA/images/326936.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What Item is a human most likely to trip over?\nChoices:\n(A) stool\n(B) firewood\n(C) fan\n(D) bleach" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.328125, "loss": 4.1948347091674805 }, { "id": "identity_297591", "image": "TQA/images/question_images/simple_machines_8197.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the man turns the wheel in a clockwise direction, what is the resulting motion direction?\nChoices:\n(A) upward\n(B) counter-clockwise\n(C) downward\n(D) sideways" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.953125, "loss": 4.302358150482178 }, { "id": "identity_263358", "image": "TQA/images/question_images/muscle_fiber_7091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part is responsible for connecting muscle tissue to bone: Perimysium, Epimysium, Tendon, or Muscle fiber?\nChoices:\n(A) Perimysium\n(B) Epimysium\n(C) Tendon\n(D) Muscle fiber" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.25, "loss": 4.037024974822998 }, { "id": "identity_27210", "image": "TabMWP/images/23351.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A grocery store recorded how many customers visited the store each day. On which day did the grocery store have the most customers?\nChoices:\n(A) Wednesday\n(B) Thursday\n(C) Friday\n(D) Saturday" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.90625, "loss": 4.149684906005859 }, { "id": "identity_37308", "image": "VizWiz/images/VizWiz_train_00023875.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me what the CAPTCHA is? The number on this screen." }, { "from": "gpt", "value": "The answer is subleave" } ], "clip_score": 29.734375, "loss": 4.998885631561279 }, { "id": "identity_300041", "image": "ScienceQA/images/1270.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which rhetorical appeal is mainly used in this advertisement?\nChoices:\n(A) logos (reason)\n(B) ethos (character)\n(C) pathos (emotion)" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.875, "loss": 4.211381912231445 }, { "id": "identity_328365", "image": "TextVQA/images/16b41d2c3ee1a9a1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand of beverage is visible?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 22.15625, "loss": 4.286071300506592 }, { "id": "identity_296001", "image": "Super-CLEVR/images/superCLEVR_new_000010.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the material of the blue scooter and the brown object behind the bicycle the same?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 26.28125, "loss": 5.7402544021606445 }, { "id": "identity_18544", "image": "A-OKVQA/images/327726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the black couch against the wall made out of?\nChoices:\n(A) wool\n(B) denim\n(C) nylon\n(D) leather" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.109375, "loss": 4.421963691711426 }, { "id": "identity_289684", "image": "PMC-VQA/images/PMC8011180_Fig1_407190.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reference of the arrow in the T2/FLAIR sequence? Is it pointing to:\n(A) grey matter\n(B) white matter\n(C) cerebral fluid\n(D) bone" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.90625, "loss": 4.655297756195068 }, { "id": "identity_218694", "image": "A-OKVQA/images/407522.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any trees visible on the mountain?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 19.84375, "loss": 5.744214057922363 }, { "id": "identity_304914", "image": "TQA/images/abc_question_images/photosynthesis_11312.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which one is the water?\nChoices:\n(A) Y\n(B) V\n(C) P\n(D) J" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.84375, "loss": 4.640442371368408 }, { "id": "identity_47380", "image": "DocVQA/images/spnf0227_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of difference is indicated by a single asterisk (*) in the legend?" }, { "from": "gpt", "value": "The answer is Unusual difference" } ], "clip_score": 19.78125, "loss": 4.201861381530762 }, { "id": "identity_310438", "image": "Super-CLEVR/images/superCLEVR_new_000000.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more small red cars to the left of the utility bike than big gray metal trucks?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 25.75, "loss": 4.9088287353515625 }, { "id": "identity_33270", "image": "ScienceQA/images/13234.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of North Carolina?\nChoices:\n(A) Raleigh\n(B) Charlotte\n(C) Lincoln\n(D) Santa Fe" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.46875, "loss": 4.303436279296875 }, { "id": "identity_24554", "image": "A-OKVQA/images/524470.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person in the street using to get around?\nChoices:\n(A) scooter\n(B) skateboard\n(C) car\n(D) bike" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.0, "loss": 4.182118892669678 }, { "id": "identity_253541", "image": "PMC-VQA/images/PMC5920057_Fig5_306554.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which slice is used for assessing the phase-contrast values, and what is the approximate diameter of the circular object in the image in millimeters?" }, { "from": "gpt", "value": "The answer is Coronal, approximately 2 mm" } ], "clip_score": 28.421875, "loss": 4.131041049957275 }, { "id": "identity_322410", "image": "A-OKVQA/images/225271.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the area where these individuals are walking?\nChoices:\n(A) crosswalk\n(B) sidewalk\n(C) runway\n(D) boulevard" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.203125, "loss": 4.480231285095215 }, { "id": "identity_123138", "image": "A-OKVQA/images/216449.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of printed material is on the table in front of the child on the right?" }, { "from": "gpt", "value": "The answer is A menu or activity sheet" } ], "clip_score": 20.453125, "loss": 4.6134538650512695 }, { "id": "identity_319658", "image": "VQA2.0/images/22536.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the hair short?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 21.5, "loss": 4.337288856506348 }, { "id": "identity_194983", "image": "VQA2.0/images/3602.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flower pots are visible on the left side of the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 15.390625, "loss": 4.347391605377197 }, { "id": "identity_197453", "image": "A-OKVQA/images/477807.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dogs are visible in the trailer?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 16.96875, "loss": 4.8329877853393555 }, { "id": "identity_225073", "image": "Super-CLEVR/images/superCLEVR_new_001192.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image are the same material as the tiny cruiser, and what are their shapes?" }, { "from": "gpt", "value": "The answer is Two vehicles; tiny cruiser and school bus" } ], "clip_score": 28.1875, "loss": 4.021365165710449 }, { "id": "identity_328529", "image": "A-OKVQA/images/5688.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the unique feature of this item?\nChoices:\n(A) protection\n(B) grip\n(C) none\n(D) shadow" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 6.98828125, "loss": 5.06236457824707 }, { "id": "identity_289673", "image": "A-OKVQA/images/532976.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the yellow truck engaged in?\nChoices:\n(A) transporting goods\n(B) receiving donation\n(C) mobile library\n(D) selling food" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.4375, "loss": 4.133626937866211 }, { "id": "identity_330574", "image": "PMC-VQA/images/PMC3407708_F5_147299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kinds of images were taken of the cervical spine?\nChoices:\n(A) Sagittal and Coronal\n(B) Axial and Oblique\n(C) Transverse and MIP\n(D) Lateral and Computed Tomography" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.84375, "loss": 4.364710807800293 }, { "id": "identity_299579", "image": "VizWiz/images/VizWiz_train_00019004.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does my coffee appear?" }, { "from": "gpt", "value": "The answer is brown milk in" } ], "clip_score": 26.53125, "loss": 5.305765628814697 }, { "id": "identity_297691", "image": "Super-CLEVR/images/superCLEVR_new_001231.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more large purple metal jets than big cruisers?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 26.015625, "loss": 4.7657880783081055 }, { "id": "identity_31021", "image": "PMC-VQA/images/PMC2981854_F0002_78516.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the axial contrast-enhanced CT image show? \nChoices:\n(A) MRI image \n(B) PET image \n(C) CT image \n(D) X-ray image " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.125, "loss": 4.730278968811035 }, { "id": "identity_2765", "image": "AI2D/images/2957.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the diagram represent?\nChoices:\n(A) neuron\n(B) proton\n(C) none of the above\n(D) electron" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.078125, "loss": 4.34570837020874 }, { "id": "identity_38337", "image": "PMC-VQA/images/PMC4592368_Fig1_429401.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of imaging is used to produce the middle column images? \nChoices:\n(A) T1-weighted MPRAGE images \n(B) raw axial T2-weighted FLAIR images \n(C) both T1-weighted MPRAGE and axial T2-weighted FLAIR images \n(D) a different type of imaging technique " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.078125, "loss": 4.394664287567139 }, { "id": "identity_30014", "image": "A-OKVQA/images/383212.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of a number is on the bike?\nChoices:\n(A) composite\n(B) odd\n(C) even\n(D) negative" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.640625, "loss": 4.343213081359863 }, { "id": "identity_233601", "image": "Super-CLEVR/images/superCLEVR_new_001474.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the green SUV in front of the gray object larger than the purple car that is to the right of the brown double bus?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 28.671875, "loss": 5.234798431396484 }, { "id": "identity_328276", "image": "TabMWP/images/5470.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is this function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.8125, "loss": 4.332299709320068 }, { "id": "identity_262381", "image": "VQA2.0/images/304997.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which letter is located behind the home plate?" }, { "from": "gpt", "value": "The answer is w" } ], "clip_score": 22.671875, "loss": 4.465807914733887 }, { "id": "identity_208144", "image": "PMC-VQA/images/PMC6548024_fig5_488939.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are labeled with \"DAI\"?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 13.6796875, "loss": 4.5654754638671875 }, { "id": "identity_8914", "image": "TQA/images/abc_question_images/cycle_water_14974.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which represents the evaporation from ocean?\nChoices:\n(A) H\n(B) E\n(C) W\n(D) X" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.828125, "loss": 4.361981391906738 }, { "id": "identity_203351", "image": "TextVQA/images/e5f246059e8f7c11.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many control knobs are visible on the television?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.90625, "loss": 4.411951541900635 }, { "id": "identity_23667", "image": "DVQA/images/bar_train_00007475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What dataset does the darkorange color represent?" }, { "from": "gpt", "value": "The answer is tax" } ], "clip_score": 26.984375, "loss": 4.3176727294921875 }, { "id": "identity_6480", "image": "FigureQA/images/49058.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Salmon the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.40625, "loss": 4.150280952453613 }, { "id": "identity_196533", "image": "TextVQA/images/1a565a40e6fa8300.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main visual element behind the text on the magazine cover?" }, { "from": "gpt", "value": "The answer is A rising curve or graph" } ], "clip_score": 24.78125, "loss": 4.375670433044434 }, { "id": "identity_288771", "image": "PMC-VQA/images/PMC5167692_F1_106251.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is represented by the arrows in the MRI images? \nChoices:\n(A) Normal brain tissue \n(B) Tumors \n(C) Blood flow \n(D) Inflammation" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.09375, "loss": 4.505621910095215 }, { "id": "identity_313466", "image": "Super-CLEVR/images/superCLEVR_new_001545.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer shiny aeroplanes in front of the yellow thing than small things behind the red thing?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 21.640625, "loss": 5.103377819061279 }, { "id": "identity_322980", "image": "ScienceQA/images/17539.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic do these items share?\nChoices:\n(A) sweet\n(B) sour\n(C) soft" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.90625, "loss": 4.246187686920166 }, { "id": "identity_327132", "image": "PMC-VQA/images/PMC4368839_Fig2_370264.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging techniques can measure a thin layer of neointima?\nChoices:\n(A) IVOCT \n(B) Micro-CT \n(C) MRI \n(D) X-ray" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.0625, "loss": 4.027225017547607 }, { "id": "identity_307767", "image": "TQA/images/question_images/hair_follicles_6987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is located before the Hair shaft?\nChoices:\n(A) Sebaceous gland\n(B) Inner root\n(C) Arrector pili muscle\n(D) Outer root" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 36.25, "loss": 4.640367031097412 }, { "id": "identity_290553", "image": "A-OKVQA/images/556476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is located beside the lamppost?\nChoices:\n(A) dog\n(B) elephant\n(C) cat\n(D) bench" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.828125, "loss": 4.131759166717529 }, { "id": "identity_270329", "image": "PMC-VQA/images/PMC8580671_fig2_107722.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the X-ray image depict in terms of appearance?\nChoices:\n(A) Cloudy \n(B) Cloudy Bone \n(C) Bone in bone \n(D) Fatty Tissues" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.3125, "loss": 4.721907138824463 }, { "id": "identity_15603", "image": "Super-CLEVR/images/superCLEVR_new_001528.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a shiny motorbike left of the metallic scooter; is it the same size as the small blue school bus?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 27.15625, "loss": 5.152446269989014 }, { "id": "identity_13350", "image": "Super-CLEVR/images/superCLEVR_new_001376.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the tiny metal thing that is behind the large metallic object have the same shape as the blue object that is in front of the matte scooter?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 27.875, "loss": 5.232819557189941 }, { "id": "identity_307552", "image": "TQA/images/abc_question_images/human_system_eye_16126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which represents a clear, protective covering on the outside of something?\nChoices:\n(A) K\n(B) T\n(C) C\n(D) P" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.2890625, "loss": 4.3393144607543945 }, { "id": "identity_279864", "image": "ScienceQA/images/9810.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the following organisms, which one contains matter that was previously part of the persimmon tree?\nChoices:\n(A) gray fox\n(B) silver maple\n(C) beaver" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.09375, "loss": 5.161910057067871 }, { "id": "identity_71792", "image": "TQA/images/question_images/velocity_time_graphs_8214.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which graph represents an object that is speeding up over time?" }, { "from": "gpt", "value": "The answer is Bottom left graph under Constant acceleration" } ], "clip_score": 29.1875, "loss": 4.015000820159912 }, { "id": "identity_263067", "image": "A-OKVQA/images/52689.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At what elevation could a person potentially reach while riding on the ski lift?\nChoices:\n(A) same\n(B) higher\n(C) none\n(D) lower" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.0, "loss": 4.118774890899658 }, { "id": "identity_106561", "image": "A-OKVQA/images/296233.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the vest of the person second from the left?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 21.171875, "loss": 4.433135509490967 }, { "id": "identity_318879", "image": "DVQA/images/bar_train_00187612.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of this group of bars?" }, { "from": "gpt", "value": "The answer is desire" } ], "clip_score": 24.359375, "loss": 4.228523254394531 }, { "id": "identity_209832", "image": "VQA2.0/images/333984.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are observing the skateboarder from the edge of the skate bowl?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 32.40625, "loss": 4.403120994567871 }, { "id": "identity_53700", "image": "DocVQA/images/kkmm0020_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the type of retail segment for the account?" }, { "from": "gpt", "value": "The answer is R = RETAIL" } ], "clip_score": 19.984375, "loss": 4.06254768371582 }, { "id": "identity_318935", "image": "PMC-VQA/images/PMC4300405_f0010_351431.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which lamina of the spinal cord is this staining observed?\nChoices:\n(A) Lamina I \n(B) Lamina III \n(C) Lamina V \n(D) Lamina VII" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.796875, "loss": 4.019384860992432 }, { "id": "identity_288656", "image": "A-OKVQA/images/487456.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the machinery being white?\nChoices:\n(A) toilet paper\n(B) snow covered\n(C) styrofoam\n(D) painted white" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.4375, "loss": 4.8469672203063965 }, { "id": "identity_301772", "image": "ScienceQA/images/830.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Montpelier\n(B) Lansing\n(C) Baton Rouge\n(D) Birmingham" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.5625, "loss": 4.125903606414795 }, { "id": "identity_299977", "image": "A-OKVQA/images/263176.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What actions are the individuals prepared to take?\nChoices:\n(A) eat\n(B) run\n(C) deplane\n(D) board" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 8.2578125, "loss": 4.034878253936768 }, { "id": "identity_273684", "image": "PMC-VQA/images/PMC6199147_FIG2_385376.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of medical imaging was utilized to capture this image?\nChoices:\n(A) X-ray\n(B) MRI\n(C) CT scan\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.734375, "loss": 4.2938642501831055 }, { "id": "identity_325552", "image": "FigureQA/images/98804.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this color have the minimum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.15625, "loss": 4.189608573913574 }, { "id": "identity_195499", "image": "PMC-VQA/images/PMC7693123_diagnostics-10-00899-f003_306457.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image in relation to the patient's body?" }, { "from": "gpt", "value": "The answer is Transverse section, patient's right on image left" } ], "clip_score": 30.359375, "loss": 4.1690239906311035 }, { "id": "identity_330159", "image": "A-OKVQA/images/135348.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name for this kind of clothing?\nChoices:\n(A) hawaiian\n(B) american\n(C) russian\n(D) british" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.171875, "loss": 4.757883548736572 }, { "id": "identity_1767", "image": "DocVQA/images/phxj0037_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What should be used to avoid increase in NNk formation?" }, { "from": "gpt", "value": "The answer is low nitrate tobaccos" } ], "clip_score": 19.3125, "loss": 4.264032363891602 }, { "id": "identity_145910", "image": "DocVQA/images/txpp0227_16.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last name of the person from Benton Harbor, Mi.?" }, { "from": "gpt", "value": "The answer is Tatter" } ], "clip_score": 19.71875, "loss": 5.029491901397705 }, { "id": "identity_321826", "image": "AI2D/images/4707.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Designating geometric lines or planes by using what?\nChoices:\n(A) oblique\n(B) obtuse\n(C) cordate\n(D) perfoliate" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.03125, "loss": 4.010843276977539 }, { "id": "identity_24952", "image": "PMC-VQA/images/PMC4575171_pone.0138226.g003_424927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is highlighted in the axial section? \nChoices:\n(A) Muscle \n(B) Cartilage \n(C) Bone \n(D) Tendon " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.375, "loss": 4.175200462341309 }, { "id": "identity_4971", "image": "IconQA/images/91694.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time does the clock show?\nChoices:\n(A) 10:00\n(B) 2:00" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.453125, "loss": 4.145017147064209 }, { "id": "identity_272504", "image": "PMC-VQA/images/PMC6510869_Fig3_477973.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging modality is utilized to assess disability status?\nChoices:\n(A) T2-weighted image\n(B) Proton Density weighted image\n(C) 3D T1-weighted image\n(D) Fluid Attenuated Inversion Recovery (FLAIR) image." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.390625, "loss": 4.128332614898682 }, { "id": "identity_104350", "image": "VQA2.0/images/531253.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many paddle boats are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 28.125, "loss": 4.482242584228516 }, { "id": "identity_22686", "image": "A-OKVQA/images/133061.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on the animal to the right?\nChoices:\n(A) ribbon\n(B) crown\n(C) baby\n(D) rope" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.421875, "loss": 4.094937801361084 }, { "id": "identity_192206", "image": "A-OKVQA/images/281725.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the jacket of the person on the right?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 17.375, "loss": 4.920609474182129 }, { "id": "identity_16618", "image": "TabMWP/images/35340.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the following schedule. Which event ends at 8.25 P.M.?\nChoices:\n(A) track meet\n(B) baseball game\n(C) women's tennis match\n(D) golf tournament" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 37.40625, "loss": 4.049294948577881 }, { "id": "identity_319812", "image": "FigureQA/images/40266.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Orchid the largest section?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.515625, "loss": 4.313850402832031 }, { "id": "identity_289996", "image": "TabMWP/images/27164.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nOptions:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.046875, "loss": 4.236113548278809 }, { "id": "identity_191184", "image": "PMC-VQA/images/PMC7718010_F1_315486.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of cells are the blue structures in the image?" }, { "from": "gpt", "value": "The answer is Cell nuclei" } ], "clip_score": 22.0625, "loss": 4.651413917541504 }, { "id": "identity_288307", "image": "PMC-VQA/images/PMC8075766_FIG2_428374.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to produce the image shown?\nOptions:\n(A) Computed Tomography (CT)\n(B) Magnetic Resonance Imaging (MRI)\n(C) X-Ray\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.78125, "loss": 4.081579685211182 }, { "id": "identity_307692", "image": "PMC-VQA/images/PMC7251740_Fig1_178955.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the origin of the mass observed in the imaging? \nChoices:\n(A) Left ventricle \n(B) Aorta \n(C) Right atrium \n(D) Left atrium" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.875, "loss": 4.317776679992676 }, { "id": "identity_247220", "image": "VQA2.0/images/324246.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the elements visible in the image, describe the likely location and purpose of the structure in the foreground" }, { "from": "gpt", "value": "The answer is Beach parking management" } ], "clip_score": 32.625, "loss": 4.700104713439941 }, { "id": "identity_283467", "image": "A-OKVQA/images/190805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which position, among the top, right, bottom, and left, contains the fruit or vegetable with the highest potassium content?\nChoices:\n(A) top\n(B) right\n(C) bottom\n(D) left" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.921875, "loss": 4.031249523162842 }, { "id": "identity_235273", "image": "PMC-VQA/images/PMC3097633_F6_95919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structures are visible in the image, and how does their orientation confirm the patient's position during image acquisition?" }, { "from": "gpt", "value": "The answer is Prone position confirmed by anatomical orientation" } ], "clip_score": 27.5, "loss": 4.289943695068359 }, { "id": "identity_268780", "image": "AI2D/images/4809.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term used to describe something that is made up of multiple parts?\nChoices:\n(A) Compond\n(B) Round\n(C) Helicord\n(D) Flat" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.875, "loss": 4.017890930175781 }, { "id": "identity_24925", "image": "PMC-VQA/images/PMC8238105_f1_481627.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of imaging was used to obtain the image shown in the caption? \nChoices:\n(A) CT scan \n(B) X-ray \n(C) MRI \n(D) Ultrasound " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.625, "loss": 4.083317756652832 }, { "id": "identity_6036", "image": "PMC-VQA/images/PMC5977127_jcm-07-00088-f003_321235.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the hematoma? \nChoices:\n(A) Right thigh \n(B) Left thigh \n(C) Right arm \n(D) Left arm " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.234375, "loss": 4.2139129638671875 }, { "id": "identity_331641", "image": "IconQA/images/60950.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner more likely to land on?\nChoices:\n(A) white\n(B) blue" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.65625, "loss": 4.269729137420654 }, { "id": "identity_273491", "image": "ScienceQA/images/3563.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Delaware?\nChoices:\n(A) Cheyenne\n(B) Austin\n(C) Dover\n(D) Georgetown" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.546875, "loss": 4.110668182373047 }, { "id": "identity_305263", "image": "FigureQA/images/88081.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the area under the curve for Brown the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.1328125, "loss": 4.609339237213135 }, { "id": "identity_335428", "image": "A-OKVQA/images/66459.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this person looking at?\nChoices:\n(A) friend\n(B) oncoming traffic\n(C) police\n(D) lights" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.4375, "loss": 4.6368231773376465 }, { "id": "identity_292545", "image": "IconQA/images/76100.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture is the third one if the first picture is a bear?\nChoices:\n(A) bear\n(B) train\n(C) plane" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.546875, "loss": 4.249150276184082 }, { "id": "identity_108697", "image": "A-OKVQA/images/24755.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many skiers are wearing red suits?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.921875, "loss": 4.3805131912231445 }, { "id": "identity_262997", "image": "DocVQA/images/zxvx0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: From which companies are the accommodation and escort services required for process executives?" }, { "from": "gpt", "value": "The answer is other sugar companies" } ], "clip_score": 22.015625, "loss": 5.432310581207275 }, { "id": "identity_104602", "image": "PMC-VQA/images/PMC5972190_F6_319548.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled as 'SVN' in the image?" }, { "from": "gpt", "value": "The answer is Carries balance and spatial orientation information" } ], "clip_score": 19.125, "loss": 4.833978176116943 }, { "id": "identity_328163", "image": "Super-CLEVR/images/superCLEVR_new_000254.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a small brown matte object of the same shape as the red object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 13.3046875, "loss": 5.254207611083984 }, { "id": "identity_314474", "image": "TextVQA/images/1ea65daf0d469bfa.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who are the individuals that get it?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 14.9921875, "loss": 4.983548164367676 }, { "id": "identity_22344", "image": "ScienceQA/images/10747.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Minnesota?\nChoices:\n(A) Saint Paul\n(B) Minneapolis\n(C) Columbus\n(D) New Orleans" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.25, "loss": 4.3480658531188965 }, { "id": "identity_291948", "image": "PMC-VQA/images/PMC6848038_Fig3_65836.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the patient's body is depicted in the X-ray image: the hip joint, shoulder joint, knee joint, or elbow joint?\nChoices:\n(A) Hip joint \n(B) Shoulder joint \n(C) Knee joint \n(D) Elbow joint" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.625, "loss": 4.444878578186035 }, { "id": "identity_278051", "image": "A-OKVQA/images/512156.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color of the hair of the woman standing next to the camera? Is it lemon, lime, apple, or carrot?\nChoices:\n(A) lemon\n(B) lime\n(C) apple\n(D) carrot" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.796875, "loss": 4.1620378494262695 }, { "id": "identity_237591", "image": "PMC-VQA/images/PMC5570225_fig2_206568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the regions labeled in the image and describe their functions. Then, determine which region is shown as the orange box in the inset image" }, { "from": "gpt", "value": "The answer is posterior midgut region" } ], "clip_score": 24.875, "loss": 4.453482627868652 }, { "id": "identity_121223", "image": "TextVQA/images/7b029946e37ae57d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible on the court wearing maroon jerseys?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.515625, "loss": 4.520908355712891 }, { "id": "identity_303564", "image": "FigureQA/images/89384.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.5078125, "loss": 4.73834753036499 }, { "id": "identity_264532", "image": "FigureQA/images/43697.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Slate the median with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.34375, "loss": 4.152453422546387 }, { "id": "identity_130711", "image": "Super-CLEVR/images/superCLEVR_new_000070.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the car that is to the right of the green motorbike and in front of the blue car?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 24.359375, "loss": 4.455738544464111 }, { "id": "identity_205963", "image": "A-OKVQA/images/301714.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individual carrots can be counted in the image?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 21.625, "loss": 4.519923686981201 }, { "id": "identity_122274", "image": "VQA2.0/images/26652.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible damage or wear on the bench?" }, { "from": "gpt", "value": "The answer is No visible damage" } ], "clip_score": 26.40625, "loss": 4.194344997406006 }, { "id": "identity_326941", "image": "A-OKVQA/images/372384.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person trying to do?\nChoices:\n(A) bounce pass\n(B) check mate\n(C) homerun\n(D) serve" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 12.03125, "loss": 4.493076324462891 }, { "id": "identity_192286", "image": "PMC-VQA/images/PMC5143741_fig1_103448.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the number of distinct anatomical structures present in this image?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 23.03125, "loss": 4.69347620010376 }, { "id": "identity_293387", "image": "PMC-VQA/images/PMC5648941_F1_229187.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the type of fracture displayed in the radiograph:\n(A) Colles fracture\n(B) Intertrochanteric fracture\n(C) Femoral neck fracture\n(D) Greenstick fracture" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.78125, "loss": 4.627121925354004 }, { "id": "identity_279648", "image": "TQA/images/question_images/greenhouse_effect_6942.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which element in the greenhouse effect causes the reabsorption of rays among the following choices? \n(A) greenhouse gases\n(B) infra-red rays\n(C) atmosphere\n(D) sun" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.96875, "loss": 4.07023811340332 }, { "id": "identity_317201", "image": "A-OKVQA/images/201596.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is making the dog rest its head on the side of the boat?\nChoices:\n(A) exhaustion\n(B) laziness\n(C) command\n(D) boredom" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.96875, "loss": 4.053433418273926 }, { "id": "identity_324570", "image": "ScienceQA/images/4462.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of this place?\nChoices:\n(A) Massachusetts\n(B) Vermont\n(C) Georgia\n(D) New Hampshire" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.609375, "loss": 4.187219619750977 }, { "id": "identity_26672", "image": "PMC-VQA/images/PMC9363346_Fig6_367474.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which planes are depicted in the top panel? \nChoices:\n(A) Horizontal, sagittal and frontal \n(B) Sagittal, coronal and axial \n(C) Horizontal, coronal and sagittal \n(D) Axial, frontal and coronal " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.1171875, "loss": 4.015040397644043 }, { "id": "identity_306844", "image": "TQA/images/abc_question_images/hair_follicles_16997.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label corresponds to the papilla?\nChoices:\n(A) L\n(B) C\n(C) F\n(D) D" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.796875, "loss": 4.0922040939331055 }, { "id": "identity_211397", "image": "PMC-VQA/images/PMC2742518_F4_45854.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is indicated by the arrow in the image?" }, { "from": "gpt", "value": "The answer is Nerve root" } ], "clip_score": 27.78125, "loss": 4.050421237945557 }, { "id": "identity_326049", "image": "PMC-VQA/images/PMC8088723_Fig3_432110.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the compression of this structure?\nChoices:\n(A) Lateral extension\n(B) Anterior extension\n(C) Posterior extension\n(D) Inferior extension" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.703125, "loss": 4.10952091217041 }, { "id": "identity_318147", "image": "A-OKVQA/images/306663.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the white posts called in front of this place?\nChoices:\n(A) lightposts\n(B) structurals\n(C) columns\n(D) signposts" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.796875, "loss": 4.377989292144775 }, { "id": "identity_63936", "image": "TQA/images/question_images/parts_worm_7334.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many aortic arches are visible in the diagram?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 27.59375, "loss": 4.414997577667236 }, { "id": "identity_12362", "image": "FigureQA/images/24783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Salmon the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.5, "loss": 4.19899845123291 }, { "id": "identity_335488", "image": "PMC-VQA/images/PMC4891828_Fig1_37404.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which panel indicates a lung CT scan with a high likelihood of lung recruitment?\nChoices:\n(A) Upper panel \n(B) Lower panel \n(C) Middle panel \n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.203125, "loss": 4.018305778503418 }, { "id": "identity_266176", "image": "FigureQA/images/50326.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Periwinkle greater than Gray?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.4375, "loss": 4.144431114196777 }, { "id": "identity_308108", "image": "ScienceQA/images/17778.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the primary persuasive appeal used in this ad?\nChoices:\n(A) pathos (emotion)\n(B) ethos (character)\n(C) logos (reason)" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.7734375, "loss": 4.869821071624756 }, { "id": "identity_333134", "image": "PMC-VQA/images/PMC2562460_F5_28465.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the muscle shown around this opening called?\nChoices:\n(A) circular muscle band\n(B) radial muscle band\n(C) longitudinal muscle band\n(D) diagonal muscle band" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.484375, "loss": 4.029629707336426 }, { "id": "identity_36274", "image": "PMC-VQA/images/PMC4418099_Fig1_383036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the hematoma located in the patient's brain? \nChoices:\n(A) Right frontal scalp. \n(B) Left frontal scalp. \n(C) Occipital scalp. \n(D) Temporal scalp. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.15625, "loss": 4.003678798675537 }, { "id": "identity_125692", "image": "TextVQA/images/7ebc2a7ca5753975.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flags are visible on top of the building?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 15.875, "loss": 4.634815692901611 }, { "id": "identity_139814", "image": "DocVQA/images/fspw0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the nature of the complaint filed by the consumer?" }, { "from": "gpt", "value": "The answer is Cigarettes caused dry mouth and lips, sneezing, achy head - general ill feeling also harsh taste" } ], "clip_score": 21.34375, "loss": 4.082027912139893 }, { "id": "identity_10405", "image": "ScienceQA/images/19185.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is highlighted?\nChoices:\n(A) Antarctica\n(B) Africa\n(C) Europe" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.4375, "loss": 4.416881561279297 }, { "id": "identity_222069", "image": "A-OKVQA/images/180488.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many ski lifts are visible in the background?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.125, "loss": 4.630359649658203 }, { "id": "identity_255381", "image": "TextVQA/images/fbcc3a12ba8e8505.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the country represented by the flag on the building, and what street does this building intersect?" }, { "from": "gpt", "value": "The answer is Canada, Georgia" } ], "clip_score": 35.1875, "loss": 5.0070929527282715 }, { "id": "identity_332787", "image": "PMC-VQA/images/PMC7707631_IJMS-45-485-g001.tif_311710.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of lesions were identified in the brain?\nChoices:\n(A) Fibrotic\n(B) Hemorrhagic\n(C) Neurotic\n(D) Ischemic" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.46875, "loss": 4.028393268585205 }, { "id": "identity_37221", "image": "PMC-VQA/images/PMC7881607_Fig1_367488.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique used? \nChoices:\n(A) CT scan \n(B) X-ray \n(C) Ultrasound \n(D) MRI " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.515625, "loss": 4.012691020965576 }, { "id": "identity_289695", "image": "Super-CLEVR/images/superCLEVR_new_001313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more matte motorbikes remaining than big brown matte dirtbikes to the left of the small metallic object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 18.90625, "loss": 5.066256999969482 }, { "id": "identity_319947", "image": "TextVQA/images/ec0f390d16bb2d95.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mascot of the school this racer is representing?" }, { "from": "gpt", "value": "The answer is cougar " } ], "clip_score": 16.046875, "loss": 4.198655128479004 }, { "id": "identity_295202", "image": "A-OKVQA/images/85795.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what is the girl seated?\nChoices:\n(A) box\n(B) chair\n(C) scooter\n(D) luggage bag" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.375, "loss": 4.209193229675293 }, { "id": "identity_204506", "image": "A-OKVQA/images/510806.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the side of the police motorcycle?" }, { "from": "gpt", "value": "The answer is Lawrence Police" } ], "clip_score": 28.34375, "loss": 4.228851795196533 }, { "id": "identity_37687", "image": "PMC-VQA/images/PMC7567400_f2_276956.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is displayed in the top-left corner of the axial view? \nChoices:\n(A) The technician-defined tumor regions \n(B) The DeepMedic-defined tumor regions \n(C) The mean neuroradiologist score \n(D) The exam date. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.171875, "loss": 4.392942428588867 }, { "id": "identity_127786", "image": "TextVQA/images/00c346e16e2154ea.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible players are wearing gloves?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.53125, "loss": 4.792530059814453 }, { "id": "identity_269469", "image": "TabMWP/images/19190.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During which month did the farm equipment company produce the highest number of tractors, according to their monthly records?\nChoices:\n(A) December\n(B) January\n(C) February\n(D) March" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.3125, "loss": 4.177731513977051 }, { "id": "identity_285858", "image": "AI2D/images/1300.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options represents stage 2 of this life cycle? \n(A) seed with root\n(B) young plant\n(C) seedling with leaf\n(D) seed" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 36.0, "loss": 4.5595598220825195 }, { "id": "identity_334828", "image": "IconQA/images/8807.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a boat. What is the second image?\nChoices:\n(A) crab\n(B) boat\n(C) bucket" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.609375, "loss": 4.143786430358887 }, { "id": "identity_153008", "image": "TQA/images/abc_question_images/earth_poles_10168.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label 'C' indicate in the diagram?" }, { "from": "gpt", "value": "The answer is Terminator line" } ], "clip_score": 31.859375, "loss": 4.318192005157471 }, { "id": "identity_550", "image": "AI2D/images/1249.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The last quarter half occurs when?\nChoices:\n(A) After the waxing crescent\n(B) After the waning gibbous\n(C) Before the waxing gibbous\n(D) Before the new moon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.3125, "loss": 4.123202800750732 }, { "id": "identity_317956", "image": "FigureQA/images/76791.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.078125, "loss": 4.490767002105713 }, { "id": "identity_324854", "image": "A-OKVQA/images/384496.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this person doing?\nChoices:\n(A) stealing horses\n(B) plowing field\n(C) feeding horses\n(D) riding horses" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.078125, "loss": 4.653960704803467 }, { "id": "identity_271740", "image": "TQA/images/question_images/periodic_table_8159.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which element has an atomic number of 2?\n(A) Boron\n(B) Helium\n(C) Hydrogen\n(D) Carbon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.328125, "loss": 4.872273921966553 }, { "id": "identity_276146", "image": "VQA2.0/images/343383.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the signs that indicate the weather is warm?" }, { "from": "gpt", "value": "The answer is lots people on beach" } ], "clip_score": 19.890625, "loss": 4.851978778839111 }, { "id": "identity_65672", "image": "AI2D/images/1846.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tertiary consumers are depicted in this food web?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 27.625, "loss": 4.323245048522949 }, { "id": "identity_257479", "image": "A-OKVQA/images/71582.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the most obvious thing that has been done to the unusual hair, and how does it compare to the other options in terms of visibility and impact?" }, { "from": "gpt", "value": "The answer is dyed" } ], "clip_score": 14.25, "loss": 4.695291042327881 }, { "id": "identity_316169", "image": "Super-CLEVR/images/superCLEVR_new_000401.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there the same number of gray rubber cruisers that are behind some large matte motorbike and big rubber motorbikes?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 23.9375, "loss": 5.163724899291992 }, { "id": "identity_280126", "image": "FigureQA/images/79742.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly states whether Dark Red is the smoothest?\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.84375, "loss": 4.326058864593506 }, { "id": "identity_23671", "image": "TextVQA/images/4bf43a7b2a898044.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: according to the banner what type of center?" }, { "from": "gpt", "value": "The answer is life care" } ], "clip_score": 9.5625, "loss": 5.447850704193115 }, { "id": "identity_195398", "image": "PMC-VQA/images/PMC7407771_brainsci-10-00427-f002_228561.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the tumor core as seen in the image?" }, { "from": "gpt", "value": "The answer is Irregular circular shape" } ], "clip_score": 36.96875, "loss": 4.000880718231201 }, { "id": "identity_73018", "image": "TQA/images/abc_question_images/cycle_water_11481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label 'L' indicate in the water cycle diagram?" }, { "from": "gpt", "value": "The answer is Collection or Runoff" } ], "clip_score": 36.0625, "loss": 4.213505268096924 }, { "id": "identity_294516", "image": "ScienceQA/images/19177.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is a mammal?\nOptions:\n(A) red kangaroo\n(B) American toad" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.6640625, "loss": 4.626489639282227 }, { "id": "identity_21215", "image": "PMC-VQA/images/PMC8616551_cancers-13-05745-f006_120190.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the bottom right image?\nChoices:\n(A) tumor spheroids growth \n(B) F-actin network and nuclei \n(C) CD31 Immunofluorescence staining \n(D) SEM images" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.375, "loss": 4.72568941116333 }, { "id": "identity_305180", "image": "VQA2.0/images/169510.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the person sitting on?" }, { "from": "gpt", "value": "The answer is cooler" } ], "clip_score": 17.5625, "loss": 4.843129634857178 }, { "id": "identity_305730", "image": "PMC-VQA/images/PMC4956629_Fig12_54032.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the likely diagnosis based on this image?\nChoices:\n(A) Appendicitis\n(B) Peritonitis\n(C) Pancreatitis\n(D) Cholecystitis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.328125, "loss": 4.484025955200195 }, { "id": "identity_312242", "image": "TextVQA/images/3e569f2f4c818ed6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first word on this page?" }, { "from": "gpt", "value": "The answer is upon" } ], "clip_score": 16.484375, "loss": 4.495217800140381 }, { "id": "identity_289371", "image": "ScienceQA/images/5485.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic is shared by both of these objects?\nChoices:\n(A) soft\n(B) stretchy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.28125, "loss": 4.270323753356934 }, { "id": "identity_288479", "image": "PMC-VQA/images/PMC6561366_f2_492359.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the abbreviation CD stand for in terms of coronary arteries? \nChoices:\n(A) Left Coronary Artery\n(B) Trunk of Left Coronary Artery\n(C) Right Coronary Artery\n(D) None of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.234375, "loss": 4.273381233215332 }, { "id": "identity_331738", "image": "A-OKVQA/images/439991.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person with mustache about to do?\nChoices:\n(A) board bus\n(B) sightseeing\n(C) block people\n(D) get off" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.21875, "loss": 4.1222944259643555 }, { "id": "identity_316632", "image": "FigureQA/images/1738.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.875, "loss": 4.74845027923584 }, { "id": "identity_307643", "image": "TQA/images/question_images/human_system_digestive_1395.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the organ located between the esophagus and another part of the digestive system?\nChoices:\n(A) rectum\n(B) stomach\n(C) anus\n(D) large intestine" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.3125, "loss": 4.347083568572998 }, { "id": "identity_270979", "image": "PMC-VQA/images/PMC8067020_cells-10-00769-f006_424748.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the red arrows in the diagram? \nChoices:\n(A) indicating the position of G-actin in the nucleus \n(B) showing the more concentrated presence of G-actin in the cytoplasm \n(C) pointing to the location of G-actin in the cytoplasmic membrane \n(D) highlighting the denser accumulation of G-actin in the nucleus." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.34375, "loss": 4.262138366699219 }, { "id": "identity_331929", "image": "TextVQA/images/24bec4d1080a9171.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first letter on the person's shirt?" }, { "from": "gpt", "value": "The answer is t" } ], "clip_score": 8.8203125, "loss": 4.203798294067383 }, { "id": "identity_63448", "image": "AI2D/images/203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of bacteria in the pond ecosystem as shown in the image?" }, { "from": "gpt", "value": "The answer is Decomposers" } ], "clip_score": 29.609375, "loss": 4.021233081817627 }, { "id": "identity_317205", "image": "PMC-VQA/images/PMC4993752_F4_65235.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the image?\nChoices:\n(A) Hyphal growth in hair roots \n(B) Hyphal growth in control roots \n(C) No hyphal growth in hair roots \n(D) No hyphal growth in control roots" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.484375, "loss": 4.651211738586426 }, { "id": "identity_199918", "image": "VQA-AS/images/8985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many birds are flying in the sky?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.859375, "loss": 4.496859550476074 }, { "id": "identity_290385", "image": "A-OKVQA/images/30413.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what are the people waiting?\nChoices:\n(A) boarding train\n(B) taxi cab\n(C) airport bus\n(D) parade" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.328125, "loss": 4.34003210067749 }, { "id": "identity_290878", "image": "A-OKVQA/images/84474.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of this aircraft? Is it a seaplane, lear jet, fighter, or helicopter?\nChoices:\n(A) seaplane\n(B) lear jet\n(C) fighter\n(D) helicopter" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.703125, "loss": 4.198610782623291 }, { "id": "identity_306704", "image": "TQA/images/abc_question_images/human_system_muscular_16157.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label corresponds to the pectoralis major?\nChoices:\n(A) Y\n(B) C\n(C) E\n(D) A" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.515625, "loss": 4.427426815032959 }, { "id": "identity_2200", "image": "A-OKVQA/images/325184.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At which elevation are the elephants compared to those looking at them?\nChoices:\n(A) same\n(B) lower\n(C) changing\n(D) higher" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.28125, "loss": 4.227993011474609 }, { "id": "identity_313590", "image": "A-OKVQA/images/120810.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the terms refer to?\nChoices:\n(A) road rules\n(B) rider code\n(C) biker terms\n(D) financing" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.921875, "loss": 4.321707725524902 }, { "id": "identity_25282", "image": "A-OKVQA/images/501493.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person in the orange cap doing?\nChoices:\n(A) scoring game\n(B) yelling insults\n(C) spotting cheaters\n(D) establishing rhythm" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.65625, "loss": 4.185720443725586 }, { "id": "identity_292168", "image": "FigureQA/images/12172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Saddle Brown or Light Seafoam?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.484375, "loss": 4.6315507888793945 }, { "id": "identity_283408", "image": "TextVQA/images/0193991401f83e64.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which language is being used on the bus?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 24.078125, "loss": 4.309035301208496 }, { "id": "identity_283881", "image": "PMC-VQA/images/PMC7956956_f4_390488.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which plane was the imaging conducted?\nOptions:\n(A) Sagittal plane\n(B) Axial plane\n(C) Coronal plane\n(D) Oblique plane" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.5, "loss": 4.45644998550415 }, { "id": "identity_208818", "image": "A-OKVQA/images/337621.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals in the image are wearing wings?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 17.875, "loss": 4.365644454956055 }, { "id": "identity_297100", "image": "IconQA/images/9061.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does the dotted line serve as a line of symmetry?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.796875, "loss": 4.200188159942627 }, { "id": "identity_297569", "image": "TQA/images/question_images/erosion_6851.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What feature is situated directly beneath the Headland in the diagram of Erosion and Deposition by Flowing Water?\nChoices:\n(A) Stump\n(B) Blowhole\n(C) Arch\n(D) Stack" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.0625, "loss": 4.0920891761779785 }, { "id": "identity_275283", "image": "FigureQA/images/46764.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Cornflower or Gray?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.453125, "loss": 4.661538124084473 }, { "id": "identity_273503", "image": "IconQA/images/31783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the eighth picture in the sequence provided, where the first picture is a house.\nChoices:\n(A) paw\n(B) dog\n(C) house" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.875, "loss": 4.190073013305664 }, { "id": "identity_225020", "image": "A-OKVQA/images/104309.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the plumbers need to work on the pipes, what items will they need to move or remove to access the pipes, and what is the total number of these items?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 26.234375, "loss": 4.631886005401611 }, { "id": "identity_192667", "image": "VizWiz/images/VizWiz_train_00010756.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the object behind the mouse?" }, { "from": "gpt", "value": "The answer is A lamp" } ], "clip_score": 26.015625, "loss": 4.965533256530762 }, { "id": "identity_282968", "image": "VQA2.0/images/499294.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this object a space shuttle?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.84375, "loss": 4.6486616134643555 }, { "id": "identity_28876", "image": "IconQA/images/32670.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a barn. Which picture is ninth?\nChoices:\n(A) tractor\n(B) barn\n(C) sun" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.625, "loss": 4.1840901374816895 }, { "id": "identity_331436", "image": "PMC-VQA/images/PMC3943270_F2_271813.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the medical condition depicted in the image?\nChoices:\n(A) Crohn's disease\n(B) Colon cancer\n(C) Familial adenomatous polyposis\n(D) Ulcerative Colitis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.640625, "loss": 4.321508884429932 }, { "id": "identity_25508", "image": "PMC-VQA/images/PMC8201921_Fig2_467736.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the body did the medical imaging focus on? \nChoices:\n(A) The heart \n(B) The brain \n(C) The lungs \n(D) The liver " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.375, "loss": 4.667037010192871 }, { "id": "identity_234746", "image": "PMC-VQA/images/PMC7693825_jcm-09-03516-f002_306865.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the grey color represent in the image, and how does it relate to the other colors present in terms of anatomical structures and surgical outcomes?" }, { "from": "gpt", "value": "The answer is Grey represents the native mandible, crucial for surgical planning" } ], "clip_score": 29.765625, "loss": 4.553822040557861 }, { "id": "identity_263766", "image": "PMC-VQA/images/PMC6137085_F1_367491.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the experiment, which bone region was the subject of imaging?\nChoices:\n(A) proximal femoral metaphysis,\n(B) distal femoral metaphysis,\n(C) distal tibial metaphysis,\n(D) proximal tibial metaphysis." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.4375, "loss": 4.279231071472168 }, { "id": "identity_207481", "image": "A-OKVQA/images/115930.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are visible behind the counter in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.796875, "loss": 4.473564624786377 }, { "id": "identity_307875", "image": "DVQA/images/bar_train_00046331.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the sixth group of bars?" }, { "from": "gpt", "value": "The answer is hell" } ], "clip_score": 15.8671875, "loss": 4.4819841384887695 }, { "id": "identity_307977", "image": "PMC-VQA/images/PMC7339413_Fig3_206162.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the soft tissue mass located in the patient?\nChoices:\n(A) Right scapula\n(B) Left scapula\n(C) Right clavicle\n(D) Left clavicle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.953125, "loss": 4.068382263183594 }, { "id": "identity_24263", "image": "A-OKVQA/images/360665.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the animals near?\nChoices:\n(A) basilica\n(B) bridge\n(C) cliff\n(D) ocean" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.96875, "loss": 4.28827428817749 }, { "id": "identity_27047", "image": "PMC-VQA/images/PMC7780869_F2_334052.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the location of the ground glass density found in Patient 2's CT scan? \nChoices:\n(A) Upper lobes \n(B) Middle lobes \n(C) Lower lobes \n(D) Entire lung. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.8125, "loss": 4.254479885101318 }, { "id": "identity_293874", "image": "A-OKVQA/images/479620.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is positioned above the horse?\nChoices:\n(A) old man\n(B) cat\n(C) girl\n(D) bird" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.421875, "loss": 4.455533027648926 }, { "id": "identity_264470", "image": "A-OKVQA/images/375902.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the individual dressed in blue crossing in the given options?\nChoices:\n(A) their wires\n(B) finish line\n(C) lake\n(D) their letters" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.6640625, "loss": 4.434699058532715 }, { "id": "identity_37072", "image": "A-OKVQA/images/349199.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the boy holding up his shoe?\nChoices:\n(A) disposing sand\n(B) avoiding water\n(C) beating people\n(D) tying shoe" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.234375, "loss": 4.185764312744141 }, { "id": "identity_318910", "image": "FigureQA/images/75983.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this color have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.0, "loss": 4.319976806640625 }, { "id": "identity_306412", "image": "VQA-AS/images/16947.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this room on a lower level or main level?" }, { "from": "gpt", "value": "The answer is ground floor" } ], "clip_score": 25.046875, "loss": 4.164982318878174 }, { "id": "identity_2518", "image": "Super-CLEVR/images/superCLEVR_new_001672.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any other thing of the same color as the tiny matte scooter?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 20.71875, "loss": 4.876879692077637 }, { "id": "identity_271464", "image": "PMC-VQA/images/PMC4312452_Fig4_354171.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the image on the right?\nOptions:\n(A) Exterior of ventral valve;\n(B) Interior of dorsal valve;\n(C) Interior of ventral valve;\n(D) Exterior of dorsal valve." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.53125, "loss": 4.319605827331543 }, { "id": "identity_237108", "image": "TQA/images/question_images/fossils_9107.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the part of the process where the remains are fully buried and the initial stages of fossilization begin, and describe the changes in the sediment layers from the previous stage to this stage" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.4375, "loss": 4.625847816467285 }, { "id": "identity_219106", "image": "PMC-VQA/images/PMC6494259_F1_473982.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the lesion appear to be affecting the surrounding brain tissue?" }, { "from": "gpt", "value": "The answer is No visible effect" } ], "clip_score": 32.71875, "loss": 4.4196577072143555 }, { "id": "identity_112699", "image": "Super-CLEVR/images/superCLEVR_new_000176.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bicycle that is closest to the camera?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 21.015625, "loss": 4.3877272605896 }, { "id": "identity_298126", "image": "PMC-VQA/images/PMC7923829_Fig8_381636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term for the structure identified as level 6 in the diagram?\nChoices:\n(A) Medial malleolus of the tibia \n(B) Trochlear surface of the talus \n(C) Tarsometatarsal joint \n(D) Calcaneus tuberosity." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.453125, "loss": 4.1618828773498535 }, { "id": "identity_27295", "image": "A-OKVQA/images/43344.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who is the man sitting in the middle?\nChoices:\n(A) rafael nadal\n(B) jack perry\n(C) bo jackson\n(D) djimon hounsou" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.34375, "loss": 4.721530914306641 }, { "id": "identity_11225", "image": "AI2D/images/3549.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the letter D represent?\nChoices:\n(A) Golgi Bodies\n(B) Vacuole\n(C) Mitochondria\n(D) Cytoplasm" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.34375, "loss": 4.706942081451416 }, { "id": "identity_120323", "image": "Super-CLEVR/images/superCLEVR_new_001999.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is the green object in the center of the image?" }, { "from": "gpt", "value": "The answer is Car" } ], "clip_score": 24.6875, "loss": 4.038233280181885 }, { "id": "identity_35804", "image": "A-OKVQA/images/249288.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: WHich is a chain burger restaurant sign?\nChoices:\n(A) gifts\n(B) mcdonalds\n(C) forever 21\n(D) disney" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.359375, "loss": 4.098151206970215 }, { "id": "identity_124879", "image": "VizWiz/images/VizWiz_train_00006572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the object behind the beverage can?" }, { "from": "gpt", "value": "The answer is A lighter" } ], "clip_score": 32.375, "loss": 4.042932987213135 }, { "id": "identity_274617", "image": "AI2D/images/1813.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the terrestrial food web provided, which organism is both a predator and prey? \nChoices:\n(A) deer\n(B) racoon\n(C) none\n(D) insects" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.640625, "loss": 4.0834641456604 }, { "id": "identity_29016", "image": "TabMWP/images/35355.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is the function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.25, "loss": 4.154701232910156 }, { "id": "identity_63126", "image": "TQA/images/question_images/earth_moon_phases_1257.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many moon phases are depicted in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 31.515625, "loss": 4.289974689483643 }, { "id": "identity_269770", "image": "AI2D/images/2079.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the provided food web, what is the primary source of energy? Is it the sun, bird, algae, or crab?\nChoices:\n(A) sun\n(B) bird\n(C) algae\n(D) crab" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.8125, "loss": 4.085168838500977 }, { "id": "identity_300744", "image": "AI2D/images/67.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the outcome of decomposition?\nChoices:\n(A) diffusion\n(B) fossil fuels\n(C) combustion\n(D) carbon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.9375, "loss": 4.374181270599365 }, { "id": "identity_11754", "image": "AI2D/images/1232.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the coldest season of the year?\nChoices:\n(A) Spring\n(B) Summer\n(C) Winter\n(D) Autumn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.59375, "loss": 4.102822780609131 }, { "id": "identity_337681", "image": "PMC-VQA/images/PMC3667362_Fig1_207823.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do f and g indicate in the image?\nChoices:\n(A) Different types of tumors\n(B) Images of the patient after surgery\n(C) MRI taken after surgery\n(D) Tumor mass after resection" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.015625, "loss": 4.007468223571777 }, { "id": "identity_335044", "image": "AI2D/images/2900.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the label B indicate?\nChoices:\n(A) Right Lung\n(B) Lobe Removed\n(C) Normal Lymph Nodes\n(D) Left Lung" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.40625, "loss": 4.86660099029541 }, { "id": "identity_326980", "image": "PMC-VQA/images/PMC6020204_Fig2_333684.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of imaging was used to reveal the tumor in this location?\nChoices:\n(A) CT scan\n(B) PET scan\n(C) T1-weighted axial images\n(D) T2-weighted axial images" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.625, "loss": 4.232730388641357 }, { "id": "identity_264076", "image": "AI2D/images/3315.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the letter H in the given options?\nChoices:\n(A) Centriole\n(B) Nucleolus\n(C) Smooth Endoplasmic Reticulum\n(D) Mitochondrian" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.25, "loss": 4.5811614990234375 }, { "id": "identity_26326", "image": "PMC-VQA/images/PMC4557590_f3-cln_70p654_420029.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the abnormal signal intensity extending from the cortex to the lateral ventricle in the provided image? \nChoices:\n(A) Normal brain tissue \n(B) White matter lesion \n(C) Air artifact \n(D) Blood clot " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.25, "loss": 4.633086681365967 }, { "id": "identity_299915", "image": "DocVQA/images/xgmf0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organization published the American Heart Association Cookbook?" }, { "from": "gpt", "value": "The answer is David McKay" } ], "clip_score": 23.234375, "loss": 4.3959574699401855 }, { "id": "identity_149061", "image": "DocVQA/images/jmwn0226_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the author's initials for all the comments?" }, { "from": "gpt", "value": "The answer is tns" } ], "clip_score": 17.515625, "loss": 4.695029258728027 }, { "id": "identity_328897", "image": "TabMWP/images/1351.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table displays a function. Is this function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.53125, "loss": 4.509093761444092 }, { "id": "identity_215629", "image": "A-OKVQA/images/256852.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the pub on the left side of the image?" }, { "from": "gpt", "value": "The answer is The Gun Tavern" } ], "clip_score": 17.4375, "loss": 4.04676628112793 }, { "id": "identity_106671", "image": "TextVQA/images/84b3fc5b08f24dba.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many ceiling fans can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.0, "loss": 4.547614097595215 }, { "id": "identity_313389", "image": "FigureQA/images/71121.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.390625, "loss": 4.0384345054626465 }, { "id": "identity_24281", "image": "AI2D/images/4524.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the main axis in the main stem called?\nChoices:\n(A) Rachis\n(B) Palmately\n(C) Trifoliate\n(D) Pinnately" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.9375, "loss": 4.242262840270996 }, { "id": "identity_294982", "image": "DocVQA/images/hpxm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To whom is the memorandum addressed?" }, { "from": "gpt", "value": "The answer is Biles" } ], "clip_score": 20.3125, "loss": 4.430278301239014 }, { "id": "identity_124357", "image": "Super-CLEVR/images/superCLEVR_new_000968.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly behind the green car?" }, { "from": "gpt", "value": "The answer is Tram" } ], "clip_score": 17.5625, "loss": 4.663311004638672 }, { "id": "identity_326069", "image": "FigureQA/images/7854.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.96875, "loss": 4.602105140686035 }, { "id": "identity_30569", "image": "ScienceQA/images/18112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the picture. Which word best describes how this honey tastes?\nChoices:\n(A) sweet\n(B) bitter\n(C) minty" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.359375, "loss": 4.119776248931885 }, { "id": "identity_20294", "image": "A-OKVQA/images/490044.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of event is this?\nChoices:\n(A) rally\n(B) competition\n(C) farmer's market\n(D) concert" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.046875, "loss": 4.359385967254639 }, { "id": "identity_285683", "image": "A-OKVQA/images/51501.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose is the woman using an umbrella?\nChoices:\n(A) decoration\n(B) sun protection\n(C) exercise\n(D) rain" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.46875, "loss": 4.301276206970215 }, { "id": "identity_106066", "image": "Super-CLEVR/images/superCLEVR_new_001445.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object directly above the blue car?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 20.921875, "loss": 4.371087551116943 }, { "id": "identity_305135", "image": "FigureQA/images/17087.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.015625, "loss": 4.262315273284912 }, { "id": "identity_65248", "image": "AI2D/images/4473.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which image, A or B, shows a part of the appendicular skeleton?" }, { "from": "gpt", "value": "The answer is Image A" } ], "clip_score": 29.78125, "loss": 4.09708309173584 }, { "id": "identity_38046", "image": "PMC-VQA/images/PMC5707219_Fig6_245867.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the patient's medical history? \nChoices:\n(A) Diverticulitis \n(B) Appendicitis \n(C) Crohn's Disease \n(D) Pancreatitis " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.703125, "loss": 4.648989200592041 }, { "id": "identity_291091", "image": "TQA/images/question_images/aquifers_6517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term used to describe the rock level where water is injected for storage?\nChoices:\n(A) Injection bubble\n(B) Supply works\n(C) Aquifer\n(D) ASR borehole" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.671875, "loss": 5.222727298736572 }, { "id": "identity_315246", "image": "FigureQA/images/12624.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the Red section the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.0625, "loss": 4.348732948303223 }, { "id": "identity_132286", "image": "TextVQA/images/c32bda4ba1ed67bc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many letters are in the brand name on the shirt?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 13.28125, "loss": 4.669739723205566 }, { "id": "identity_240867", "image": "VQA2.0/images/246880.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of clocks can you identify in the image, and what are their distinct features?" }, { "from": "gpt", "value": "The answer is five" } ], "clip_score": 22.109375, "loss": 5.885534763336182 }, { "id": "identity_293883", "image": "A-OKVQA/images/350463.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: To which genus does this fruit belong? Is it:\n(A) musa\n(B) malus\n(C) citrus\n(D) ficus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.3671875, "loss": 4.346846580505371 }, { "id": "identity_10926", "image": "FigureQA/images/91907.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Light Slate have the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.71875, "loss": 4.072392463684082 }, { "id": "identity_304360", "image": "DocVQA/images/txcx0227_18.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Whose resume is this?" }, { "from": "gpt", "value": "The answer is Ei Soon Cho" } ], "clip_score": 18.609375, "loss": 4.090577125549316 }, { "id": "identity_294745", "image": "PMC-VQA/images/PMC6165146_materials-11-01556-f011_375985.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In (A), what is indicated by the arrow heads?\nChoices:\n(A) Location of original interface\n(B) Superior border of NBS\n(C) Implant location\n(D) Mandibular border." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.5625, "loss": 4.44141149520874 }, { "id": "identity_256403", "image": "DocVQA/images/xnlc0228_53.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What honorary titles and recognitions did Dr. Tom D. Spies and Mr. C. P. Loranz receive from Cuba, and on what dates were they awarded?" }, { "from": "gpt", "value": "The answer is Honorary memberships, Official in National Order, Comendador, various dates" } ], "clip_score": 20.484375, "loss": 4.2990336418151855 }, { "id": "identity_23237", "image": "TabMWP/images/6794.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is the function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.265625, "loss": 4.426128387451172 }, { "id": "identity_17417", "image": "PMC-VQA/images/PMC6994645_Fig1_109414.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the color Doppler create on the stone? \nChoices:\n(A) A twinkling effect \n(B) A rainbow effect \n(C) A black and white effect \n(D) A static effect " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.78125, "loss": 4.053069114685059 }, { "id": "identity_331210", "image": "PMC-VQA/images/PMC7417815_uog21873-fig-0001_232409.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the two diameters measured in this study?\nChoices:\n(A) Transverse and oblique diameters \n(B) Anteroposterior and sagittal diameters \n(C) Anteroposterior and coronal diameters \n(D) Radial and ulnar diameters" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.234375, "loss": 4.281959533691406 }, { "id": "identity_112439", "image": "A-OKVQA/images/428440.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dragon heads are visible on the boat?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.078125, "loss": 4.2549543380737305 }, { "id": "identity_316219", "image": "A-OKVQA/images/306420.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the scarf called being worn by the person?\nChoices:\n(A) neckies\n(B) dupatta\n(C) hijab\n(D) dickies" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.421875, "loss": 4.755335807800293 }, { "id": "identity_315815", "image": "ScienceQA/images/14353.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is highlighted?\nChoices:\n(A) Illinois\n(B) Wisconsin\n(C) South Dakota\n(D) Minnesota" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.28125, "loss": 4.269537448883057 }, { "id": "identity_316038", "image": "FigureQA/images/95366.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.046875, "loss": 4.610969543457031 }, { "id": "identity_303982", "image": "AI2D/images/1165.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is responsible for the cell's growth and reproduction?\nChoices:\n(A) Nucleus\n(B) Lysosome\n(C) Golgi apparatus\n(D) Plasma membrane" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.234375, "loss": 4.067405700683594 }, { "id": "identity_312486", "image": "ScienceQA/images/8125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Seattle\n(B) Juneau\n(C) Saint Paul\n(D) Anchorage" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.8125, "loss": 4.38261604309082 }, { "id": "identity_334959", "image": "ScienceQA/images/14471.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this object a solid, a liquid, or a gas?\nChoices:\n(A) a gas\n(B) a liquid\n(C) a solid" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.2578125, "loss": 4.027459144592285 }, { "id": "identity_35082", "image": "PMC-VQA/images/PMC5811606_Fig7_275466.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of images are shown?\nChoices:\n(A) MRI images \n(B) PET/CT images \n(C) SPECT/CT images \n(D) X-ray images " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.828125, "loss": 4.497220039367676 }, { "id": "identity_14518", "image": "PMC-VQA/images/PMC3398958_pone-0040742-g002_145538.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the white arrows indicate in the image? \nChoices:\n(A) Normal adjacent tubules \n(B) Areas of carcinoma with retained tubule architecture \n(C) Diffuse and cytoplasmic expression of PTPRJ in nests of carcinoma cells \n(D) None of the above. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.53125, "loss": 4.768254280090332 }, { "id": "identity_324365", "image": "PMC-VQA/images/PMC7001139_FIG3_111193.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What issues were found in this scan?\nChoices:\n(A) Small intestine dilation \n(B) Increase in renal mass \n(C) Decrease in liver size \n(D) Clear and normal scan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.65625, "loss": 4.1359710693359375 }, { "id": "identity_336296", "image": "TextVQA/images/4d01ee8dd964bfa2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of module?" }, { "from": "gpt", "value": "The answer is service module " } ], "clip_score": 13.5859375, "loss": 4.129065036773682 }, { "id": "identity_290060", "image": "Super-CLEVR/images/superCLEVR_new_001636.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the biplane made of that is located behind the red bicycle?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 23.8125, "loss": 4.455712795257568 }, { "id": "identity_291788", "image": "A-OKVQA/images/530013.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: After standing holding the bat vertically, which position will the person play next? \n(A) catcher \n(B) shortstop \n(C) manager \n(D) batter" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.890625, "loss": 4.2576470375061035 }, { "id": "identity_157706", "image": "ScienceQA/images/12224.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the bird doing in the image?" }, { "from": "gpt", "value": "The answer is Foraging or Feeding" } ], "clip_score": 24.0625, "loss": 4.034818649291992 }, { "id": "identity_107683", "image": "Super-CLEVR/images/superCLEVR_new_000479.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have wheels?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.734375, "loss": 4.623255252838135 }, { "id": "identity_277993", "image": "A-OKVQA/images/364201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the main color of this vehicle similar to?\nChoices:\n(A) grass\n(B) flamingo\n(C) daisy\n(D) sky" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.703125, "loss": 4.108824729919434 }, { "id": "identity_273120", "image": "ScienceQA/images/5372.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following colonies is depicted in the image? \n(A) Rhode Island\n(B) Tennessee\n(C) New Hampshire\n(D) Florida" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.578125, "loss": 4.434517860412598 }, { "id": "identity_48388", "image": "FigureQA/images/1829.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 36.6875, "loss": 4.1145806312561035 }, { "id": "identity_20047", "image": "A-OKVQA/images/108111.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of gathering is this?\nChoices:\n(A) business\n(B) religious\n(C) social\n(D) family" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.328125, "loss": 4.078281879425049 }, { "id": "identity_313397", "image": "Super-CLEVR/images/superCLEVR_new_001785.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any large purple metallic vehicles?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 24.65625, "loss": 5.169394493103027 }, { "id": "identity_309720", "image": "DVQA/images/bar_train_00137903.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are these bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 21.03125, "loss": 4.125048637390137 }, { "id": "identity_34021", "image": "A-OKVQA/images/414588.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is a specific breed of the animal of the table?\nChoices:\n(A) scottish fold\n(B) american lamancha\n(C) german shepherd\n(D) holland lop" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.96875, "loss": 4.463362693786621 }, { "id": "identity_325619", "image": "ScienceQA/images/6411.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this text describe the weather or the climate?\nChoices:\n(A) weather\n(B) climate" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 8.4453125, "loss": 4.616742134094238 }, { "id": "identity_25599", "image": "PMC-VQA/images/PMC6982071_materials-13-00247-f002_106093.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the part of the strut captured in the image? \nChoices:\n(A) Upper Half \n(B) Left Half \n(C) Lower Half \n(D) Right Half " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.65625, "loss": 4.144630432128906 }, { "id": "identity_277299", "image": "PMC-VQA/images/PMC4684858_fig2_454668.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the soft tissue mass located in the CT scan among the following choices: (A) Carotid space, (B) Buccal space, (C) Pterygopalatine fossa, or (D) Nasopharynx area?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.453125, "loss": 4.433544158935547 }, { "id": "identity_11413", "image": "PMC-VQA/images/PMC7403371_Fig1_226922.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the abnormality shown in the image? \nChoices:\n(A) Ventricular septal defect, \n(B) Patent ductus arteriosus, \n(C) Atrial septal defect, \n(D) Tetralogy of Fallot. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.4375, "loss": 4.142905235290527 }, { "id": "identity_330536", "image": "PMC-VQA/images/PMC8008917_f2_406371.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the black asterisks indicate in the image?\nChoices:\n(A) Normal anatomy\n(B) Blood vessels\n(C) Lesions\n(D) Signal artifacts" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.484375, "loss": 4.668551445007324 }, { "id": "identity_278040", "image": "A-OKVQA/images/498436.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which beverage is the most probable option for the filled cups on the table among fruit drink, coffee, water, and soda?\nChoices:\n(A) fruit drink\n(B) coffee\n(C) water\n(D) soda" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.9296875, "loss": 4.2713212966918945 }, { "id": "identity_296430", "image": "TQA/images/question_images/velocity_time_graphs_8210.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this graphic, what is represented by the red line?\nChoices:\n(A) acceleration\n(B) deceleration\n(C) increasing speed\n(D) steady speed" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.265625, "loss": 4.484753131866455 }, { "id": "identity_267727", "image": "TQA/images/question_images/atomic_structure_8007.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the majority of an atom's mass located? \nChoices:\n(A) Proton\n(B) Nucleus\n(C) Electron\n(D) Neutron" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.328125, "loss": 4.471529960632324 }, { "id": "identity_192644", "image": "PMC-VQA/images/PMC5601223_epi13851-fig-0002_216272.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the highlighted area in the brain?" }, { "from": "gpt", "value": "The answer is Irregular shape" } ], "clip_score": 30.65625, "loss": 4.190655708312988 }, { "id": "identity_321313", "image": "PMC-VQA/images/PMC3431266_F2_152411.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the image depicting?\nChoices:\n(A) Side view of the T-NT \n(B) Bottom view of the T-NT \n(C) Top view of the T-NT \n(D) Cross-section of the T-NT" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.28125, "loss": 4.289666652679443 }, { "id": "identity_290300", "image": "TQA/images/abc_question_images/earth_day_night_12744.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the label that corresponds to the Arctic Circle among the given choices? \n(A) H\n(B) F\n(C) T\n(D) N" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.015625, "loss": 4.3558831214904785 }, { "id": "identity_268788", "image": "PMC-VQA/images/PMC7693410_children-07-00206-f001_306624.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reference point for measurement P08?\nChoices:\n(A) The most inferior margin of the mandible.\n(B) The center of the mandibular canal.\n(C) The distance between the mandible and the center of the canal.\n(D) The distance between the mandible and the nasal cavity." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.28125, "loss": 4.026368141174316 }, { "id": "identity_270327", "image": "IconQA/images/9181.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the first picture is a crab, which picture corresponds to the sixth position in the sequence? Is it a boat, a crab, or a bucket?\nChoices:\n(A) boat\n(B) crab\n(C) bucket" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.53125, "loss": 4.16089391708374 }, { "id": "identity_312834", "image": "PMC-VQA/images/PMC5823628_F2_279407.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of MRI is depicted in the images?\nChoices:\n(A) CT scan \n(B) T1-weighted MRI \n(C) T2-weighted MRI \n(D) PET scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.4375, "loss": 4.035423278808594 }, { "id": "identity_219361", "image": "A-OKVQA/images/140990.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the batter's helmet?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 22.546875, "loss": 4.110899925231934 }, { "id": "identity_22803", "image": "DocVQA/images/slmm0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Tony Fisher is from which country?" }, { "from": "gpt", "value": "The answer is us" } ], "clip_score": 17.796875, "loss": 4.348294734954834 }, { "id": "identity_218820", "image": "A-OKVQA/images/4968.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many traffic cones are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 24.84375, "loss": 4.429018497467041 }, { "id": "identity_112968", "image": "Super-CLEVR/images/superCLEVR_new_000106.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Light blue and white" } ], "clip_score": 24.78125, "loss": 4.187961578369141 }, { "id": "identity_36059", "image": "A-OKVQA/images/400371.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fills the gray item the person in a blue shirt and white shorts sits upon?\nChoices:\n(A) air\n(B) oil\n(C) meat\n(D) plastic" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.359375, "loss": 4.4245452880859375 }, { "id": "identity_311210", "image": "TextVQA/images/98005b9c129ca906.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which numbers are handwritten?" }, { "from": "gpt", "value": "The answer is boardwalk on beach" } ], "clip_score": 28.46875, "loss": 4.786291599273682 }, { "id": "identity_324855", "image": "Super-CLEVR/images/superCLEVR_new_000939.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the fighter behind the small cyan aeroplane?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 16.546875, "loss": 4.184721946716309 }, { "id": "identity_283083", "image": "A-OKVQA/images/149440.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of environment is depicted in the image? \nOptions:\n(A) forest\n(B) rural\n(C) coastal\n(D) urban" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 4.71484375, "loss": 4.415378570556641 }, { "id": "identity_264253", "image": "Super-CLEVR/images/superCLEVR_new_001402.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer small red metal choppers behind the red metallic motorbike compared to the number of big objects to the right of the large green thing?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 28.515625, "loss": 5.16542911529541 }, { "id": "identity_278525", "image": "AI2D/images/2931.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the correct label for the fleshy folds on the outside of a mouth? \nChoices:\n(A) Soft palate\n(B) Hard palate\n(C) Gums\n(D) Lip" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 33.65625, "loss": 4.644629955291748 }, { "id": "identity_21797", "image": "A-OKVQA/images/536806.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which sentient beings are able to swim?\nChoices:\n(A) bugs\n(B) humans\n(C) goat\n(D) birds" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.515625, "loss": 4.22598934173584 }, { "id": "identity_241279", "image": "AI2D/images/2275.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the above diagram, what is the sequence of stages from egg to fully grown adult turtle, and which part indicates the juvenile stage?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.28125, "loss": 5.469097137451172 }, { "id": "identity_304483", "image": "ScienceQA/images/11729.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Columbia\n(B) Charleston\n(C) Biloxi\n(D) Baton Rouge" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.890625, "loss": 4.761292457580566 }, { "id": "identity_253793", "image": "A-OKVQA/images/348929.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of items can you identify hanging on the lines on the upper levels, and what are they typically used for?" }, { "from": "gpt", "value": "The answer is One type, clothes for wearing" } ], "clip_score": 15.796875, "loss": 4.660313129425049 }, { "id": "identity_333609", "image": "IconQA/images/63550.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, how likely is it that you will get a black one?\nChoices:\n(A) unlikely\n(B) impossible\n(C) probable\n(D) certain" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.109375, "loss": 4.042504787445068 }, { "id": "identity_239703", "image": "TQA/images/abc_question_images/faults_10186.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the type of fault where the two plates remain on the same plane and describe the relative motion of the plates" }, { "from": "gpt", "value": "The answer is T" } ], "clip_score": 22.34375, "loss": 4.561553955078125 }, { "id": "identity_329309", "image": "AI2D/images/595.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the diagram shows the larva?\nChoices:\n(A) D\n(B) F\n(C) C\n(D) B" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.625, "loss": 4.080960273742676 }, { "id": "identity_115856", "image": "TextVQA/images/00c098c7959397c3.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many traffic lights are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.421875, "loss": 4.304500579833984 }, { "id": "identity_52804", "image": "DocVQA/images/tqjm0223_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of transmission used?" }, { "from": "gpt", "value": "The answer is STD" } ], "clip_score": 24.5625, "loss": 4.597489833831787 }, { "id": "identity_127104", "image": "PMC-VQA/images/PMC8841721_F2_193712.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any structures that appear to be overlapping in the image?" }, { "from": "gpt", "value": "The answer is Yes or No [based on observation" } ], "clip_score": 22.1875, "loss": 4.572065830230713 }, { "id": "identity_219585", "image": "PMC-VQA/images/PMC3730215_fig1_221921.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can we see the diaphragm in this image?" }, { "from": "gpt", "value": "The answer is Yes, partially visible" } ], "clip_score": 27.40625, "loss": 4.0438432693481445 }, { "id": "identity_274", "image": "PMC-VQA/images/PMC5331247_ccr3798-fig-0001_133177.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of image is shown in the caption? \nChoices:\n(A) PET scan \n(B) CT scan \n(C) X-ray \n(D) MRI scan " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.84375, "loss": 4.414417743682861 }, { "id": "identity_203245", "image": "A-OKVQA/images/125120.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many whole pieces of fruit are visible on the table?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 13.203125, "loss": 4.581864356994629 }, { "id": "identity_321534", "image": "PMC-VQA/images/PMC8901523_Fig12_215152.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this image from?\nChoices:\n(A) A research paper\n(B) A university website\n(C) A personal blog\n(D) A social media post" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.359375, "loss": 4.376774311065674 }, { "id": "identity_126126", "image": "TextVQA/images/12c82a20c9c89c02.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of transportation is depicted in the image?" }, { "from": "gpt", "value": "The answer is Train" } ], "clip_score": 28.140625, "loss": 4.495481967926025 }, { "id": "identity_209125", "image": "PMC-VQA/images/PMC6878647_Fig2_74203.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bright spots are visible in the optical coherence tomography image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 30.4375, "loss": 4.424064636230469 }, { "id": "identity_292719", "image": "ScienceQA/images/10301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What could Matt's test potentially demonstrate among the following options?\nChoices:\n(A) The performance of the weather station in windy conditions\n(B) The functionality of the weather station at a temperature of 50°C" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.515625, "loss": 4.485665321350098 }, { "id": "identity_326496", "image": "A-OKVQA/images/486360.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on the string they are holding?\nChoices:\n(A) bird\n(B) kite\n(C) top\n(D) dog" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.4375, "loss": 4.463931083679199 }, { "id": "identity_276164", "image": "Super-CLEVR/images/superCLEVR_new_001484.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any rubber objects located to the left of the small gray chopper?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 23.421875, "loss": 5.556650161743164 }, { "id": "identity_20799", "image": "AI2D/images/3602.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the function of D?\nChoices:\n(A) Provide structural support\n(B) Transmit air to the lungs\n(C) Transmit food to the stomach\n(D) Protect the esophagus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.1875, "loss": 4.250036716461182 }, { "id": "identity_295704", "image": "PMC-VQA/images/PMC3094350_pone-0019629-g001_95238.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the A-D photomicrographs?\nChoices:\n(A) glioblastoma tumor sections \n(B) normal brain sections \n(C) both glioblastoma tumor and normal brain \n(D) Nothing can be concluded from the caption." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.125, "loss": 4.1899943351745605 }, { "id": "identity_271638", "image": "PMC-VQA/images/PMC6618001_dmcn14260-fig-0002_6377.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which structure is depicted in green on the T2-weighted MRI image?\nChoices:\n(A) Cerebellum\n(B) Cortex\n(C) Brainstem\n(D) Thalamus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.125, "loss": 4.097813606262207 }, { "id": "identity_257609", "image": "VizWiz/images/VizWiz_train_00001886.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand of water is in the bottle, and what other objects are visible on the desk?" }, { "from": "gpt", "value": "The answer is Poland Spring, phone, cable management hole" } ], "clip_score": 38.4375, "loss": 5.286867141723633 }, { "id": "identity_302045", "image": "A-OKVQA/images/125694.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this person holding?\nChoices:\n(A) their hair\n(B) some forks\n(C) a baby\n(D) an apple" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.796875, "loss": 4.431857109069824 }, { "id": "identity_20735", "image": "TextVQA/images/c3dfab45c2860094.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: does the bottom say hairpieces of modern ?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 14.0078125, "loss": 4.728705883026123 }, { "id": "identity_65065", "image": "AI2D/images/3548.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many mitochondria are depicted in the diagram?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 29.75, "loss": 4.660192966461182 }, { "id": "identity_13504", "image": "A-OKVQA/images/453409.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The tower behind the center festival decoration is used for broadcasting what?\nChoices:\n(A) cellular service\n(B) radar\n(C) television\n(D) radio" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.9375, "loss": 4.1177978515625 }, { "id": "identity_271258", "image": "ScienceQA/images/16956.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is a mammal?\nChoices:\n(A) toco toucan\n(B) green iguana\n(C) thresher shark\n(D) sea otter" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.90625, "loss": 4.572842597961426 }, { "id": "identity_329718", "image": "A-OKVQA/images/476925.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the bottle placed on that object?\nChoices:\n(A) prevent falling\n(B) keep cool\n(C) protect table\n(D) easier reach" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.234375, "loss": 4.215514183044434 }, { "id": "identity_325403", "image": "PMC-VQA/images/PMC6906848_fig4_82485.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the black triangle indicate in this HE-stained image of testicular tissue in mice?\nChoices:\n(A) Blood vessels\n(B) Connective tissues\n(C) Differentiated Spermatogonia\n(D) Undifferentiated spermatogenic cells" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.125, "loss": 4.512545585632324 }, { "id": "identity_331972", "image": "VQA2.0/images/271118.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What gender is the individual next to the dog?" }, { "from": "gpt", "value": "The answer is male" } ], "clip_score": 18.4375, "loss": 4.05534029006958 }, { "id": "identity_263780", "image": "TQA/images/abc_question_images/parts_skeleton_17270.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter corresponds to the femur in the given choices: C, J, U, or W?\nChoices:\n(A) C\n(B) J\n(C) U\n(D) W" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.671875, "loss": 4.695922374725342 }, { "id": "identity_322148", "image": "TextVQA/images/0193991401f83e64.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What language is used on this?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 15.5390625, "loss": 4.473433017730713 }, { "id": "identity_197264", "image": "VQA2.0/images/456545.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible on the crosswalk?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 27.3125, "loss": 4.3399858474731445 }, { "id": "identity_329833", "image": "PMC-VQA/images/PMC4631848_fig2_440387.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the pain located in the patient described in the Scout film?\nChoices:\n(A) Left side\n(B) Right side\n(C) Both sides\n(D) Abdomen" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.59375, "loss": 4.209548473358154 }, { "id": "identity_290699", "image": "PMC-VQA/images/PMC7412277_materials-13-03186-f001_230475.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what state was the alloy microstructure deformed as shown in the image? \nOptions:\n(A) Solid-solution\n(B) Annealed\n(C) Aged\n(D) Cold-worked" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.515625, "loss": 4.498970985412598 }, { "id": "identity_273490", "image": "A-OKVQA/images/562623.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which animal is depicted in the container? \nOptions:\n(A) tiger\n(B) lion\n(C) lion\n(D) horse" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.28125, "loss": 4.471354961395264 }, { "id": "identity_287232", "image": "PMC-VQA/images/PMC3861471_f1-etm-07-01-0199_251398.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the status of the medial collateral ligament as shown on the preoperative MRI scan?\nChoices:\n(A) completely disrupted\n(B) partially torn\n(C) intact\n(D) inflamed" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.1875, "loss": 4.411114692687988 }, { "id": "identity_327463", "image": "A-OKVQA/images/292990.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the dominant hand for the batter in this image?\nChoices:\n(A) left\n(B) neither\n(C) left foot\n(D) right" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.484375, "loss": 4.521747589111328 }, { "id": "identity_304506", "image": "DocVQA/images/nywf0055_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ID for the connection?" }, { "from": "gpt", "value": "The answer is HUA MEI" } ], "clip_score": 23.375, "loss": 4.128503799438477 }, { "id": "identity_21899", "image": "VizWiz/images/VizWiz_train_00001441.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Please tell me the readings on this thermostat." }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 32.875, "loss": 4.467329978942871 }, { "id": "identity_288192", "image": "PMC-VQA/images/PMC6380005_Fig2_437739.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On the CT scan, is there any evidence of mediastinal shift?\nChoices:\n(A) no mediastinal shift observed\n(B) counterclockwise mediastinal shift\n(C) oblique mediastinal shift\n(D) clockwise mediastinal shift" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.671875, "loss": 4.118443489074707 }, { "id": "identity_271769", "image": "PMC-VQA/images/PMC7016482_fig3_116074.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of radiograph is utilized for cephalometric assessment of the skull?\nChoices:\n(A) Lateral radiograph\n(B) Posteroanterior radiograph\n(C) Anteroposterior radiograph\n(D) Oblique radiograph" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.90625, "loss": 4.566762447357178 }, { "id": "identity_326948", "image": "PMC-VQA/images/PMC2741099_fig1_45777.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrow in this image indicate? \nChoices:\n(A) Cancer cells in blood vessels\n(B) Cancer cells invading the extracellular matrix\n(C) Normal cells in the extracellular matrix\n(D) Blood vessels in the extracellular matrix" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.6875, "loss": 4.66158390045166 }, { "id": "identity_64498", "image": "ScienceQA/images/18538.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states border the highlighted state?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 31.609375, "loss": 4.360032081604004 }, { "id": "identity_35230", "image": "ScienceQA/images/12571.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During this time, thermal energy was transferred from () to ().\nChoices:\n(A) each salmon . . . the surroundings\n(B) the surroundings . . . each salmon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.625, "loss": 4.137513160705566 }, { "id": "identity_331144", "image": "ScienceQA/images/4765.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Charleston\n(B) Portland\n(C) Hartford\n(D) Augusta" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.9375, "loss": 4.475327491760254 }, { "id": "identity_292316", "image": "ScienceQA/images/14571.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Hawaii?\nChoices:\n(A) Baton Rouge\n(B) Phoenix\n(C) Honolulu\n(D) Hilo" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.984375, "loss": 4.174073696136475 }, { "id": "identity_247018", "image": "A-OKVQA/images/59367.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Considering the environment and objects in the image, what activity are the people most likely engaged in, and where would you most likely see the long orange things used in this activity?" }, { "from": "gpt", "value": "The answer is swimming pool" } ], "clip_score": 18.65625, "loss": 4.605522155761719 }, { "id": "identity_19715", "image": "ScienceQA/images/2894.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Rhode Island\n(B) Ohio\n(C) Connecticut\n(D) Maine" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.734375, "loss": 4.450169563293457 }, { "id": "identity_220907", "image": "VizWiz/images/VizWiz_train_00004425.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the container with a blue lid on the table?" }, { "from": "gpt", "value": "The answer is Clear" } ], "clip_score": 23.625, "loss": 4.969592571258545 }, { "id": "identity_232726", "image": "TextVQA/images/297412b64ff28a1f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What letters are displayed on the top floor of the building on the right, and how many windows are there on the top floor of the building on the left?" }, { "from": "gpt", "value": "The answer is ccf, 6" } ], "clip_score": 22.140625, "loss": 4.430882930755615 }, { "id": "identity_155490", "image": "ScienceQA/images/11277.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is located directly to the west of Virginia?" }, { "from": "gpt", "value": "The answer is None" } ], "clip_score": 26.171875, "loss": 4.119538307189941 }, { "id": "identity_206817", "image": "A-OKVQA/images/117988.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the man to the right of the woman in red carrying?" }, { "from": "gpt", "value": "The answer is A camera" } ], "clip_score": 23.53125, "loss": 5.705881118774414 }, { "id": "identity_41676", "image": "DocVQA/images/yfjw0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the principal investigator?" }, { "from": "gpt", "value": "The answer is Kirk, James R" } ], "clip_score": 21.5625, "loss": 4.530150890350342 }, { "id": "identity_287788", "image": "A-OKVQA/images/411138.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which type of outdoor structure are the plants growing?\nChoices:\n(A) barn\n(B) greenhouse\n(C) nursery\n(D) sunroom" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.75, "loss": 4.224062919616699 }, { "id": "identity_266383", "image": "ChartQA/images/24837839005697.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color is represented by the bar for Croatia?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 10.28125, "loss": 5.325623035430908 }, { "id": "identity_223562", "image": "TextVQA/images/e762b229f7140d59.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What promotional material is used for the featured product?" }, { "from": "gpt", "value": "The answer is A large printed sign" } ], "clip_score": 22.703125, "loss": 4.183755397796631 }, { "id": "identity_4077", "image": "TQA/images/abc_question_images/aquifers_16532.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the part of the diagram represented with the letter F?\nChoices:\n(A) CONFINED AQUIFER \n(B) WATER TABLE WELL\n(C) FLOWING ARTESIAN WELL\n(D) UNCONFINED AQUIFER" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 38.90625, "loss": 4.220628261566162 }, { "id": "identity_9408", "image": "FigureQA/images/31702.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Steel Blue the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.5, "loss": 4.166275978088379 }, { "id": "identity_151604", "image": "ScienceQA/images/10039.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries are visible on the map including the highlighted one?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.171875, "loss": 4.3603620529174805 }, { "id": "identity_157446", "image": "AI2D/images/346.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stages does the grasshopper life cycle have according to the diagram?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 33.125, "loss": 4.436335563659668 }, { "id": "identity_230368", "image": "Super-CLEVR/images/superCLEVR_new_000365.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the object that is directly behind the school bus and describe its color and shape" }, { "from": "gpt", "value": "The answer is Red rectangular vehicle" } ], "clip_score": 21.171875, "loss": 4.424657821655273 }, { "id": "identity_114871", "image": "Super-CLEVR/images/superCLEVR_new_001111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly in front of the purple bus?" }, { "from": "gpt", "value": "The answer is A green bus" } ], "clip_score": 25.140625, "loss": 4.719882488250732 }, { "id": "identity_213841", "image": "A-OKVQA/images/8285.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of accessory is the person in the foreground, on the left side, wearing on their head?" }, { "from": "gpt", "value": "The answer is A white cap" } ], "clip_score": 24.890625, "loss": 4.561824321746826 }, { "id": "identity_185162", "image": "A-OKVQA/images/482381.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pots are hanging above the stove?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 16.078125, "loss": 4.285111904144287 }, { "id": "identity_332127", "image": "TextVQA/images/00a209837c2bbec5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the tag of this vehicle?" }, { "from": "gpt", "value": "The answer is mirabus" } ], "clip_score": 15.75, "loss": 4.229087829589844 }, { "id": "identity_302307", "image": "IconQA/images/41415.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on?\nChoices:\n(A) blue\n(B) white" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.75, "loss": 4.146927356719971 }, { "id": "identity_28694", "image": "IconQA/images/2179.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you select a marble without looking, how likely is it that you will pick a black one?\nChoices:\n(A) probable\n(B) impossible\n(C) certain\n(D) unlikely" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.375, "loss": 4.368584632873535 }, { "id": "identity_270854", "image": "ScienceQA/images/1109.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of New York?\nChoices:\n(A) Carson City\n(B) Albany\n(C) Annapolis\n(D) Augusta" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.34375, "loss": 4.10783576965332 }, { "id": "identity_268532", "image": "PMC-VQA/images/PMC9247462_F2_328773.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of images display the lesion with gadolinium enhancement? \nOptions:\n(A) T1-pre-gadolinium images\n(B) T1-post-gadolinium images\n(C) T2-weighted images\n(D) Diffusion-weighted images" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.34375, "loss": 4.1711530685424805 }, { "id": "identity_293404", "image": "A-OKVQA/images/206397.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following processes is related to these animals: nuclear fusion, shearing, soaring, or photosynthesis?\nChoices:\n(A) nuclear fusion\n(B) shearing\n(C) soaring\n(D) photosynthesis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.875, "loss": 4.090246677398682 }, { "id": "identity_71657", "image": "TQA/images/question_images/blastocyst_9031.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct structures are labeled in the image of the zygote?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 30.796875, "loss": 4.081960201263428 }, { "id": "identity_308239", "image": "A-OKVQA/images/482081.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many giraffes are visible in the image above?\nChoices:\n(A) four\n(B) one\n(C) five\n(D) none" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.828125, "loss": 4.220938682556152 }, { "id": "identity_291745", "image": "IconQA/images/5573.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture comes after the barn in the sequence of images provided?\nChoices:\n(A) barn\n(B) sun\n(C) tractor" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.75, "loss": 4.442030429840088 }, { "id": "identity_191015", "image": "PMC-VQA/images/PMC3466236_pone-0047230-g003_159344.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which side of the brain shows the colored section, right or left?" }, { "from": "gpt", "value": "The answer is Right" } ], "clip_score": 31.03125, "loss": 4.1795244216918945 }, { "id": "identity_337020", "image": "TQA/images/question_images/blastocyst_9034.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Considering a Blastocyst, which part will develop into a new human being?\nChoices:\n(A) Embryoblast\n(B) Trophoblast\n(C) Blastocyst Cavity\n(D) Zygote" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.65625, "loss": 5.081469535827637 }, { "id": "identity_38407", "image": "TextVQA/images/4425a34121ab4fd7.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the first letter of the first name of the title?" }, { "from": "gpt", "value": "The answer is c" } ], "clip_score": 8.5546875, "loss": 4.162249565124512 }, { "id": "identity_5077", "image": "TQA/images/question_images/human_system_reproductory_7021.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Urine empties from Urinary bladder, through the Urethra and out which of the following?\nChoices:\n(A) External urethral orifice\n(B) Hymen\n(C) Clitoris\n(D) Mons pubis" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.21875, "loss": 4.201573371887207 }, { "id": "identity_320448", "image": "TabMWP/images/38307.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is it linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.359375, "loss": 4.297605991363525 }, { "id": "identity_38482", "image": "A-OKVQA/images/107225.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who owns the billboard illuminated in the most golden lighting above the NY Police dept?\nChoices:\n(A) izod\n(B) duane reade\n(C) yahoo\n(D) police" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.21875, "loss": 4.047518730163574 }, { "id": "identity_35823", "image": "A-OKVQA/images/136185.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this bookshelf located?\nChoices:\n(A) home\n(B) courtroom\n(C) store\n(D) library" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.171875, "loss": 4.210353851318359 }, { "id": "identity_314629", "image": "ScienceQA/images/17213.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Nashville\n(B) Memphis\n(C) Birmingham\n(D) Richmond" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.09375, "loss": 4.275794982910156 }, { "id": "identity_281434", "image": "A-OKVQA/images/557568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the action being performed by the player in the foreground?\nChoices:\n(A) designated runner\n(B) practicing swing\n(C) at bat\n(D) stealing base" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.265625, "loss": 4.814476013183594 }, { "id": "identity_322918", "image": "PMC-VQA/images/PMC8876035_jcm-11-01002-f001_205890.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is observed in the midbrain on these axial FLAIR images?\nChoices:\n(A) Decreased signals \n(B) No change in signals \n(C) Increased signals \n(D) Irregular signals" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.734375, "loss": 4.256525993347168 }, { "id": "identity_305421", "image": "VQA2.0/images/293233.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this player?" }, { "from": "gpt", "value": "The answer is boggs" } ], "clip_score": 23.09375, "loss": 4.200802326202393 }, { "id": "identity_215594", "image": "PMC-VQA/images/PMC6618072_hed25675-fig-0006_6431.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the green arrow suggest about the condition of the patient?" }, { "from": "gpt", "value": "The answer is It suggests spinal canal pathology" } ], "clip_score": 31.796875, "loss": 4.607912063598633 }, { "id": "identity_262471", "image": "FigureQA/images/41242.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater in value, Violet Red or Brown?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.890625, "loss": 4.429825305938721 }, { "id": "identity_198591", "image": "A-OKVQA/images/50927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the dugout behind the batter?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 29.265625, "loss": 4.543727397918701 }, { "id": "identity_153263", "image": "AI2D/images/4428.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which type has the second highest number of leaves?" }, { "from": "gpt", "value": "The answer is Type d" } ], "clip_score": 31.515625, "loss": 4.160026550292969 }, { "id": "identity_67353", "image": "AI2D/images/2303.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many images depict the adult stage of the ladybug?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 30.34375, "loss": 4.748403549194336 }, { "id": "identity_36637", "image": "PMC-VQA/images/PMC8040235_Fig4_415818.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of X-rays is shown in the figure? \nChoices:\n(A) Panoramic lateral cephalometric view X-rays \n(B) Preoperative radiographic X-rays \n(C) Postoperative lateral and frontal view X-rays \n(D) None of the above. " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.5, "loss": 4.4485907554626465 }, { "id": "identity_300472", "image": "AI2D/images/3942.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the central part?\nChoices:\n(A) nucleus\n(B) starch grain\n(C) flagella\n(D) cell membrane" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.8125, "loss": 4.685192584991455 }, { "id": "identity_278659", "image": "DocVQA/images/hrbn0226_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the invoice number?" }, { "from": "gpt", "value": "The answer is ACCTCESystemJul16" } ], "clip_score": 24.765625, "loss": 6.003750324249268 }, { "id": "identity_132933", "image": "VQA2.0/images/532967.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many complete loops can be counted in the coiled rope?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 26.34375, "loss": 4.047976493835449 }, { "id": "identity_13258", "image": "ScienceQA/images/19359.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Select the mammal below.\nChoices:\n(A) African bullfrog\n(B) rabbit" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.609375, "loss": 4.063216686248779 }, { "id": "identity_3931", "image": "VQA2.0/images/562488.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the tie have stripes?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 28.375, "loss": 4.2833123207092285 }, { "id": "identity_290584", "image": "PMC-VQA/images/PMC6688467_F3_21973.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the object indicated by the arrow in the image? \nOptions:\n(A) A root hair \n(B) A developing leaf \n(C) A germinating seed \n(D) A mature fruit" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.703125, "loss": 4.850394248962402 }, { "id": "identity_2846", "image": "FigureQA/images/84768.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Gold have the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.984375, "loss": 4.322690010070801 }, { "id": "identity_188014", "image": "A-OKVQA/images/432704.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the lifeboat?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 28.46875, "loss": 4.458396911621094 }, { "id": "identity_329381", "image": "A-OKVQA/images/416101.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being held in the person's hands?\nChoices:\n(A) tennis balls\n(B) straws\n(C) rocks\n(D) doves" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.328125, "loss": 4.312536716461182 }, { "id": "identity_37528", "image": "AI2D/images/2992.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is A in the diagram?\nChoices:\n(A) pons\n(B) hypothalamus\n(C) cerebrum\n(D) medulla" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.234375, "loss": 4.229435920715332 }, { "id": "identity_285975", "image": "A-OKVQA/images/328216.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What obstacle are the people aiming to overcome? \nChoices:\n(A) train tracks\n(B) pirate swords\n(C) rooftops\n(D) river" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.859375, "loss": 4.212886333465576 }, { "id": "identity_302042", "image": "ScienceQA/images/5690.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Boise\n(B) Portland\n(C) Albuquerque\n(D) Salem" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.40625, "loss": 4.509045600891113 }, { "id": "identity_273634", "image": "ScienceQA/images/6705.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Montana?\n(A) Helena\n(B) Missoula\n(C) Billings\n(D) Olympia" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.109375, "loss": 4.047025203704834 }, { "id": "identity_46131", "image": "FigureQA/images/22732.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many colors are represented in the pie chart?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 33.15625, "loss": 4.3996262550354 }, { "id": "identity_268567", "image": "IconQA/images/15388.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What operation has been performed on this letter? \nChoices:\n(A) flip\n(B) slide\n(C) turn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.140625, "loss": 4.447106838226318 }, { "id": "identity_288680", "image": "PMC-VQA/images/PMC8153145_pathogens-10-00595-f002_452132.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which radiographic pattern was identified in the cat diagnosed with aelurostrongylosis? Was it:\n(A) Nodular\n(B) Cavitary\n(C) Interstitial\n(D) Bronchial" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.921875, "loss": 4.3044281005859375 }, { "id": "identity_212801", "image": "A-OKVQA/images/150342.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bags can be seen next to the person?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 13.6171875, "loss": 4.773528099060059 }, { "id": "identity_264517", "image": "PMC-VQA/images/PMC6452502_ccr32032-fig-0001_462275.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the yellow arrow in the CT scan indicate?\nChoices:\n(A) Normal tonsil tissue\n(B) A tumor\n(C) Peritonsillar abscess\n(D) Swollen lymph node." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.03125, "loss": 4.748565673828125 }, { "id": "identity_286811", "image": "PMC-VQA/images/PMC7394290_fig2_223351.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the body are the opacities located?\nChoices:\n(A) Right lung\n(B) Both lungs\n(C) Left lung\n(D) Abdomen" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.109375, "loss": 4.959066867828369 }, { "id": "identity_250604", "image": "PMC-VQA/images/PMC6945254_f1_94970.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key characteristics of the ultrasound image that indicate the use of a convex probe, and how do these characteristics compare to those of images produced by linear, phased array, and intracavitary probes?" }, { "from": "gpt", "value": "The answer is Convex probe characteristics match the image features" } ], "clip_score": 28.828125, "loss": 4.087695598602295 }, { "id": "identity_33669", "image": "PMC-VQA/images/PMC7362644_Fig1_214575.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which limbs show a neurogenic pattern? \nChoices:\n(A) Lower limbs \n(B) Upper limbs \n(C) Both limbs \n(D) No particular limb " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.4375, "loss": 4.169529438018799 }, { "id": "identity_73513", "image": "AI2D/images/3983.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In diagram A, what does the label \"L.T\" refer to?" }, { "from": "gpt", "value": "The answer is Lateral transport" } ], "clip_score": 21.984375, "loss": 4.675503253936768 }, { "id": "identity_295476", "image": "A-OKVQA/images/4266.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose is this activity intended?\nChoices:\n(A) racing\n(B) photography\n(C) practice\n(D) leisure" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.90625, "loss": 4.8977952003479 }, { "id": "identity_271509", "image": "PMC-VQA/images/PMC6026116_Fig4_335593.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What category does the image in (a) belong to?\nChoices:\n(A) PA joint image\n(B) LED image\n(C) Combined image\n(D) MRI image" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.234375, "loss": 5.072263717651367 }, { "id": "identity_334534", "image": "PMC-VQA/images/PMC4977429_F2_60801.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the largest perivascular spaces?\nChoices:\n(A) Left posterior insula\n(B) Right anterior insula\n(C) Left anterior insula\n(D) Right posterior insula" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.8125, "loss": 4.371304988861084 }, { "id": "identity_30798", "image": "A-OKVQA/images/210342.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of wet area is nearby?\nChoices:\n(A) stream\n(B) pond\n(C) ocean\n(D) swimming pool" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.3359375, "loss": 4.299088478088379 }, { "id": "identity_324872", "image": "PMC-VQA/images/PMC7484669_F4_251931.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which lobes are impacted by the COVID-19 pneumonia on the second day of illness?\nChoices:\n(A) Both upper lobes.\n(B) Right lower lobe and left middle lobe.\n(C) Right middle lobe and left lower lobe.\n(D) Left upper lobe and right lower lobe." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.25, "loss": 4.160838603973389 }, { "id": "identity_315367", "image": "VizWiz/images/VizWiz_train_00016425.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is visible here?" }, { "from": "gpt", "value": "The answer is icons" } ], "clip_score": 22.5625, "loss": 4.753824710845947 }, { "id": "identity_194919", "image": "VQA2.0/images/308143.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the dog's fur primarily?" }, { "from": "gpt", "value": "The answer is Golden" } ], "clip_score": 25.4375, "loss": 4.208347797393799 }, { "id": "identity_217495", "image": "A-OKVQA/images/490629.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many street signs are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.421875, "loss": 4.314400672912598 }, { "id": "identity_6821", "image": "AI2D/images/3140.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label points to the upper epidermis of the leaf?\nChoices:\n(A) N\n(B) P\n(C) B\n(D) E" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.765625, "loss": 4.149601936340332 }, { "id": "identity_315445", "image": "A-OKVQA/images/129648.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What's happening to this person?\nChoices:\n(A) bouncing\n(B) being chased\n(C) falling\n(D) doing tricks" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.5, "loss": 4.159005165100098 }, { "id": "identity_32006", "image": "Super-CLEVR/images/superCLEVR_new_001938.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the small blue matte object have the same shape as the large blue shiny object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 16.671875, "loss": 5.210501670837402 }, { "id": "identity_131102", "image": "Super-CLEVR/images/superCLEVR_new_000694.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the largest object in the image?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 14.609375, "loss": 4.395327568054199 }, { "id": "identity_24084", "image": "PMC-VQA/images/PMC7931658_f3_384026.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the finding in the posterior aspect of the globes of both eyes?\nChoices:\n(A) Calcification\n(B) Cyst\n(C) Hemorrhage\n(D) Tumor" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.984375, "loss": 4.3879876136779785 }, { "id": "identity_22649", "image": "A-OKVQA/images/236282.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the women in the lower right doing?\nChoices:\n(A) texting\n(B) petting\n(C) eating\n(D) photographing" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.6875, "loss": 4.219534397125244 }, { "id": "identity_310486", "image": "ScienceQA/images/19636.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Boulder\n(B) Columbus\n(C) Portland\n(D) Cincinnati" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.953125, "loss": 4.457312107086182 }, { "id": "identity_272171", "image": "A-OKVQA/images/202843.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the large number of people wearing orange?\nChoices:\n(A) supporting team\n(B) required uniform\n(C) visibility\n(D) distraction" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.7265625, "loss": 4.741264343261719 }, { "id": "identity_315411", "image": "PMC-VQA/images/PMC2803997_F1_54397.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the osteochondral mass located in the left knee?\nChoices:\n(A) medial,\n(B) lateral,\n(C) anterior,\n(D) posterior" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.296875, "loss": 4.846487522125244 }, { "id": "identity_106618", "image": "VQA2.0/images/139183.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are riding horses in this image?" }, { "from": "gpt", "value": "The answer is Three people" } ], "clip_score": 24.3125, "loss": 4.638765811920166 }, { "id": "identity_269233", "image": "FigureQA/images/19523.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Orchid the lower median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.65625, "loss": 4.186966896057129 }, { "id": "identity_299666", "image": "TextVQA/images/3492f2b6c34e80cc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the character depicted on the left side of Duffy?" }, { "from": "gpt", "value": "The answer is roz" } ], "clip_score": 21.734375, "loss": 4.943647861480713 }, { "id": "identity_13355", "image": "AI2D/images/3768.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which area is closest to the eyes?\nChoices:\n(A) M\n(B) B\n(C) A\n(D) K" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.8671875, "loss": 4.284718036651611 }, { "id": "identity_115080", "image": "PMC-VQA/images/PMC6322781_pone.0209674.g008_419942.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the images?" }, { "from": "gpt", "value": "The answer is Transverse orientation" } ], "clip_score": 23.859375, "loss": 4.001187324523926 }, { "id": "identity_308279", "image": "VQA-RAD/images/synpic20260.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the small mass near this kidney?" }, { "from": "gpt", "value": "The answer is Splenule" } ], "clip_score": 26.3125, "loss": 4.255627632141113 }, { "id": "identity_234595", "image": "TQA/images/abc_question_images/faults_11736.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the type of fault shown in the image and explain the relative movements of the hanging wall and footwall" }, { "from": "gpt", "value": "The answer is Normal fault" } ], "clip_score": 23.84375, "loss": 4.955826282501221 }, { "id": "identity_17853", "image": "ScienceQA/images/2177.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which statement describes the Steigerwald Forest ecosystem?\nChoices:\n(A) It has many different types of trees.\n(B) It has only a few types of trees.\n(C) It has soil that is poor in nutrients." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.921875, "loss": 4.244336128234863 }, { "id": "identity_31640", "image": "ScienceQA/images/9466.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Maryland?\nChoices:\n(A) Annapolis\n(B) Biloxi\n(C) Albany\n(D) Baltimore" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.609375, "loss": 4.208685874938965 }, { "id": "identity_304020", "image": "DVQA/images/bar_train_00198065.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this contain stacked bars?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 17.453125, "loss": 4.048975467681885 }, { "id": "identity_280375", "image": "FigureQA/images/72352.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Forest Green or Cyan?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.796875, "loss": 4.047057628631592 }, { "id": "identity_22209", "image": "PMC-VQA/images/PMC8168914_FIG1_457581.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the MRI scan reveal about the patient's condition after surgery? \nChoices:\n(A) Residual tumor is found \n(B) The patient is completely healed \n(C) Hemorrhage is found in the tumor bed \n(D) Gad is used as a treatment " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.328125, "loss": 4.030767917633057 }, { "id": "identity_296619", "image": "VizWiz/images/VizWiz_train_00008642.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what that is?" }, { "from": "gpt", "value": "The answer is table at establishment" } ], "clip_score": 19.453125, "loss": 4.893826961517334 }, { "id": "identity_285412", "image": "A-OKVQA/images/334125.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose are the square items resembling a necklace used for?\nChoices:\n(A) snacks\n(B) mail\n(C) flying\n(D) computer chips" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.5625, "loss": 4.456272602081299 }, { "id": "identity_320926", "image": "ScienceQA/images/10241.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be said about the Catoctin Mountain Park ecosystem?\nChoices:\n(A) It has many different types of trees.\n(B) It has soil that is poor in nutrients.\n(C) It has soil that is rich in nutrients." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.453125, "loss": 4.29296875 }, { "id": "identity_5398", "image": "PMC-VQA/images/PMC7393836_Fig1_223221.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the tumorous lesion located?\nChoices:\n(A) Medial rectus muscle of the left eye \n(B) Lateral rectus muscle of the left eye \n(C) Medial rectus muscle of the right eye \n(D) Lateral rectus muscle of the right eye " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.453125, "loss": 4.12498664855957 }, { "id": "identity_308312", "image": "ScienceQA/images/12433.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the following statement about our solar system correct or incorrect?\nEarth is the biggest planet that is composed mainly of rock.\nChoices:\n(A) correct\n(B) incorrect" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.0625, "loss": 4.206657409667969 }, { "id": "identity_134656", "image": "DocVQA/images/lxmw0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the location of the publishing company mentioned in the telegram?" }, { "from": "gpt", "value": "The answer is Forrestal Center" } ], "clip_score": 23.4375, "loss": 4.646955966949463 }, { "id": "identity_191775", "image": "A-OKVQA/images/260679.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tires are visible on the bus?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 25.9375, "loss": 4.37265682220459 }, { "id": "identity_18495", "image": "VizWiz/images/VizWiz_train_00014569.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What vocab word is in number one? " }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 25.609375, "loss": 4.093647003173828 }, { "id": "identity_297822", "image": "A-OKVQA/images/412764.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which species is involved in the competition? Is it:\n(A) feline\n(B) canine\n(C) bovine\n(D) ovine" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 8.1640625, "loss": 4.567244052886963 }, { "id": "identity_320524", "image": "A-OKVQA/images/161725.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What instrument is the person in the straw hat playing?\nChoices:\n(A) saxophone\n(B) clarinet\n(C) trumpet\n(D) guitar" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.78125, "loss": 4.186346530914307 }, { "id": "identity_293368", "image": "ScienceQA/images/11622.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following correctly describes the transfer of thermal energy during this time: from the surroundings to each refrigerator or from each refrigerator to the surroundings?\nChoices:\n(A) the surroundings . . . each refrigerator\n(B) each refrigerator . . . the surroundings" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.140625, "loss": 4.221682548522949 }, { "id": "identity_30405", "image": "A-OKVQA/images/296599.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which word best describes this train station?\nChoices:\n(A) new\n(B) alive\n(C) decrepit\n(D) bustling" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.15625, "loss": 4.814003944396973 }, { "id": "identity_333689", "image": "DVQA/images/bar_train_00126110.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest total value?" }, { "from": "gpt", "value": "The answer is hen" } ], "clip_score": 17.21875, "loss": 5.129356861114502 }, { "id": "identity_195491", "image": "VQA2.0/images/547528.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrian crossing signs are visible in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 25.234375, "loss": 4.308281421661377 }, { "id": "identity_27896", "image": "PMC-VQA/images/PMC7344590_diagnostics-10-00385-f001_207844.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique was used in the diagnosis? \nChoices:\n(A) MRI \n(B) CT scan \n(C) PET scan \n(D) X-Ray " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.015625, "loss": 4.72310733795166 }, { "id": "identity_333406", "image": "IconQA/images/64765.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this shape?\nChoices:\n(A) turn\n(B) flip\n(C) slide" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.046875, "loss": 4.346479892730713 }, { "id": "identity_214067", "image": "VQA2.0/images/223836.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles have visible white wall tires?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 27.484375, "loss": 4.692868709564209 }, { "id": "identity_262509", "image": "FigureQA/images/92728.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options correctly describes the smoothness of Peru? \n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.5, "loss": 4.49550724029541 }, { "id": "identity_332185", "image": "PMC-VQA/images/PMC7652921_Fig2_294443.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the subject of this image?\nChoices:\n(A) Dogs\n(B) Cats\n(C) Hemorrhage\n(D) Trees" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.140625, "loss": 4.711447238922119 }, { "id": "identity_106369", "image": "Super-CLEVR/images/superCLEVR_new_001596.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 25.03125, "loss": 4.620770454406738 }, { "id": "identity_59413", "image": "DocVQA/images/ftvn0020_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the sender of this fax?" }, { "from": "gpt", "value": "The answer is E. Canshaw" } ], "clip_score": 23.796875, "loss": 4.317572116851807 }, { "id": "identity_188076", "image": "VQA2.0/images/29018.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many train cars can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.140625, "loss": 4.286007881164551 }, { "id": "identity_335273", "image": "A-OKVQA/images/490306.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The child is positioning their body in a way similar to a model by doing what?\nChoices:\n(A) walking\n(B) posing\n(C) crying\n(D) twirling" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.515625, "loss": 4.154403209686279 }, { "id": "identity_30363", "image": "PMC-VQA/images/PMC7803468_Fig4_341104.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the lung ultrasound reveal in the left lower lateral chest?\nChoices:\n(A) Focal thickening \n(B) Pulmonary nodule \n(C) Sonographic consolidation \n(D) Malignant tumor " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.40625, "loss": 4.035983085632324 }, { "id": "identity_246598", "image": "Super-CLEVR/images/superCLEVR_new_000257.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is directly in front of the green bus and behind the purple bicycle?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 29.6875, "loss": 4.615438938140869 }, { "id": "identity_301627", "image": "AI2D/images/1355.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, what does C stand for?\nChoices:\n(A) spring\n(B) fall\n(C) sun\n(D) winter" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.15625, "loss": 4.609361171722412 }, { "id": "identity_191585", "image": "PMC-VQA/images/PMC3667362_Fig1_207823.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the images labeled with any specific medical terminology?" }, { "from": "gpt", "value": "The answer is Yes, labeled with figure references" } ], "clip_score": 26.15625, "loss": 4.001773357391357 }, { "id": "identity_127921", "image": "VQA2.0/images/370250.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many paint rollers are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.640625, "loss": 4.76020622253418 }, { "id": "identity_10142", "image": "TQA/images/abc_question_images/ocean_waves_17120.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label refers to the trough?\nChoices:\n(A) L\n(B) M\n(C) X\n(D) E" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.796875, "loss": 4.260782718658447 }, { "id": "identity_277646", "image": "Super-CLEVR/images/superCLEVR_new_001534.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any large purple metal jets to the left of the big yellow regular bus with a small bus behind it?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 26.984375, "loss": 5.263500213623047 }, { "id": "identity_268220", "image": "FigureQA/images/84606.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Rosy Brown the color with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.453125, "loss": 4.7527971267700195 }, { "id": "identity_286132", "image": "A-OKVQA/images/384496.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is he currently engaged in?\nChoices:\n(A) stealing horses\n(B) plowing field\n(C) feeding horses\n(D) riding horses" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.84375, "loss": 4.606050968170166 }, { "id": "identity_333372", "image": "ScienceQA/images/2476.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic do these items share?\nChoices:\n(A) breakable\n(B) stretchy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.390625, "loss": 4.820174694061279 }, { "id": "identity_188871", "image": "VizWiz/images/VizWiz_train_00002266.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chains are hanging from the clock?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.796875, "loss": 4.341750144958496 }, { "id": "identity_264882", "image": "FigureQA/images/75575.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Blue the color with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.578125, "loss": 4.284899711608887 }, { "id": "identity_263431", "image": "Super-CLEVR/images/superCLEVR_new_000618.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the red object shaped the same as the green object located to the right of the large yellow shiny school bus?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 23.765625, "loss": 5.1924333572387695 }, { "id": "identity_312686", "image": "FigureQA/images/4021.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.3125, "loss": 4.089794635772705 }, { "id": "identity_309368", "image": "A-OKVQA/images/223481.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this grey car driving on?\nChoices:\n(A) parking lot\n(B) dirt\n(C) street\n(D) sand" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.59375, "loss": 4.254244804382324 }, { "id": "identity_186384", "image": "PMC-VQA/images/PMC4631946_F15_440502.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of a specific medical condition in this image?" }, { "from": "gpt", "value": "The answer is No indication provided" } ], "clip_score": 24.078125, "loss": 4.245733261108398 }, { "id": "identity_34244", "image": "Super-CLEVR/images/superCLEVR_new_001886.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the small cyan object that is behind the big brown object made of?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 22.6875, "loss": 4.547137260437012 }, { "id": "identity_328645", "image": "ScienceQA/images/19064.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these places is farthest east?\nChoices:\n(A) Maine\n(B) Virginia\n(C) Arizona\n(D) Georgia" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.1875, "loss": 4.2904462814331055 }, { "id": "identity_107614", "image": "TextVQA/images/032b047f44d6cf41.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the guitar visible in the image?" }, { "from": "gpt", "value": "The answer is Natural wood or light brown" } ], "clip_score": 20.046875, "loss": 4.182527542114258 }, { "id": "identity_317043", "image": "PMC-VQA/images/PMC7405101_ccr33163-fig-0004_227594.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What abnormality is seen in the aorta of this case?\nChoices:\n(A) An aneurysm \n(B) A tear \n(C) A dissection \n(D) It is normal" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.53125, "loss": 4.694855690002441 }, { "id": "identity_278501", "image": "FigureQA/images/56175.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Crimson the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.265625, "loss": 4.570841312408447 }, { "id": "identity_212945", "image": "A-OKVQA/images/359451.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the trash can to the left of the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 12.6171875, "loss": 4.285086631774902 }, { "id": "identity_112776", "image": "VizWiz/images/VizWiz_train_00007241.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many mounting brackets are visible for the railing?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 30.46875, "loss": 4.286812782287598 }, { "id": "identity_304698", "image": "A-OKVQA/images/24755.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who are the people gathered behind the skiers watching?\nChoices:\n(A) judges\n(B) spectators\n(C) diners\n(D) waiters" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.015625, "loss": 4.40008544921875 }, { "id": "identity_132830", "image": "VizWiz/images/VizWiz_train_00008979.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible light sources are there in the room, including the lamp in the foreground?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 28.9375, "loss": 4.181055545806885 }, { "id": "identity_5450", "image": "PMC-VQA/images/PMC7545596_acn351170-fig-0001_268761.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrowhead point to in Patient No. 3? \nChoices:\n(A) Right ventro-oral nucleus \n(B) Left ventro-oral nucleus \n(C) Left ventro-dorsal nucleus \n(D) Right ventro-dorsal nucleus " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.4375, "loss": 4.149535179138184 }, { "id": "identity_324069", "image": "VQA2.0/images/26101.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is on the ground behind this?" }, { "from": "gpt", "value": "The answer is fence" } ], "clip_score": 18.84375, "loss": 4.0035319328308105 }, { "id": "identity_126862", "image": "A-OKVQA/images/196944.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of headwear is the zebra adorned with?" }, { "from": "gpt", "value": "The answer is A striped top hat" } ], "clip_score": 27.34375, "loss": 4.133059501647949 }, { "id": "identity_187103", "image": "VQA2.0/images/387264.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is displayed on the television screen?" }, { "from": "gpt", "value": "The answer is News channel with a female anchor" } ], "clip_score": 14.0859375, "loss": 4.206284999847412 }, { "id": "identity_15361", "image": "IconQA/images/42717.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner less likely to land?\nChoices:\n(A) white\n(B) pink" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.796875, "loss": 4.930071830749512 }, { "id": "identity_205250", "image": "A-OKVQA/images/314370.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cartons of eggs are visible on the counter?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 20.40625, "loss": 4.468530654907227 }, { "id": "identity_5463", "image": "FigureQA/images/86691.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Orange have the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.0, "loss": 4.101190090179443 }, { "id": "identity_25833", "image": "VizWiz/images/VizWiz_train_00007464.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me how to make this product?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 19.671875, "loss": 4.415686130523682 }, { "id": "identity_31645", "image": "A-OKVQA/images/293589.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of hat is the man wearing?\nChoices:\n(A) baseball\n(B) sombrero\n(C) fedora\n(D) tippy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.171875, "loss": 4.427523136138916 }, { "id": "identity_131648", "image": "VizWiz/images/VizWiz_train_00016820.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cigarette butts are visible in the ashtray?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 32.59375, "loss": 4.524033546447754 }, { "id": "identity_10906", "image": "A-OKVQA/images/453819.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is above water?\nChoices:\n(A) boat\n(B) swimmer\n(C) surfer\n(D) dolphin" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.265625, "loss": 4.3625617027282715 }, { "id": "identity_28420", "image": "A-OKVQA/images/532976.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the yellow truck doing?\nChoices:\n(A) transporting goods\n(B) receiving donation\n(C) mobile library\n(D) selling food" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.65625, "loss": 4.055006980895996 }, { "id": "identity_317026", "image": "TQA/images/abc_question_images/muscle_fiber_17090.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the Myelin sheath?\nChoices:\n(A) A\n(B) R\n(C) Y\n(D) X" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.84375, "loss": 4.063918590545654 }, { "id": "identity_278514", "image": "PMC-VQA/images/PMC2650704_F1_35335.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the asterisks shown in the image?\nChoices:\n(A) Isolated aggregates of PECAM-1 positive cells \n(B) Continuous vessels \n(C) Discontinuous vessels \n(D) PECAM-1 negative cells" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.15625, "loss": 4.210864067077637 }, { "id": "identity_281415", "image": "FigureQA/images/75313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Burlywood or Medium Periwinkle?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.296875, "loss": 4.632106781005859 }, { "id": "identity_32987", "image": "ScienceQA/images/872.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) Fiji\n(B) Nauru\n(C) Solomon Islands\n(D) Vanuatu" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.4375, "loss": 4.352508544921875 }, { "id": "identity_147857", "image": "DocVQA/images/ttwg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the administrator mentioned in the letter?" }, { "from": "gpt", "value": "The answer is Miss Elizabeth R. Nanes" } ], "clip_score": 24.359375, "loss": 4.371816158294678 }, { "id": "identity_301611", "image": "TQA/images/question_images/hydrocarbons_8094.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The following structures are what?\nChoices:\n(A) Isomers\n(B) Cyclic\n(C) Alkene\n(D) Chiral" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.921875, "loss": 4.1704912185668945 }, { "id": "identity_20094", "image": "A-OKVQA/images/49119.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are ships carrying goods called?\nChoices:\n(A) freighter\n(B) sail boat\n(C) cargo ship\n(D) goods ship" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.1015625, "loss": 4.0216803550720215 }, { "id": "identity_199684", "image": "A-OKVQA/images/315322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many construction cranes can be seen in the background?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 14.75, "loss": 4.590618133544922 }, { "id": "identity_206161", "image": "VQA2.0/images/58343.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary sport being played in the image?" }, { "from": "gpt", "value": "The answer is Soccer" } ], "clip_score": 22.625, "loss": 4.136931419372559 }, { "id": "identity_205406", "image": "A-OKVQA/images/111940.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many exhaust pipes are visible on the motorcycle?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.96875, "loss": 4.428778648376465 }, { "id": "identity_133868", "image": "TextVQA/images/a679ac728e700b84.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the umbrellas in the image within the letter 'H'?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 10.9296875, "loss": 4.224821090698242 }, { "id": "identity_16002", "image": "Super-CLEVR/images/superCLEVR_new_001212.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the bicycle behind the red thing that is on the right side of the green bicycle?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 30.0625, "loss": 4.830167293548584 }, { "id": "identity_299822", "image": "PMC-VQA/images/PMC5530458_Fig1_196408.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of test was conducted to determine the mass in the chest: MRI, X-ray, CT scan, or Ultrasound?\nChoices:\n(A) MRI \n(B) X-ray \n(C) CT scan \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.765625, "loss": 4.060338020324707 }, { "id": "identity_310530", "image": "PMC-VQA/images/PMC8316742_ags312444-fig-0005_21292.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the caption indicate in individuals with colorectal cancer?\nChoices:\n(A) Lung metastasis\n(B) Brain metastasis\n(C) Liver metastasis\n(D) Pancreatic metastasis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.28125, "loss": 4.055737495422363 }, { "id": "identity_115937", "image": "TextVQA/images/df4bd8fbc98dd7cc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many computer monitors are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.375, "loss": 5.095492839813232 }, { "id": "identity_31232", "image": "A-OKVQA/images/268049.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The area outside the building would be described as what by a person?\nChoices:\n(A) mild\n(B) warm\n(C) hot\n(D) cold" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.5234375, "loss": 4.3592705726623535 }, { "id": "identity_38303", "image": "ScienceQA/images/4190.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of New Hampshire?\nChoices:\n(A) Helena\n(B) Concord\n(C) Nashville\n(D) Trenton" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.40625, "loss": 4.083105087280273 }, { "id": "identity_136295", "image": "DocVQA/images/tznf0023_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To whom is this fax addressed?" }, { "from": "gpt", "value": "The answer is Dan Reicher" } ], "clip_score": 33.0, "loss": 4.316157341003418 }, { "id": "identity_290336", "image": "DocVQA/images/shln0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the topic of the facsimile transmission?" }, { "from": "gpt", "value": "The answer is Draft gulf standards" } ], "clip_score": 27.328125, "loss": 4.066627025604248 }, { "id": "identity_307308", "image": "TQA/images/abc_question_images/nuclear_energy_17100.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the Steam Line?\nChoices:\n(A) W\n(B) U\n(C) X\n(D) C" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.828125, "loss": 4.409445762634277 }, { "id": "identity_154500", "image": "ScienceQA/images/10479.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many limbs can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.921875, "loss": 4.525219917297363 }, { "id": "identity_55328", "image": "DocVQA/images/fpjw0023_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the system code mentioned in the form?" }, { "from": "gpt", "value": "The answer is WCNV" } ], "clip_score": 21.328125, "loss": 4.361929893493652 }, { "id": "identity_278335", "image": "PlotQA/images/11504.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In what manner are the legend labels arranged?" }, { "from": "gpt", "value": "The answer is vertical" } ], "clip_score": 19.59375, "loss": 4.160422325134277 }, { "id": "identity_1152", "image": "A-OKVQA/images/352127.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of transportation is shown?\nChoices:\n(A) rail\n(B) road\n(C) air\n(D) water" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.6640625, "loss": 4.2087554931640625 }, { "id": "identity_106555", "image": "A-OKVQA/images/209930.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many light towers are there in the stadium?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.890625, "loss": 4.13926362991333 }, { "id": "identity_300741", "image": "TQA/images/question_images/human_system_respiratory_1329.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What links the lungs to the pharynx and larynx?\nChoices:\n(A) Nasal passage\n(B) Bronchi\n(C) Trachea\n(D) Oral cavity" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.40625, "loss": 4.05495023727417 }, { "id": "identity_268740", "image": "ScienceQA/images/2054.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic best describes this object?\nChoices:\n(A) salty\n(B) fuzzy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.671875, "loss": 4.561493873596191 }, { "id": "identity_268035", "image": "IconQA/images/42383.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is it possible to draw a circle using this shape?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.71875, "loss": 4.068216800689697 }, { "id": "identity_11590", "image": "FigureQA/images/21868.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Blue Violet the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.546875, "loss": 4.013177871704102 }, { "id": "identity_319577", "image": "A-OKVQA/images/65063.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is it called when this creature moves?\nChoices:\n(A) trot\n(B) slither\n(C) slime\n(D) roll" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.0625, "loss": 4.136474609375 }, { "id": "identity_299075", "image": "Super-CLEVR/images/superCLEVR_new_001330.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there less green tandem bikes compared to tiny red rubber fighters?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 24.984375, "loss": 5.239712238311768 }, { "id": "identity_303470", "image": "ScienceQA/images/10067.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which place is highlighted?\nChoices:\n(A) Grenada\n(B) Cuba\n(C) Jamaica\n(D) Saint Vincent and the Grenadines" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.15625, "loss": 4.741360664367676 }, { "id": "identity_127894", "image": "PMC-VQA/images/PMC8863485_fig3_199756.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cusps are visible in this coplanar view?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.25, "loss": 4.405174255371094 }, { "id": "identity_19271", "image": "A-OKVQA/images/493485.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the man with the curly hair holding?\nChoices:\n(A) banana\n(B) trunk\n(C) baby\n(D) egg" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 8.40625, "loss": 4.444892883300781 }, { "id": "identity_337249", "image": "A-OKVQA/images/8285.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who might be at risk of falling?\nChoices:\n(A) prop\n(B) father\n(C) ladder\n(D) girl" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.7421875, "loss": 4.00458288192749 }, { "id": "identity_300004", "image": "Super-CLEVR/images/superCLEVR_new_000118.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the large vehicle that is right of the large truck made of?" }, { "from": "gpt", "value": "The answer is rubber" } ], "clip_score": 20.59375, "loss": 4.72689962387085 }, { "id": "identity_208492", "image": "TextVQA/images/0257f9e3387efc1a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the license plate number of the BMW?" }, { "from": "gpt", "value": "The answer is MS EXEC" } ], "clip_score": 29.34375, "loss": 4.103999614715576 }, { "id": "identity_319313", "image": "DVQA/images/bar_train_00047291.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the green color represent?" }, { "from": "gpt", "value": "The answer is cabin" } ], "clip_score": 24.390625, "loss": 4.344120979309082 }, { "id": "identity_286198", "image": "A-OKVQA/images/470462.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the form of the leather on the horse here?\nChoices:\n(A) skirt\n(B) chaps\n(C) apron\n(D) harness" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.5, "loss": 4.413827419281006 }, { "id": "identity_217572", "image": "TextVQA/images/b2f716a3390e71a9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are wearing sunglasses on their caps?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.453125, "loss": 4.4711198806762695 }, { "id": "identity_245573", "image": "A-OKVQA/images/169562.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many items in the room have a pumpkin theme, and what are they?" }, { "from": "gpt", "value": "The answer is One item, the pillow" } ], "clip_score": 21.9375, "loss": 4.372444152832031 }, { "id": "identity_32556", "image": "A-OKVQA/images/565500.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a symbol for the word in green letters on the bus?\nChoices:\n(A) rose\n(B) fist\n(C) 2 snakes\n(D) 4-leaf clover" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.96875, "loss": 4.086780548095703 }, { "id": "identity_328452", "image": "A-OKVQA/images/253262.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the truck with some yellow, red, and white on it do here?\nChoices:\n(A) issue citations\n(B) provide snacks\n(C) fuel plane\n(D) sell ice cream" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.3828125, "loss": 4.505420684814453 }, { "id": "identity_319211", "image": "IconQA/images/54866.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this symbol?\nChoices:\n(A) slide\n(B) flip\n(C) turn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.0625, "loss": 4.272222995758057 }, { "id": "identity_331049", "image": "ScienceQA/images/20629.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which month has the lowest average temperature here?\nChoices:\n(A) January\n(B) February\n(C) November" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.71875, "loss": 4.315739631652832 }, { "id": "identity_20543", "image": "Super-CLEVR/images/superCLEVR_new_001357.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there an equal number of matte things in front of the small metallic aeroplane and cyan metallic buss that are to the right of the rubber bus?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 30.671875, "loss": 4.957217216491699 }, { "id": "identity_308491", "image": "IconQA/images/20194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this symbol?\nChoices:\n(A) turn\n(B) flip\n(C) slide" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.859375, "loss": 4.182024002075195 }, { "id": "identity_328307", "image": "A-OKVQA/images/285131.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the Jones Theater located?\nChoices:\n(A) speer\n(B) 12th\n(C) curtis\n(D) spire" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.671875, "loss": 4.303135871887207 }, { "id": "identity_328745", "image": "PMC-VQA/images/PMC3466236_pone-0047230-g003_159344.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the orientation of these sections on this section?\nChoices:\n(A) Vertical \n(B) Parallel \n(C) Perpendicular \n(D) Diagonal" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.4375, "loss": 4.507376194000244 }, { "id": "identity_273958", "image": "ScienceQA/images/3191.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Rhode Island?\nChoices:\n(A) Cleveland\n(B) Providence\n(C) Harrisburg\n(D) Augusta" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.015625, "loss": 4.07301139831543 }, { "id": "identity_12706", "image": "AI2D/images/3155.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the central vein of a leaf\nChoices:\n(A) Meddle\n(B) Midrib\n(C) Middle\n(D) Midlife" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.84375, "loss": 4.280434608459473 }, { "id": "identity_222131", "image": "VQA2.0/images/523754.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many kayaks are visible in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 22.265625, "loss": 4.769453525543213 }, { "id": "identity_240158", "image": "VQA2.0/images/416978.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people in the image are using umbrellas?" }, { "from": "gpt", "value": "The answer is five" } ], "clip_score": 26.21875, "loss": 5.074249267578125 }, { "id": "identity_38258", "image": "DVQA/images/bar_train_00035465.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second group of bars from the left?" }, { "from": "gpt", "value": "The answer is dear" } ], "clip_score": 16.703125, "loss": 4.8815741539001465 }, { "id": "identity_21120", "image": "TextVQA/images/72fb9c801c70cd1a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what type of food is this?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 17.28125, "loss": 4.215103626251221 }, { "id": "identity_223171", "image": "TextVQA/images/c6f6d097f2611b69.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand name of the clock?" }, { "from": "gpt", "value": "The answer is Bayard" } ], "clip_score": 22.0, "loss": 4.086249828338623 }, { "id": "identity_318738", "image": "FigureQA/images/70475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the area under the curve for Dark Blue the largest?\n\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.625, "loss": 4.322598457336426 }, { "id": "identity_281824", "image": "PMC-VQA/images/PMC5428603_F4_162744.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What symptoms are linked to the condition pointed out by the arrow in the image?\nChoices:\n(A) Nausea and vomiting \n(B) Headaches and dizziness \n(C) Blurred vision \n(D) Joint pain and stiffness." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.484375, "loss": 4.009762763977051 }, { "id": "identity_25441", "image": "AI2D/images/987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The follicle is indicated by which letter on the diagram?\nChoices:\n(A) R\n(B) H\n(C) D\n(D) E" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.421875, "loss": 4.1371636390686035 }, { "id": "identity_274282", "image": "Super-CLEVR/images/superCLEVR_new_000813.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the dimensions of the articulated bus and the yellow car the same?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 24.359375, "loss": 5.496303558349609 }, { "id": "identity_284774", "image": "PMC-VQA/images/PMC6441667_Fig7_459427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of radiograph was taken after the right mandibular resection?\nOptions:\n(A) Periapical radiograph\n(B) Panoramic radiograph\n(C) Bite-wing radiograph\n(D) Occlusal radiograph" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.4375, "loss": 4.187892436981201 }, { "id": "identity_67443", "image": "AI2D/images/1848.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many primary consumers are depicted in the food chain diagram?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 31.453125, "loss": 4.490400791168213 }, { "id": "identity_271784", "image": "ScienceQA/images/3537.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the property that best describes this object: rough or soft?\nChoices:\n(A) rough\n(B) soft" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.59375, "loss": 4.455893516540527 }, { "id": "identity_296535", "image": "VQA2.0/images/237886.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum number of people that can wash their hands simultaneously in this place?" }, { "from": "gpt", "value": "The answer is many" } ], "clip_score": 22.5, "loss": 4.458502769470215 }, { "id": "identity_34383", "image": "PMC-VQA/images/PMC7566610_Fig7_276673.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which lobes are affected in the CT scan? \nChoices:\n(A) Left upper and lower lobes \n(B) Right upper and lower lobes \n(C) Left lower lobe only \n(D) Right middle and lower lobes. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.03125, "loss": 4.203509330749512 }, { "id": "identity_73050", "image": "AI2D/images/946.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which bird's beak is shaped for cracking seeds?" }, { "from": "gpt", "value": "The answer is Bird at label B" } ], "clip_score": 30.1875, "loss": 4.494235515594482 }, { "id": "identity_305786", "image": "TextVQA/images/b1935c9505498f45.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of beer is this?" }, { "from": "gpt", "value": "The answer is zeglia" } ], "clip_score": 30.46875, "loss": 5.483199596405029 }, { "id": "identity_262554", "image": "FigureQA/images/97830.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Black or Burlywood?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.03125, "loss": 4.646718502044678 }, { "id": "identity_306679", "image": "Super-CLEVR/images/superCLEVR_new_001134.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the green bicycle and the purple matte bicycle the same size?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 29.765625, "loss": 5.295420169830322 }, { "id": "identity_127256", "image": "Super-CLEVR/images/superCLEVR_new_000666.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is to the right of the green matte car and behind the purple matte scooter?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 32.3125, "loss": 4.356153964996338 }, { "id": "identity_34410", "image": "PMC-VQA/images/PMC7841286_F1_354972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which eye is shown in the image?\nChoices:\n(A) Left eye \n(B) Right eye \n(C) Both eyes \n(D) Cannot be determined " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.75, "loss": 4.354674339294434 }, { "id": "identity_332029", "image": "TabMWP/images/20001.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the given data, on which day was the highest number of ice cream cones sold?\nChoices:\n(A) Saturday\n(B) Sunday\n(C) Monday\n(D) Tuesday" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.25, "loss": 4.5132951736450195 }, { "id": "identity_294104", "image": "PMC-VQA/images/PMC8502242_FIG1_79241.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the image, in which artery is there a thrombus?\nChoices:\n(A) Internal jugular artery\n(B) Left common carotid artery\n(C) Right common carotid artery\n(D) Vertebral artery" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.65625, "loss": 4.005105972290039 }, { "id": "identity_138544", "image": "DocVQA/images/yrvw0217_54.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the overall effect of the treatments on carbohydrate metabolism in the Premarin/TMG clinical trials?" }, { "from": "gpt", "value": "The answer is no clinically important effects" } ], "clip_score": 24.484375, "loss": 4.252717971801758 }, { "id": "identity_21853", "image": "AI2D/images/4431.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The fin labeled C is considered which fin?\nChoices:\n(A) Trailing dorsal\n(B) Trigger\n(C) Split\n(D) Single" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.0, "loss": 4.46122407913208 }, { "id": "identity_308816", "image": "FigureQA/images/11868.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Seafoam the highest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.171875, "loss": 4.436464309692383 }, { "id": "identity_14458", "image": "DVQA/images/bar_train_00144288.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group has the smallest summed value?" }, { "from": "gpt", "value": "The answer is driver" } ], "clip_score": 24.546875, "loss": 4.1319122314453125 }, { "id": "identity_199712", "image": "VQA-AS/images/14625.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are in the air?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.4375, "loss": 5.099571704864502 }, { "id": "identity_282564", "image": "PMC-VQA/images/PMC4350961_fig4_364376.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the scale bar in the given context?\nChoices:\n(A) Image resolution \n(B) Field of view \n(C) Magnification level \n(D) Image brightness" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.1796875, "loss": 4.909613609313965 }, { "id": "identity_151917", "image": "AI2D/images/4467.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different pronation types are depicted in the image?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 34.5, "loss": 4.7733683586120605 }, { "id": "identity_34091", "image": "FigureQA/images/4508.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Blue Violet the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.515625, "loss": 4.049264430999756 }, { "id": "identity_5712", "image": "AI2D/images/3322.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is represented by C in the diagram?\nChoices:\n(A) cytoskeleton\n(B) none of the above\n(C) golgi body\n(D) ribosome" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.640625, "loss": 4.23024845123291 }, { "id": "identity_327441", "image": "A-OKVQA/images/195296.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the bananas doing on this yellow cloth?\nChoices:\n(A) being dried\n(B) being cooked\n(C) being sold\n(D) being eaten" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.546875, "loss": 4.458514213562012 }, { "id": "identity_187032", "image": "VQA-AS/images/10183.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs are visible in the room?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 18.328125, "loss": 4.494194984436035 }, { "id": "identity_287948", "image": "IconQA/images/80229.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the first picture is a moon, which picture corresponds to the ninth position in the sequence?\nChoices:\n(A) moon\n(B) star\n(C) rocket" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.671875, "loss": 4.270392417907715 }, { "id": "identity_279218", "image": "A-OKVQA/images/323827.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the presence of rings symbolize in the given context?\nChoices:\n(A) dolphin show\n(B) square dance\n(C) school play\n(D) olympics" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.5, "loss": 4.28914737701416 }, { "id": "identity_268879", "image": "A-OKVQA/images/416356.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What ingredients are included in the spaghetti dish?\nChoices:\n(A) meatball\n(B) shrimp\n(C) tomato sauce\n(D) egg" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.671875, "loss": 4.186300754547119 }, { "id": "identity_1518", "image": "TQA/images/question_images/photosynthesis_1317.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is the product of photosynthesis?\nChoices:\n(A) Glucose\n(B) Carbon dioxide\n(C) Water\n(D) Sunlight" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.203125, "loss": 4.076448917388916 }, { "id": "identity_276", "image": "PMC-VQA/images/PMC4047780_F3_295237.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of radiograph is displayed in image (B)? \nChoices:\n(A) Panoramic radiograph \n(B) CBCT \n(C) MRI \n(D) CT scan " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.953125, "loss": 4.238837242126465 }, { "id": "identity_114847", "image": "VQA2.0/images/201659.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many plates are present on the table?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 16.25, "loss": 4.269983768463135 }, { "id": "identity_270477", "image": "A-OKVQA/images/298649.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the preferred destination for the people in the car to visit among the following options: kid's playground, sand pits, ocean, or snow mountains?\nChoices:\n(A) kid's playground\n(B) sand pits\n(C) ocean\n(D) snow mountains" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.640625, "loss": 4.5908379554748535 }, { "id": "identity_313909", "image": "A-OKVQA/images/52661.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the large tarp suspended over the deck of the yacht?\nChoices:\n(A) blue\n(B) gray\n(C) green\n(D) green" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.0, "loss": 4.049914360046387 }, { "id": "identity_121915", "image": "PMC-VQA/images/PMC5820407_Fig4_278594.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many hearts are depicted in the image?" }, { "from": "gpt", "value": "The answer is Two hearts" } ], "clip_score": 25.359375, "loss": 4.511410236358643 }, { "id": "identity_26601", "image": "IconQA/images/7744.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of hearts even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.984375, "loss": 4.129042148590088 }, { "id": "identity_171592", "image": "IconQA/images/90067.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove all the stars from the sequence, how many shapes remain?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 26.6875, "loss": 4.160555839538574 }, { "id": "identity_243918", "image": "A-OKVQA/images/369180.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are involved in containing the elephants, and what are they using to contain them?" }, { "from": "gpt", "value": "The answer is Five people using a string" } ], "clip_score": 26.6875, "loss": 4.743977069854736 }, { "id": "identity_121846", "image": "PMC-VQA/images/PMC6746013_f4_38074.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is located at the center of the image?" }, { "from": "gpt", "value": "The answer is Spinal canal" } ], "clip_score": 28.109375, "loss": 4.033901691436768 }, { "id": "identity_281482", "image": "TabMWP/images/33873.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.5625, "loss": 4.1059465408325195 }, { "id": "identity_302273", "image": "TextVQA/images/1079d274d2730d15.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What word is written in yellow on the blue sign?" }, { "from": "gpt", "value": "The answer is nova" } ], "clip_score": 14.4921875, "loss": 5.35587215423584 }, { "id": "identity_213397", "image": "PMC-VQA/images/PMC6787152_F3_49142.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the cell nuclei in the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 23.078125, "loss": 4.012228965759277 }, { "id": "identity_72570", "image": "ScienceQA/images/6121.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arms does the organism in the image have?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 21.421875, "loss": 5.041766166687012 }, { "id": "identity_12812", "image": "ScienceQA/images/17414.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this food web, which organism contains matter that eventually moves to the bolete fungus?\nChoices:\n(A) black racer\n(B) black bear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.34375, "loss": 4.384782314300537 }, { "id": "identity_57197", "image": "DVQA/images/bar_train_00119091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest number of people preferring 'mouth'?" }, { "from": "gpt", "value": "The answer is shift" } ], "clip_score": 29.953125, "loss": 4.733889102935791 }, { "id": "identity_104445", "image": "TextVQA/images/03b92d00a3433780.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the white banner at the bottom right corner of the image?" }, { "from": "gpt", "value": "The answer is energie wasser dienstleistung stadtverkehr" } ], "clip_score": 16.125, "loss": 4.132968902587891 }, { "id": "identity_287759", "image": "A-OKVQA/images/196917.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which animal is located next to the vehicle? \n(A) moose\n(B) giraffe\n(C) cow\n(D) monkey" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.7578125, "loss": 4.385532379150391 }, { "id": "identity_292523", "image": "PMC-VQA/images/PMC6475549_fig1_468927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the abnormality detected in the CT scan from the following options:\n(A) Sphingomyelin deposition\n(B) Septal enhancement\n(C) Bronchitis\n(D) Asthma." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.984375, "loss": 4.771435737609863 }, { "id": "identity_37638", "image": "ScienceQA/images/6974.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Maine\n(B) Connecticut\n(C) New Hampshire\n(D) Vermont" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.671875, "loss": 4.531965732574463 }, { "id": "identity_198362", "image": "VQA2.0/images/236438.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand of equipment is the person using?" }, { "from": "gpt", "value": "The answer is Overhead" } ], "clip_score": 14.6953125, "loss": 4.401390075683594 }, { "id": "identity_263214", "image": "VQA2.0/images/143783.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In what location is the dog currently seated?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 24.5, "loss": 4.112216949462891 }, { "id": "identity_94974", "image": "TabMWP/images/32252.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode number of broken cookies per pack?" }, { "from": "gpt", "value": "The answer is No mode" } ], "clip_score": 37.6875, "loss": 4.19829797744751 }, { "id": "identity_297984", "image": "VQA2.0/images/8345.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the object that is pink?" }, { "from": "gpt", "value": "The answer is towel" } ], "clip_score": 10.65625, "loss": 4.281433582305908 }, { "id": "identity_107280", "image": "PMC-VQA/images/PMC4383027_f3-ijo-46-05-1913_374392.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many images show the presence of CD44 marker?" }, { "from": "gpt", "value": "The answer is Two images" } ], "clip_score": 22.8125, "loss": 4.952004909515381 }, { "id": "identity_302650", "image": "ScienceQA/images/20989.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Louisville\n(B) Charleston\n(C) Baton Rouge\n(D) Huntington" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.28125, "loss": 4.8093976974487305 }, { "id": "identity_5650", "image": "VQA2.0/images/541764.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the shadow of the cart?" }, { "from": "gpt", "value": "The answer is front of cart" } ], "clip_score": 21.125, "loss": 4.2495245933532715 }, { "id": "identity_308320", "image": "FigureQA/images/56647.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this color greater than Aqua?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.578125, "loss": 4.132260322570801 }, { "id": "identity_294434", "image": "PMC-VQA/images/PMC5922503_FIG3_306980.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the pleural effusion typically located in the chest? \nChoices:\n(A) Right side.\n(B) Front side.\n(C) Left side.\n(D) Back side." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.8125, "loss": 4.099978446960449 }, { "id": "identity_116801", "image": "VQA2.0/images/116004.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing red hats in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 17.0, "loss": 4.417759895324707 }, { "id": "identity_36033", "image": "IconQA/images/71406.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner more likely to land?\nChoices:\n(A) white\n(B) green" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.125, "loss": 4.507983684539795 }, { "id": "identity_302636", "image": "ScienceQA/images/7601.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these places is indicated on the map?\nChoices:\n(A) Nashville\n(B) Austin\n(C) Memphis\n(D) New Orleans" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.3125, "loss": 4.618793487548828 }, { "id": "identity_16191", "image": "Super-CLEVR/images/superCLEVR_new_001831.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the yellow matte object that is behind the rubber jet have the same size as the object behind the large yellow tandem bike?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 25.90625, "loss": 4.849419593811035 }, { "id": "identity_323490", "image": "PMC-VQA/images/PMC7801276_F1-ad-12-1-203_340304.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the CT scan of the individual with cortical venous thrombosis reveal?\nChoices:\n(A) Hemorrhagic stroke in a temporal lobe\n(B) Venous hemorrhagic infarct in a parietal lobe\n(C) Disappearance of some cortical veins\n(D) Normal brain scan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.84375, "loss": 4.784470081329346 }, { "id": "identity_289988", "image": "PMC-VQA/images/PMC4877350_Fig9_33144.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which description accurately represents the appearance of the pelvic masses among the given choices? \nChoices:\n(A) Solid\n(B) Hypodense\n(C) Hollow\n(D) Calcified" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.171875, "loss": 4.084982872009277 }, { "id": "identity_321661", "image": "ScienceQA/images/852.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the place shown?\nChoices:\n(A) Connecticut\n(B) New Hampshire\n(C) Massachusetts\n(D) Wisconsin" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.515625, "loss": 4.840115070343018 }, { "id": "identity_123707", "image": "VQA-RAD/images/synpic20375.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of brain swelling around the lesion?" }, { "from": "gpt", "value": "The answer is No clear evidence" } ], "clip_score": 29.578125, "loss": 4.448149681091309 }, { "id": "identity_9891", "image": "TQA/images/question_images/human_system_eye_6133.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the retina?\nChoices:\n(A) This is where the image of the object normally forms\n(B) It carries the nerve impulses to the brain\n(C) focuses the light on the back of the eye\n(D) protective covering on the outside of the eye" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.6875, "loss": 4.187396049499512 }, { "id": "identity_124355", "image": "Super-CLEVR/images/superCLEVR_new_000968.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the large object located to the left of the yellow airplane?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 23.4375, "loss": 4.03883695602417 }, { "id": "identity_310699", "image": "ScienceQA/images/996.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the place shown?\nChoices:\n(A) Rhode Island\n(B) Kentucky\n(C) Florida\n(D) Connecticut" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.4375, "loss": 4.452395915985107 }, { "id": "identity_76435", "image": "AI2D/images/2063.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many herbivores are depicted in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 24.4375, "loss": 4.689868450164795 }, { "id": "identity_311230", "image": "FigureQA/images/75523.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.578125, "loss": 4.589735507965088 }, { "id": "identity_109347", "image": "VizWiz/images/VizWiz_train_00007868.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many holes are visible on the shelf where the product is placed?" }, { "from": "gpt", "value": "The answer is Two holes" } ], "clip_score": 25.171875, "loss": 4.210420608520508 }, { "id": "identity_21574", "image": "DVQA/images/bar_train_00089131.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What store does the indianred color represent?" }, { "from": "gpt", "value": "The answer is teeth" } ], "clip_score": 20.046875, "loss": 5.253538131713867 }, { "id": "identity_286510", "image": "ScienceQA/images/3142.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Idaho?\nChoices:\n(A) New Haven\n(B) Boise\n(C) Nampa\n(D) Helena" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.203125, "loss": 4.050478935241699 }, { "id": "identity_4959", "image": "TQA/images/question_images/parts_microscope_7202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would one used to look at the magnified image by the microscope?\nChoices:\n(A) ocular lens\n(B) body tube\n(C) revolving nosepiece\n(D) stage" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.9375, "loss": 4.282749176025391 }, { "id": "identity_275330", "image": "FigureQA/images/12297.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Green Yellow the highest possible option?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.921875, "loss": 4.686794281005859 }, { "id": "identity_115149", "image": "VQA2.0/images/246099.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the book?" }, { "from": "gpt", "value": "The answer is Vladimir Kaminer Russendisko" } ], "clip_score": 14.1875, "loss": 4.523525714874268 }, { "id": "identity_314378", "image": "PMC-VQA/images/PMC5102410_pone.0165239.g001_93702.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the image indicate? \nChoices:\n(A) Continuity of some allograft \n(B) Continuity of a graft \n(C) Discontinuity of a graft \n(D) Discontinuity of another graft" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.0, "loss": 4.640038013458252 }, { "id": "identity_296653", "image": "DVQA/images/bar_train_00034963.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To which category does the color steelblue belong?" }, { "from": "gpt", "value": "The answer is reform" } ], "clip_score": 24.4375, "loss": 4.064460277557373 }, { "id": "identity_10901", "image": "A-OKVQA/images/64149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What will they clean with the item in the pink box?\nChoices:\n(A) face\n(B) floor\n(C) table\n(D) screen" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.9453125, "loss": 4.372509956359863 }, { "id": "identity_1997", "image": "ScienceQA/images/16271.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this experiment, which were part of an experimental group?\nChoices:\n(A) the plots with fires\n(B) the plots with no fires" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.75, "loss": 4.098598957061768 }, { "id": "identity_335208", "image": "PMC-VQA/images/PMC9157057_rbac030-F6_298596.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of samples are shown in the images?\nChoices:\n(A) Plants \n(B) Animals \n(C) Rocks \n(D) Unknown" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.296875, "loss": 5.8674139976501465 }, { "id": "identity_103373", "image": "A-OKVQA/images/208511.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the advertisement on the scoreboard directly above the batter?" }, { "from": "gpt", "value": "The answer is ELITE SHORE" } ], "clip_score": 29.34375, "loss": 4.068368911743164 }, { "id": "identity_258122", "image": "VQA2.0/images/147972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the image, identify three distinct features that indicate the country where this picture was taken, and explain how each feature supports your conclusion" }, { "from": "gpt", "value": "The answer is India" } ], "clip_score": 20.96875, "loss": 4.02398681640625 }, { "id": "identity_195608", "image": "VQA2.0/images/273268.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible on the ground floor?" }, { "from": "gpt", "value": "The answer is number of people counted" } ], "clip_score": 17.125, "loss": 4.032619476318359 }, { "id": "identity_311921", "image": "A-OKVQA/images/40912.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is this person holding an umbrella?\nChoices:\n(A) to dance\n(B) keep dry\n(C) to give\n(D) to sell" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.734375, "loss": 4.571114540100098 }, { "id": "identity_334781", "image": "A-OKVQA/images/169226.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What photographic technique was used to capture the movement of vehicles on the road?\nChoices:\n(A) hdr\n(B) time-lapse\n(C) panorama\n(D) bokeh" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.8125, "loss": 4.135312080383301 }, { "id": "identity_198813", "image": "PMC-VQA/images/PMC3759309_f4_228448.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiological sign called that is seen in the right lung?" }, { "from": "gpt", "value": "The answer is Ring sign" } ], "clip_score": 26.21875, "loss": 4.925393581390381 }, { "id": "identity_22738", "image": "A-OKVQA/images/174896.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is in the little green bottle in between the two top bowls?\nChoices:\n(A) salad dressing\n(B) steak sauce\n(C) bitters\n(D) alcohol" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.4375, "loss": 4.221440315246582 }, { "id": "identity_331861", "image": "PMC-VQA/images/PMC5806936_dentistry-04-00029-f006_273972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the green arrow indicating in this image?\nChoices:\n(A) Oedema\n(B) Necrosis\n(C) Inflammation\n(D) Signal gain" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.6875, "loss": 5.684142112731934 }, { "id": "identity_323228", "image": "PMC-VQA/images/PMC9228668_polymers-14-02360-f001_323007.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging mode used in these micrographs?\nChoices:\n(A) BSE mode\n(B) SE mode\n(C) EDS mode\n(D) CL mode" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.890625, "loss": 4.481934547424316 }, { "id": "identity_273927", "image": "AI2D/images/2116.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the food web provided, which organisms compete for Algae as a food source? \n(A) Insects and Spartina\n(B) Spiders and Mudcrabs\n(C) Detritus and Nematodes\n(D) All of the Above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.359375, "loss": 4.271269798278809 }, { "id": "identity_14988", "image": "ScienceQA/images/5412.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) Cuba\n(B) Saint Vincent and the Grenadines\n(C) Grenada\n(D) Trinidad and Tobago" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.078125, "loss": 4.34926700592041 }, { "id": "identity_5904", "image": "IconQA/images/92387.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner less likely to land?\nChoices:\n(A) white\n(B) pink" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.453125, "loss": 4.152812957763672 }, { "id": "identity_306541", "image": "AI2D/images/3248.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the central part of this structure?\nChoices:\n(A) cytoplasm\n(B) nucleus\n(C) mitochondrion\n(D) golgi apparatus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.25, "loss": 4.479708671569824 }, { "id": "identity_115863", "image": "VQA2.0/images/61729.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many street lamps can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 27.453125, "loss": 4.256442546844482 }, { "id": "identity_215818", "image": "VQA2.0/images/289545.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many gaming consoles are visible?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 27.765625, "loss": 4.577805995941162 }, { "id": "identity_30368", "image": "Super-CLEVR/images/superCLEVR_new_000468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the airliner that is the same size as the metallic road bike?" }, { "from": "gpt", "value": "The answer is cyan" } ], "clip_score": 26.671875, "loss": 4.032842636108398 }, { "id": "identity_326710", "image": "IconQA/images/63604.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a house. What is the ninth image?\nChoices:\n(A) dog\n(B) house\n(C) paw" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.65625, "loss": 4.207033634185791 }, { "id": "identity_323846", "image": "ScienceQA/images/15314.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Trenton\n(B) New Haven\n(C) Hartford\n(D) New Orleans" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.15625, "loss": 4.051941871643066 }, { "id": "identity_299788", "image": "PMC-VQA/images/PMC5602807_Fig1_216782.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which lobe of the liver is the tumor located?\nChoices:\n(A) right lobe\n(B) left lobe\n(C) both lobes\n(D) unknown" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.8125, "loss": 4.31074857711792 }, { "id": "identity_338007", "image": "TextVQA/images/494d0363faf88044.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is coming?" }, { "from": "gpt", "value": "The answer is love " } ], "clip_score": 23.09375, "loss": 4.70431661605835 }, { "id": "identity_281925", "image": "PMC-VQA/images/PMC4542182_Figure4_415736.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the structure depicted in the image. Is it:\n(A) Vascular channel wall\n(B) Smooth muscle cells\n(C) Cancer cells\n(D) Blood clots" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.28125, "loss": 4.437539577484131 }, { "id": "identity_271051", "image": "VizWiz/images/VizWiz_train_00011589.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 15.796875, "loss": 4.266452312469482 }, { "id": "identity_263709", "image": "TextVQA/images/003bed7fa4de9fd9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the black sign?" }, { "from": "gpt", "value": "The answer is lil siam" } ], "clip_score": 17.921875, "loss": 4.313144683837891 }, { "id": "identity_194369", "image": "VQA2.0/images/517395.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the building in the background?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 17.5, "loss": 4.05161714553833 }, { "id": "identity_26289", "image": "A-OKVQA/images/38934.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of structure is sitting ont he top right hand corner of the train?\nChoices:\n(A) fire\n(B) statue\n(C) lighthouse\n(D) skyscraper" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.078125, "loss": 4.008730888366699 }, { "id": "identity_8999", "image": "DVQA/images/bar_train_00125430.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second group of bars from the left?" }, { "from": "gpt", "value": "The answer is animal" } ], "clip_score": 23.03125, "loss": 4.221497535705566 }, { "id": "identity_297953", "image": "PMC-VQA/images/PMC7459781_diagnostics-10-00530-f001_243185.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, where is the location of the fatty degeneration?\nChoices:\n(A) Cervical spine\n(B) Thoracic spine\n(C) Lumbar spine\n(D) Sacral spine" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.65625, "loss": 4.059060573577881 }, { "id": "identity_266356", "image": "ScienceQA/images/9041.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being pointed out in the highlighted area? \nChoices:\n(A) Saint Lucia\n(B) Saint Vincent and the Grenadines\n(C) Barbados\n(D) Grenada" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.921875, "loss": 4.034211158752441 }, { "id": "identity_220288", "image": "VQA2.0/images/186784.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of appliance is to the left of the sink?" }, { "from": "gpt", "value": "The answer is A toaster" } ], "clip_score": 23.6875, "loss": 4.173930644989014 }, { "id": "identity_285151", "image": "PMC-VQA/images/PMC6048845_Fig4_342569.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted on the left side of the OCT image in case 12?\nChoices:\n(A) Central retinal artery occlusion\n(B) Full-thickness macular hole\n(C) Myopic traction maculopathy\n(D) Branch retinal vein occlusion." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.625, "loss": 4.133761405944824 }, { "id": "identity_297931", "image": "PMC-VQA/images/PMC7502059_Fig2_257138.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the body is being imaged in the following options:\n(A) Right leg\n(B) Left leg\n(C) Hip\n(D) Shoulder" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.5625, "loss": 4.5240373611450195 }, { "id": "identity_281253", "image": "AI2D/images/1974.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the food web, what category does the killer whale belong to? \n(A) Carnivores\n(B) Herbivores\n(C) Omnivores\n(D) None of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.59375, "loss": 4.2393341064453125 }, { "id": "identity_296952", "image": "DocVQA/images/htng0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category of people is depicted in the document?" }, { "from": "gpt", "value": "The answer is invited-not present" } ], "clip_score": 17.15625, "loss": 6.2832536697387695 }, { "id": "identity_328136", "image": "A-OKVQA/images/224850.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of event is this?\nChoices:\n(A) carnival\n(B) toy sale\n(C) fire safety\n(D) school event" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.609375, "loss": 4.166426658630371 }, { "id": "identity_200694", "image": "A-OKVQA/images/85225.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrian crossing signs are visible in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 16.953125, "loss": 4.2343268394470215 }, { "id": "identity_22968", "image": "Super-CLEVR/images/superCLEVR_new_000546.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a small matte thing in front of the large chopper; does it have the same shape as the shiny object that is in front of the tiny purple wagon?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 27.296875, "loss": 4.847595691680908 }, { "id": "identity_11577", "image": "TextVQA/images/2c9b85d8357eae90.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the name of the bottom left card?" }, { "from": "gpt", "value": "The answer is wight brothers" } ], "clip_score": 22.375, "loss": 5.001023292541504 }, { "id": "identity_300531", "image": "AI2D/images/3797.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does \"B\" indicate?\nChoices:\n(A) Roots\n(B) Flower\n(C) Leaves\n(D) Stem" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.859375, "loss": 4.423884868621826 }, { "id": "identity_538", "image": "AI2D/images/3797.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What dose \"B\" represent?\nChoices:\n(A) Roots\n(B) Flower\n(C) Leaves\n(D) Stem" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.0, "loss": 4.290530204772949 }, { "id": "identity_129789", "image": "A-OKVQA/images/293605.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of establishment is located to the left of the bicycle?" }, { "from": "gpt", "value": "The answer is A bag or travel accessory store" } ], "clip_score": 28.109375, "loss": 4.604934215545654 }, { "id": "identity_327784", "image": "PMC-VQA/images/PMC5586526_f1_212261.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of MRI scan was done?\nChoices:\n(A) Axial T2-weighted scan of the brain and chest\n(B) Sagittal T1-weighted scan of the knee\n(C) Coronal T3-weighted scan of the abdomen\n(D) Axial T2-weighted scan of the heart" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.84375, "loss": 4.002478122711182 }, { "id": "identity_327825", "image": "PMC-VQA/images/PMC4229940_F3_335689.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of imaging was used to identify this signal?\nChoices:\n(A) Magnetic Resonance Imaging (MRI) \n(B) Computed Tomography (CT) \n(C) X-ray \n(D) Ultrasonography" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.015625, "loss": 4.652906894683838 }, { "id": "identity_315304", "image": "PMC-VQA/images/PMC5440401_Fig1_167188.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the arrows indicate in this?\nChoices:\n(A) Direction of water flow \n(B) Normal distribution of AQP4 \n(C) Level of AQP4 expression \n(D) Relative normal AQP4 distribution" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 12.6328125, "loss": 4.473692893981934 }, { "id": "identity_48911", "image": "DVQA/images/bar_train_00160999.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item has the highest number of units sold in the 'campus' store?" }, { "from": "gpt", "value": "The answer is person" } ], "clip_score": 30.15625, "loss": 4.161375999450684 }, { "id": "identity_299804", "image": "DocVQA/images/hkxw0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the name of the program?" }, { "from": "gpt", "value": "The answer is Evergreen" } ], "clip_score": 18.0625, "loss": 4.351072788238525 }, { "id": "identity_211916", "image": "PMC-VQA/images/PMC4300775_Fig2_351504.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many screws are visible in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 23.984375, "loss": 4.76057243347168 }, { "id": "identity_285189", "image": "VQA2.0/images/378637.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What items are located on the main table?" }, { "from": "gpt", "value": "The answer is plant" } ], "clip_score": 15.7109375, "loss": 5.079439163208008 }, { "id": "identity_269719", "image": "FigureQA/images/42130.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Olive Drab the median with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.46875, "loss": 4.123110294342041 }, { "id": "identity_264339", "image": "IconQA/images/8154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you randomly choose a marble without seeing it, which color are you less likely to select?\nChoices:\n(A) blue\n(B) black" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.484375, "loss": 4.094327449798584 }, { "id": "identity_105808", "image": "VQA2.0/images/551299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the child's shirt?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 16.171875, "loss": 4.035595893859863 }, { "id": "identity_236272", "image": "Super-CLEVR/images/superCLEVR_new_000502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the purple car that is to the right of the large matte tandem bike, and what is the color of the bus that is to the left of the tandem bike?" }, { "from": "gpt", "value": "The answer is rubber, yellow" } ], "clip_score": 32.71875, "loss": 4.438902854919434 }, { "id": "identity_222292", "image": "VQA2.0/images/252759.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the cloth the person is holding?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 13.3125, "loss": 4.074684143066406 }, { "id": "identity_233682", "image": "PMC-VQA/images/PMC7767774_os12791-fig-0006_330604.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key characteristics of the imaging modality used to obtain the image of the ACL graft, and how do these characteristics help in identifying the specific modality?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 26.5625, "loss": 4.7748565673828125 }, { "id": "identity_247976", "image": "DocVQA/images/rqxf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of new items required per category, and how many categories have a number of new items required above this average?" }, { "from": "gpt", "value": "The answer is 3 categories" } ], "clip_score": 21.0, "loss": 4.011715888977051 }, { "id": "identity_59552", "image": "DocVQA/images/gldj0038_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the approval authority?" }, { "from": "gpt", "value": "The answer is Dr. Gary T. Bugger" } ], "clip_score": 24.15625, "loss": 4.529670238494873 }, { "id": "identity_328199", "image": "IconQA/images/79009.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this symbol?\nChoices:\n(A) slide\n(B) turn\n(C) flip" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.40625, "loss": 4.090943336486816 }, { "id": "identity_314814", "image": "TextVQA/images/6b0fef85e2aac76d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the word at the bottom?" }, { "from": "gpt", "value": "The answer is spaces" } ], "clip_score": 21.71875, "loss": 4.27016544342041 }, { "id": "identity_223545", "image": "Super-CLEVR/images/superCLEVR_new_001358.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is to the left of the large red bicycle?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 32.625, "loss": 4.411762714385986 }, { "id": "identity_262934", "image": "FigureQA/images/56529.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Khaki the highest possible option?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.703125, "loss": 4.72290563583374 }, { "id": "identity_284996", "image": "ChartQA/images/4438.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color represents the Support Rep candidate in the chart?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 33.375, "loss": 4.197845935821533 }, { "id": "identity_9333", "image": "FigureQA/images/3070.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Gold the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.46875, "loss": 4.200136661529541 }, { "id": "identity_287392", "image": "PMC-VQA/images/PMC2679713_F1_38146.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the imaging findings, which diagnosis is the most probable among the following options: (A) Cerebral infarction, (B) Meningitis, (C) Multiple Sclerosis, or (D) Parkinson's disease?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.609375, "loss": 4.130257606506348 }, { "id": "identity_22798", "image": "AI2D/images/4280.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Biogenic methane is genrated by what?\nChoices:\n(A) thermogenic methane\n(B) bacterial actions\n(C) methane seeps\n(D) permafrost" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.796875, "loss": 4.24904203414917 }, { "id": "identity_5947", "image": "Super-CLEVR/images/superCLEVR_new_001946.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more small cyan airliners that are on the left side of the tiny matte bus than large matte cars right of the yellow truck?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 35.84375, "loss": 4.974238395690918 }, { "id": "identity_282110", "image": "TextVQA/images/65d28c3dea03ea1d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On the red banner at the bottom, which continent is displayed?" }, { "from": "gpt", "value": "The answer is europe" } ], "clip_score": 12.1484375, "loss": 4.029847621917725 }, { "id": "identity_282307", "image": "VQA2.0/images/138153.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the object in black color that is reflected on the mirror?" }, { "from": "gpt", "value": "The answer is flowers" } ], "clip_score": 22.515625, "loss": 4.227696418762207 }, { "id": "identity_31286", "image": "A-OKVQA/images/83060.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of bag is hanging from the cupboard?\nChoices:\n(A) grocery bag\n(B) backpack\n(C) purse\n(D) satchel" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.875, "loss": 4.169943809509277 }, { "id": "identity_16758", "image": "FigureQA/images/40643.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Bubblegum the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.5, "loss": 4.105663299560547 }, { "id": "identity_15237", "image": "ScienceQA/images/15773.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Idaho?\nChoices:\n(A) Carson City\n(B) Nampa\n(C) Boise\n(D) Sacramento" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.703125, "loss": 4.080532550811768 }, { "id": "identity_324506", "image": "FigureQA/images/40529.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Crimson?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.84375, "loss": 4.233548164367676 }, { "id": "identity_58049", "image": "DocVQA/images/fhng0227_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the model number of the Dictaphone Time Master Dictating Machine?" }, { "from": "gpt", "value": "The answer is P6" } ], "clip_score": 24.171875, "loss": 4.54916524887085 }, { "id": "identity_154099", "image": "ScienceQA/images/3986.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the state directly south of the highlighted area?" }, { "from": "gpt", "value": "The answer is Connecticut" } ], "clip_score": 30.265625, "loss": 4.104097843170166 }, { "id": "identity_124571", "image": "VQA2.0/images/559747.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many engines can be seen on the airplane?" }, { "from": "gpt", "value": "The answer is Two engines" } ], "clip_score": 22.59375, "loss": 4.653838634490967 }, { "id": "identity_1265", "image": "A-OKVQA/images/104309.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: When the pipes need to be worked on plumbers will be blocked from reaching it by what?\nChoices:\n(A) sink\n(B) wall\n(C) counter\n(D) microwave" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.359375, "loss": 4.114516258239746 }, { "id": "identity_264705", "image": "FigureQA/images/19765.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Saddle Brown or Peru?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.9375, "loss": 4.774046897888184 }, { "id": "identity_63551", "image": "TQA/images/abc_question_images/parts_microscope_17191.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What part of the microscope is labeled with 'F'?" }, { "from": "gpt", "value": "The answer is Base" } ], "clip_score": 34.96875, "loss": 4.090517997741699 }, { "id": "identity_337761", "image": "ScienceQA/images/5778.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Hilo\n(B) Honolulu\n(C) Helena\n(D) Missoula" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.140625, "loss": 4.32011079788208 }, { "id": "identity_73741", "image": "TQA/images/abc_question_images/earth_day_night_10170.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the letter 'V' represent in the diagram?" }, { "from": "gpt", "value": "The answer is Daytime" } ], "clip_score": 27.15625, "loss": 4.046606540679932 }, { "id": "identity_268751", "image": "ScienceQA/images/11668.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is used to make this water pitcher?\nChoices:\n(A) plastic\n(B) leather" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.703125, "loss": 4.045591831207275 }, { "id": "identity_323530", "image": "DVQA/images/bar_train_00068408.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of one of the bars?" }, { "from": "gpt", "value": "The answer is plot" } ], "clip_score": 23.140625, "loss": 4.707027435302734 }, { "id": "identity_323631", "image": "TextVQA/images/bc52509a6b896927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the back of this shirt?" }, { "from": "gpt", "value": "The answer is deeply satisfying " } ], "clip_score": 15.6484375, "loss": 5.509937763214111 }, { "id": "identity_296009", "image": "IconQA/images/26620.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the shape of this object:\nOptions:\n(A) sphere\n(B) cylinder\n(C) cone\n(D) cube" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.421875, "loss": 4.189345359802246 }, { "id": "identity_211338", "image": "VQA2.0/images/255983.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 16.734375, "loss": 4.507629871368408 }, { "id": "identity_324998", "image": "FigureQA/images/11579.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.2265625, "loss": 4.052857398986816 }, { "id": "identity_288314", "image": "FigureQA/images/99016.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Peru the country with the largest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.1875, "loss": 4.798405170440674 }, { "id": "identity_76018", "image": "ScienceQA/images/3473.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What role does the Arctic fox play in this food web?" }, { "from": "gpt", "value": "The answer is secondary consumer" } ], "clip_score": 30.8125, "loss": 4.27786922454834 }, { "id": "identity_283896", "image": "ScienceQA/images/4898.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Alaska?\nChoices:\n(A) Honolulu\n(B) Juneau\n(C) Santa Fe\n(D) Fairbanks" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.9375, "loss": 4.2295637130737305 }, { "id": "identity_304660", "image": "ScienceQA/images/10295.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic do these items share?\nChoices:\n(A) salty\n(B) bendable" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.734375, "loss": 4.298493385314941 }, { "id": "identity_332667", "image": "DVQA/images/bar_train_00108996.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item is the most preferred in any group?" }, { "from": "gpt", "value": "The answer is detail" } ], "clip_score": 21.25, "loss": 4.152394771575928 }, { "id": "identity_336299", "image": "PMC-VQA/images/PMC4180546_Fig1_323925.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the lesion located?\nChoices:\n(A) Right temporal lobe\n(B) Left temporal lobe\n(C) Frontal lobe\n(D) Parietal lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.296875, "loss": 4.558351993560791 }, { "id": "identity_323564", "image": "A-OKVQA/images/201069.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is most likely inside of the building next to these vehicles?\nChoices:\n(A) washing machine\n(B) firemen\n(C) clowns\n(D) atm" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.796875, "loss": 4.779301643371582 }, { "id": "identity_272372", "image": "TQA/images/question_images/skin_cross_section_7580.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of glands are accountable for causing acne during adolescence? \n(A) Eccrine sweat glands\n(B) Apocrine sweat glands\n(C) Sebaceous glands\n(D) Melanocytes" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.59375, "loss": 4.37973690032959 }, { "id": "identity_331166", "image": "IconQA/images/103586.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this object flat or solid?\nChoices:\n(A) solid\n(B) flat" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.921875, "loss": 4.0746307373046875 }, { "id": "identity_190256", "image": "VQA2.0/images/498509.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the sign in the background?" }, { "from": "gpt", "value": "The answer is Club De Acrobatique La Masse" } ], "clip_score": 22.28125, "loss": 4.576344013214111 }, { "id": "identity_43699", "image": "DocVQA/images/kkln0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the result of the transmission?" }, { "from": "gpt", "value": "The answer is OK" } ], "clip_score": 20.8125, "loss": 4.046278953552246 }, { "id": "identity_274774", "image": "A-OKVQA/images/58651.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the type of transportation being used from the following options:\n(A) road\n(B) water\n(C) air\n(D) rail" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.84375, "loss": 4.214679718017578 }, { "id": "identity_336597", "image": "A-OKVQA/images/160459.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is inside the pastry shown in the image?\nChoices:\n(A) jelly\n(B) cream\n(C) nothing\n(D) air" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.1484375, "loss": 4.238781929016113 }, { "id": "identity_118602", "image": "TextVQA/images/74d6f3f3e74ab89c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many characters are depicted in action poses on the comic cover?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 25.78125, "loss": 4.474189281463623 }, { "id": "identity_261410", "image": "VQA2.0/images/252779.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where can the red and white stripes be found?" }, { "from": "gpt", "value": "The answer is on platforms" } ], "clip_score": 18.9375, "loss": 4.87184476852417 }, { "id": "identity_142706", "image": "MapQA/images/map_5116.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a movie count in the range of 86-96?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 32.75, "loss": 4.223372936248779 }, { "id": "identity_312869", "image": "ScienceQA/images/17864.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the place shown?\nChoices:\n(A) New Jersey\n(B) Connecticut\n(C) New York\n(D) Pennsylvania" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.109375, "loss": 4.275822639465332 }, { "id": "identity_281743", "image": "VQA2.0/images/105728.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the person gazing at the camera?" }, { "from": "gpt", "value": "The answer is cow" } ], "clip_score": 21.96875, "loss": 4.049405574798584 }, { "id": "identity_262317", "image": "Super-CLEVR/images/superCLEVR_new_001957.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tandem bike located to the right of the brown matte mountain bike?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 19.171875, "loss": 4.319157600402832 }, { "id": "identity_268010", "image": "AI2D/images/4108.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which gas is released into the air during the cycle?\nChoices:\n(A) Oxygen\n(B) Sulfur\n(C) Calcium\n(D) Carbon dioxide" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.5625, "loss": 4.207110404968262 }, { "id": "identity_63111", "image": "TQA/images/question_images/human_system_digestive_3783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many parts of the small intestine are labeled in the diagram?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 34.40625, "loss": 4.0268354415893555 }, { "id": "identity_12347", "image": "IconQA/images/72919.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a fish. Which picture is fourth?\nChoices:\n(A) fish\n(B) sub\n(C) turtle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.8125, "loss": 4.478417873382568 }, { "id": "identity_1388", "image": "VQA2.0/images/8431.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the chair made of?" }, { "from": "gpt", "value": "The answer is thread" } ], "clip_score": 14.3671875, "loss": 5.747314453125 }, { "id": "identity_267024", "image": "IconQA/images/30095.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) slide\n(B) flip\n(C) turn" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.203125, "loss": 4.139172554016113 }, { "id": "identity_335405", "image": "ScienceQA/images/17024.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Providence\n(B) Montgomery\n(C) Manchester\n(D) Concord" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.921875, "loss": 4.677992343902588 }, { "id": "identity_29427", "image": "PMC-VQA/images/PMC4186633_f1-ol-08-05-2059_325677.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the red arrow? \nChoices:\n(A) Medulla Oblongata \n(B) Cavernous sinus \n(C) Auditory cortex \n(D) Vestibular aqueducts " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.84375, "loss": 4.518344402313232 }, { "id": "identity_310748", "image": "AI2D/images/3094.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following encloses the cytoplasm of a cell?\nChoices:\n(A) Lysosomes\n(B) Ribosomes\n(C) Cell membrane\n(D) Nucleus" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.421875, "loss": 4.0687665939331055 }, { "id": "identity_37412", "image": "A-OKVQA/images/304493.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why does he have his arm out?\nChoices:\n(A) for balance\n(B) to signal\n(C) to catch\n(D) to wave" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.734375, "loss": 4.3052215576171875 }, { "id": "identity_130565", "image": "A-OKVQA/images/201141.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the container on the ground to the left of the person?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 15.71875, "loss": 4.464057922363281 }, { "id": "identity_286088", "image": "PMC-VQA/images/PMC4009138_Fig3_285691.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where did the FB move in image e? \nChoices:\n(A) Left paraesophageal adipose space \n(B) Esophageal lumen \n(C) Right paraesophageal adipose space \n(D) Mediastinum." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.921875, "loss": 4.287325382232666 }, { "id": "identity_268858", "image": "ScienceQA/images/12954.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What property does this object possess?\nChoices:\n(A) flexible\n(B) sticky" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.4375, "loss": 4.14644718170166 }, { "id": "identity_36178", "image": "PMC-VQA/images/PMC5831712_Fig2_282086.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the size of the lymph nodes? \nChoices:\n(A) Large \n(B) Small \n(C) Medium \n(D) Irrelevant " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.71875, "loss": 4.513913154602051 }, { "id": "identity_73662", "image": "ScienceQA/images/3408.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries are visible on the map?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.03125, "loss": 4.6701154708862305 }, { "id": "identity_24522", "image": "PMC-VQA/images/PMC7597045_Fig1_285913.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the image show? \nChoices:\n(A) Left kidney \n(B) Right kidney \n(C) Liver \n(D) Spleen " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.609375, "loss": 4.01110315322876 }, { "id": "identity_285843", "image": "A-OKVQA/images/150769.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which material are the pipes made of that are supporting the old bike? \n(A) concrete \n(B) wood \n(C) sand \n(D) plastic" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.15625, "loss": 4.073121070861816 }, { "id": "identity_304273", "image": "FigureQA/images/88953.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Mint?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.3671875, "loss": 4.050139427185059 }, { "id": "identity_12385", "image": "VizWiz/images/VizWiz_train_00014937.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is on this picture?" }, { "from": "gpt", "value": "The answer is facebook messenger dog rose in mouth" } ], "clip_score": 22.234375, "loss": 5.414798736572266 }, { "id": "identity_9210", "image": "FigureQA/images/74246.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Maroon the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.671875, "loss": 4.195660591125488 }, { "id": "identity_109384", "image": "PMC-VQA/images/PMC9348820_F1_363019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any significant differences in the branching patterns between the two confocal images?" }, { "from": "gpt", "value": "The answer is Yes, less complex in Slit1KO" } ], "clip_score": 30.234375, "loss": 4.57070779800415 }, { "id": "identity_287755", "image": "FigureQA/images/97282.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Deep Pink or Indigo?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.4375, "loss": 4.692382335662842 }, { "id": "identity_116251", "image": "VQA-AS/images/2843.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trees are there in the background?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.09375, "loss": 4.175556659698486 }, { "id": "identity_13021", "image": "PMC-VQA/images/PMC5975791_fig3_320603.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the subject of the image shown in the caption?\nChoices:\n(A) Gaseous diffusion \n(B) Water diffusion \n(C) Solid-state diffusion \n(D) Liquid diffusion" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.765625, "loss": 4.959090232849121 }, { "id": "identity_29722", "image": "PMC-VQA/images/PMC6206386_Fig4_387569.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the likely cause of the patient's symptoms? \nChoices:\n(A) Traumatic brain injury \n(B) Stroke \n(C) Meningitis \n(D) Sinus infection " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.296875, "loss": 4.086862087249756 }, { "id": "identity_264752", "image": "FigureQA/images/79346.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Saddle Brown or Coral?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.15625, "loss": 4.630183696746826 }, { "id": "identity_28621", "image": "TextVQA/images/58928e8bb31269d0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is on the banners in the back?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 21.390625, "loss": 4.051267147064209 }, { "id": "identity_19938", "image": "A-OKVQA/images/207513.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the sign are these people painting?\nChoices:\n(A) top\n(B) middle\n(C) base\n(D) stop" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.109375, "loss": 4.172106742858887 }, { "id": "identity_285027", "image": "AI2D/images/2637.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which force is depicted in the diagram among the following choices: centrifugal, magnetic, none of the above, or centripetal?\nChoices:\n(A) centrifugal\n(B) magnetic\n(C) none of the above\n(D) centripetal" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.5, "loss": 4.051259994506836 }, { "id": "identity_222558", "image": "A-OKVQA/images/38934.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many houses are visible in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 18.875, "loss": 4.610337257385254 }, { "id": "identity_282631", "image": "A-OKVQA/images/420236.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the type of truck. Is it a:\n(A) mail\n(B) ice cream\n(C) suv\n(D) ambulance" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.5625, "loss": 4.44633674621582 }, { "id": "identity_126110", "image": "VizWiz/images/VizWiz_train_00007172.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pamphlets can be seen on the second shelf from the top?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 28.375, "loss": 4.674010753631592 }, { "id": "identity_312989", "image": "TQA/images/abc_question_images/cycle_water_14929.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the letter J indicate?\nChoices:\n(A) RUN OFF\n(B) PERCOLATION\n(C) PRECIPITATION\n(D) CONDENSATION" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.53125, "loss": 4.946473121643066 }, { "id": "identity_280233", "image": "FigureQA/images/94553.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Coral the one with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.4375, "loss": 4.541676998138428 }, { "id": "identity_309133", "image": "VizWiz/images/VizWiz_train_00018637.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What can you observe?" }, { "from": "gpt", "value": "The answer is water concrete bridge palm trees" } ], "clip_score": 24.96875, "loss": 4.451035499572754 }, { "id": "identity_127792", "image": "VQA2.0/images/145128.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the person holding anything in their left hand?" }, { "from": "gpt", "value": "The answer is Yes, a round object" } ], "clip_score": 14.4453125, "loss": 4.360076904296875 }, { "id": "identity_312311", "image": "FigureQA/images/56184.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the Orange Red portion smaller than the Aqua portion?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.8125, "loss": 4.0993804931640625 }, { "id": "identity_186374", "image": "PMC-VQA/images/PMC3389242_Fig6_143829.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the letter 'C' indicating in the top left corner of the image?" }, { "from": "gpt", "value": "The answer is It is a label for the image view or sequence" } ], "clip_score": 15.09375, "loss": 4.356130599975586 }, { "id": "identity_278246", "image": "A-OKVQA/images/323853.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the presence of black spots on the apples?\nChoices:\n(A) fresh\n(B) painted\n(C) stained\n(D) rotting" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.328125, "loss": 4.2053375244140625 }, { "id": "identity_28227", "image": "IconQA/images/79009.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) slide\n(B) turn\n(C) flip" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.3125, "loss": 4.310384750366211 }, { "id": "identity_10509", "image": "ScienceQA/images/19636.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Ohio?\nChoices:\n(A) Boulder\n(B) Columbus\n(C) Portland\n(D) Cincinnati" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.1875, "loss": 4.37042236328125 }, { "id": "identity_309902", "image": "FigureQA/images/86700.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.53125, "loss": 4.01389217376709 }, { "id": "identity_1499", "image": "ScienceQA/images/20791.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of New Hampshire?\nChoices:\n(A) Newport\n(B) Concord\n(C) Boston\n(D) Albany" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.375, "loss": 4.234152317047119 }, { "id": "identity_147340", "image": "DocVQA/images/mygl0226_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the letter?" }, { "from": "gpt", "value": "The answer is To invite a company to membership" } ], "clip_score": 23.546875, "loss": 4.4435200691223145 }, { "id": "identity_320816", "image": "IconQA/images/14906.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a moon. What is the eighth image?\nChoices:\n(A) moon\n(B) star\n(C) rocket" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.765625, "loss": 4.047679424285889 }, { "id": "identity_15890", "image": "IconQA/images/70481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there enough acorns for every squirrel?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.0, "loss": 4.392132759094238 }, { "id": "identity_296887", "image": "FigureQA/images/84544.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly describes the smoothness of Slate? \n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.96875, "loss": 4.178631782531738 }, { "id": "identity_11482", "image": "AI2D/images/3762.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What connects to the mouth that stops food from entering the trachea?\nChoices:\n(A) diaphragm\n(B) epiglottis\n(C) bronchus\n(D) tongue" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.34375, "loss": 4.25103235244751 }, { "id": "identity_289040", "image": "A-OKVQA/images/390345.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which ingredient is used to make the pavement out of the following options: copper, cement, sulfur, or gold?\nChoices:\n(A) copper\n(B) cement\n(C) sulfur\n(D) gold" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.125, "loss": 4.580157279968262 }, { "id": "identity_39275", "image": "FigureQA/images/20156.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 33.75, "loss": 4.009398460388184 }, { "id": "identity_14638", "image": "A-OKVQA/images/489845.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of room are the kids in?\nChoices:\n(A) bedroom\n(B) recreation\n(C) bathroom\n(D) kitchen" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.265625, "loss": 4.301236152648926 }, { "id": "identity_315484", "image": "A-OKVQA/images/239001.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the poled items used for?\nChoices:\n(A) block sun\n(B) block rain\n(C) block wind\n(D) block snow" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.1171875, "loss": 4.775261878967285 }, { "id": "identity_328668", "image": "IconQA/images/2179.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, what is the chance that it will be black?\nChoices:\n(A) probable\n(B) impossible\n(C) certain\n(D) unlikely" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.796875, "loss": 4.469159126281738 }, { "id": "identity_241702", "image": "A-OKVQA/images/229936.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time will it be in one and a half hours?" }, { "from": "gpt", "value": "The answer is two" } ], "clip_score": 13.8515625, "loss": 4.345597743988037 }, { "id": "identity_300280", "image": "TextVQA/images/69eddefbe18eea44.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is on this?" }, { "from": "gpt", "value": "The answer is dd" } ], "clip_score": 15.65625, "loss": 5.049073219299316 }, { "id": "identity_288815", "image": "A-OKVQA/images/325532.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: By what means are the objects in the sky powered?\nChoices:\n(A) gas\n(B) wind\n(C) sun\n(D) electricity" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.4609375, "loss": 4.209817409515381 }, { "id": "identity_276148", "image": "TextVQA/images/8d6ae29de134eeef.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the brand of the typewriter?" }, { "from": "gpt", "value": "The answer is hermis" } ], "clip_score": 36.03125, "loss": 4.844217777252197 }, { "id": "identity_141432", "image": "MapQA/images/map_12001.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states fall within the 79.5%-81.0% range?" }, { "from": "gpt", "value": "The answer is an integer number" } ], "clip_score": 23.234375, "loss": 4.064110279083252 }, { "id": "identity_13016", "image": "Super-CLEVR/images/superCLEVR_new_000698.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any objects right of the cyan object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 18.578125, "loss": 5.573099136352539 }, { "id": "identity_309829", "image": "Super-CLEVR/images/superCLEVR_new_001266.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a matte object that has the same shape as the shiny red one?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 15.546875, "loss": 5.3094635009765625 }, { "id": "identity_2415", "image": "TextVQA/images/5cc2be2963e03cff.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the second process?" }, { "from": "gpt", "value": "The answer is indexing" } ], "clip_score": 21.328125, "loss": 4.389820575714111 }, { "id": "identity_17215", "image": "Super-CLEVR/images/superCLEVR_new_000998.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a shiny bicycle that is on the right side of the small brown matte minivan; are there any big green rubber things that are in front of it?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 31.640625, "loss": 5.284409523010254 }, { "id": "identity_307235", "image": "ScienceQA/images/13411.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following organisms is a producer in this food web?\nChoices:\n(A) pine vole\n(B) bolete fungus\n(C) persimmon tree\n(D) gray fox" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.234375, "loss": 4.177828311920166 }, { "id": "identity_278507", "image": "PMC-VQA/images/PMC4244565_Fig2_339256.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was revealed by the X-ray assessment?\nChoices:\n(A) Poor healing of the fracture \n(B) Moderate healing of the fracture \n(C) Good healing of the fracture \n(D) Complete healing of the fracture" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.6875, "loss": 4.844237327575684 }, { "id": "identity_18933", "image": "IconQA/images/54607.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner more likely to land?\nChoices:\n(A) white\n(B) blue" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.90625, "loss": 4.5409770011901855 }, { "id": "identity_152715", "image": "TQA/images/question_images/waves_interactions_interference_8230.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the direction of wave A before interference?" }, { "from": "gpt", "value": "The answer is Right" } ], "clip_score": 30.421875, "loss": 4.537856101989746 }, { "id": "identity_333772", "image": "ScienceQA/images/7978.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Jefferson City\n(B) Richmond\n(C) Arlington\n(D) Norfolk" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.625, "loss": 4.150566101074219 }, { "id": "identity_59946", "image": "DocVQA/images/gpmp0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What action is being requested regarding Dr. Kent's telephone information?" }, { "from": "gpt", "value": "The answer is Change your Telephone List" } ], "clip_score": 18.46875, "loss": 4.1992621421813965 }, { "id": "identity_338261", "image": "A-OKVQA/images/64800.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What seems to be sold outside this silver trailer?\nChoices:\n(A) ice cream\n(B) cupcakes\n(C) hamburgers\n(D) hot dogs" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.703125, "loss": 4.077744483947754 }, { "id": "identity_32242", "image": "A-OKVQA/images/267885.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the men doing on top of the elephants?\nChoices:\n(A) feeding them\n(B) riding them\n(C) selling them\n(D) cleaning them" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.90625, "loss": 4.5511908531188965 }, { "id": "identity_282990", "image": "PMC-VQA/images/PMC6200443_FIG1_385685.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which area of the brain is impacted by the atrophy? Is it:\n(A) Frontal lobe\n(B) Temporal lobe\n(C) Parietal lobe\n(D) Occipital lobe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.6875, "loss": 4.387206554412842 }, { "id": "identity_198604", "image": "VQA2.0/images/528642.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible pillows are there on the bed?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.5625, "loss": 4.77930212020874 }, { "id": "identity_233547", "image": "TextVQA/images/45f35dc977bcc613.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What specific precautions should be taken in this area, considering the visibility conditions and the warning sign?" }, { "from": "gpt", "value": "The answer is Ensure clear area, use signals, wear high-visibility clothing, add markers" } ], "clip_score": 19.984375, "loss": 4.09574031829834 }, { "id": "identity_20998", "image": "DocVQA/images/lkbl0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: from whom the memo was received ?" }, { "from": "gpt", "value": "The answer is David K. Silver" } ], "clip_score": 23.375, "loss": 4.251379489898682 }, { "id": "identity_308820", "image": "IconQA/images/81468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, how likely is it that you will get a black one?\nChoices:\n(A) probable\n(B) certain\n(C) unlikely\n(D) impossible" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.515625, "loss": 4.169470310211182 }, { "id": "identity_123534", "image": "PMC-VQA/images/PMC5295295_ijerph-14-00044-f002_123866.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any indications of pathology in this tissue sample?" }, { "from": "gpt", "value": "The answer is Possible indications of pathology present" } ], "clip_score": 34.9375, "loss": 4.008319854736328 }, { "id": "identity_290729", "image": "PMC-VQA/images/PMC7815715_Fig3_345484.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of OCT scan is depicted in the image provided?\nChoices:\n(A) Transverse (T) \n(B) En face (E) \n(C) Vertical (V) \n(D) Horizontal (H)" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.53125, "loss": 4.683220863342285 }, { "id": "identity_325123", "image": "ScienceQA/images/19802.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Springfield\n(B) Arlington\n(C) Minneapolis\n(D) Saint Paul" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.265625, "loss": 4.1304426193237305 }, { "id": "identity_21485", "image": "Super-CLEVR/images/superCLEVR_new_001217.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any matte buss of the same size as the red rubber thing?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 21.765625, "loss": 5.168547630310059 }, { "id": "identity_293873", "image": "PMC-VQA/images/PMC6428891_Fig13_455281.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the CT image, which vein displays a thrombus?\nChoices:\n(A) Right common iliac vein\n(B) Left portal vein\n(C) Right portal vein\n(D) Left common iliac vein" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.890625, "loss": 4.042985439300537 }, { "id": "identity_27243", "image": "A-OKVQA/images/46155.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this player hoping to keep up?\nChoices:\n(A) volley\n(B) net\n(C) complaints\n(D) emotions" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.796875, "loss": 4.626536846160889 }, { "id": "identity_320916", "image": "A-OKVQA/images/39592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why might these animals be gathering here?\nChoices:\n(A) treats\n(B) fear\n(C) curiosity\n(D) attention" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.859375, "loss": 4.138796806335449 }, { "id": "identity_152037", "image": "ScienceQA/images/4704.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the bird doing with its neck?" }, { "from": "gpt", "value": "The answer is Curving" } ], "clip_score": 22.546875, "loss": 4.533949375152588 }, { "id": "identity_130444", "image": "A-OKVQA/images/119513.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the audience behind the fence?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 12.8046875, "loss": 4.697037220001221 }, { "id": "identity_284293", "image": "ScienceQA/images/20389.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which question is most suitable for Haley's experiment to address?\nChoices:\n(A) Can the growth of tomato seedlings be influenced by the humidity level of the planting environment?\n(B) Is there a difference in the number of tomato seedlings that grow when planted in soil with fertilizer versus soil without fertilizer?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.796875, "loss": 4.160921096801758 }, { "id": "identity_63108", "image": "AI2D/images/1990.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of the field mouse in this food web?" }, { "from": "gpt", "value": "The answer is Primary and secondary consumer" } ], "clip_score": 30.359375, "loss": 4.199817180633545 }, { "id": "identity_318073", "image": "AI2D/images/1517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What occupies the space outside of the nucleus?\nChoices:\n(A) neutrons\n(B) gas\n(C) electrons\n(D) protons" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.296875, "loss": 4.1664509773254395 }, { "id": "identity_317578", "image": "IconQA/images/75873.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a paw. What is the third image?\nChoices:\n(A) house\n(B) dog\n(C) paw" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.375, "loss": 4.717230319976807 }, { "id": "identity_304853", "image": "Super-CLEVR/images/superCLEVR_new_000572.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the two bikes the same size?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 16.5625, "loss": 5.2680768966674805 }, { "id": "identity_48469", "image": "MapQA/images/map_955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have hearings in the range of 43354-48624?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 36.59375, "loss": 4.083924770355225 }, { "id": "identity_218855", "image": "VQA2.0/images/504338.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bow ties are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.5, "loss": 4.383187294006348 }, { "id": "identity_1586", "image": "ScienceQA/images/1374.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Idaho?\nChoices:\n(A) Newark\n(B) Lansing\n(C) Boise\n(D) Missoula" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.5, "loss": 4.200698375701904 }, { "id": "identity_19071", "image": "FigureQA/images/62466.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Olive have the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.65625, "loss": 4.372296333312988 }, { "id": "identity_282503", "image": "AI2D/images/844.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the outcome of an egg?\nChoices:\n(A) Adult.\n(B) Larva.\n(C) Pupa.\n(D) Egg." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.265625, "loss": 4.402900218963623 }, { "id": "identity_6830", "image": "ScienceQA/images/7849.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Washington, D.C.\n(B) Vermont\n(C) South Carolina\n(D) Maryland" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.65625, "loss": 4.032927989959717 }, { "id": "identity_252807", "image": "VizWiz/images/VizWiz_train_00023586.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the visible elements on the screen, and what can you infer about the context of the image based on these elements?" }, { "from": "gpt", "value": "The answer is User is working in a corporate email and document environment" } ], "clip_score": 26.28125, "loss": 4.453409194946289 }, { "id": "identity_298468", "image": "PMC-VQA/images/PMC6315342_genes-09-00605-f001_417213.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the structure pointed to by the white arrow in the MRI image. Is it:\n(A) Mandibular canal\n(B) Maxillary sinus\n(C) Molar Tooth Sign\n(D) Temporomandibular joint" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.40625, "loss": 4.156876564025879 }, { "id": "identity_294927", "image": "A-OKVQA/images/13944.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of repairs is the person being lifted doing? Is it related to animal remediation, potholes, electrical work, or road signs?\nChoices:\n(A) animal remediation\n(B) pothole\n(C) electrical\n(D) road sign" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.15625, "loss": 4.471932888031006 }, { "id": "identity_132046", "image": "VQA2.0/images/88575.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many zebras have their tails visible?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 31.484375, "loss": 4.7350172996521 }, { "id": "identity_297690", "image": "TQA/images/abc_question_images/photosynthesis_11263.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this diagram, which letter represents chloroplasts?\nChoices:\n(A) X\n(B) P\n(C) E\n(D) M" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.71875, "loss": 4.403700351715088 }, { "id": "identity_109216", "image": "VizWiz/images/VizWiz_train_00012718.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many paper items can be seen on the table?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.8125, "loss": 4.8562493324279785 }, { "id": "identity_285657", "image": "TextVQA/images/97844cef7a4c8568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand or company associated with this product?" }, { "from": "gpt", "value": "The answer is no text in image" } ], "clip_score": 18.453125, "loss": 4.158034801483154 }, { "id": "identity_37310", "image": "PMC-VQA/images/PMC4822243_Fig4_18737.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of image shows hyperenhancement at the border zone but not the core of ablated lesion after FUS treatment? \nChoices:\n(A) T2-FSE image \n(B) DCE T1-FSE image \n(C) T1-FSE image \n(D) DWI image " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.484375, "loss": 4.250502109527588 }, { "id": "identity_287887", "image": "A-OKVQA/images/367608.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which skate trick is the man performing? Is it a flip, grind, grab, or manual?\nChoices:\n(A) flip\n(B) grind\n(C) grab\n(D) manual" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.0625, "loss": 4.299421787261963 }, { "id": "identity_298737", "image": "A-OKVQA/images/266168.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be found beside the tracks?\nChoices:\n(A) walking couple\n(B) dog\n(C) tall lights\n(D) cat" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.203125, "loss": 4.499266624450684 }, { "id": "identity_8332", "image": "ScienceQA/images/12433.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the following statement about our solar system true or false?\nEarth is the largest planet that is made mainly of rock.\nChoices:\n(A) true\n(B) false" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.734375, "loss": 4.001661777496338 }, { "id": "identity_289585", "image": "A-OKVQA/images/67115.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is located in the center of a giraffe's head?\nChoices:\n(A) sting\n(B) single horn\n(C) ossicones\n(D) cornet" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.78125, "loss": 4.340447425842285 }, { "id": "identity_258779", "image": "VQA2.0/images/151709.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What specific type of plane is in the photo, and what features distinguish it from other types of planes?" }, { "from": "gpt", "value": "The answer is floatplane" } ], "clip_score": 29.578125, "loss": 4.5959954261779785 }, { "id": "identity_239838", "image": "PMC-VQA/images/PMC6720874_sensors-19-03522-f014_31921.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the distinguishing features of the wafer that was not etched compared to the etched wafer in the accelerometer, and how can you identify them in the image?" }, { "from": "gpt", "value": "The answer is Smooth, uniform surface without patterns or structures" } ], "clip_score": 35.3125, "loss": 4.0987114906311035 }, { "id": "identity_45609", "image": "PlotQA/images/138553.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest percentage in the year 2015?" }, { "from": "gpt", "value": "The answer is Finland" } ], "clip_score": 25.171875, "loss": 4.756309509277344 }, { "id": "identity_315391", "image": "PMC-VQA/images/PMC4074584_F4_302021.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the white arrows in this image?\nChoices:\n(A) Different sized mineral-like structures on the fibrillar surface\n(B) Merged bacterial colonies in a root canal\n(C) A virus destroying the pulp cell spheres\n(D) Different sized fibrils on the mineral-like surface" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.5625, "loss": 4.070765972137451 }, { "id": "identity_10122", "image": "ScienceQA/images/5687.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Rhode Island?\nChoices:\n(A) Newport\n(B) Providence\n(C) Montpelier\n(D) Concord" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.765625, "loss": 4.154592990875244 }, { "id": "identity_17785", "image": "TabMWP/images/34990.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is the function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.40625, "loss": 4.151067733764648 }, { "id": "identity_290026", "image": "IconQA/images/58090.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the time displayed on the clock?\nChoices:\n(A) 11:00\n(B) 12:00" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.765625, "loss": 4.251343727111816 }, { "id": "identity_16575", "image": "DVQA/images/bar_train_00176523.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second bar from the bottom in each group?" }, { "from": "gpt", "value": "The answer is home" } ], "clip_score": 19.03125, "loss": 4.046787738800049 }, { "id": "identity_327256", "image": "PMC-VQA/images/PMC6780818_nanomaterials-09-01195-f013_47195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of microscope was used to create this image?\nChoices:\n(A) Optical microscope\n(B) Electron microscope\n(C) Scanning probe microscope\n(D) X-ray microscope" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.015625, "loss": 4.11887264251709 }, { "id": "identity_110222", "image": "VizWiz/images/VizWiz_train_00023808.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the object on the computer screen?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 24.65625, "loss": 4.341049671173096 }, { "id": "identity_338115", "image": "ScienceQA/images/19540.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Spokane\n(B) Seattle\n(C) Olympia\n(D) Denver" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.09375, "loss": 4.151659965515137 }, { "id": "identity_230148", "image": "VizWiz/images/VizWiz_train_00010815.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand and roast type of the coffee pod shown in the image?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 18.515625, "loss": 4.446747303009033 }, { "id": "identity_305061", "image": "TextVQA/images/8a11469dcd4fba7d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of blonde is this?" }, { "from": "gpt", "value": "The answer is dirty" } ], "clip_score": 16.625, "loss": 4.351288795471191 }, { "id": "identity_1164", "image": "AI2D/images/1079.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is in the middle of the nucleus?\nChoices:\n(A) nucleolus\n(B) ribosome\n(C) none of the above\n(D) lysosome" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.671875, "loss": 4.293553352355957 }, { "id": "identity_280813", "image": "AI2D/images/4847.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which object on Earth is depicted in the diagram? Is it a river, mountain/volcano, hill, or tree?\nChoices:\n(A) River\n(B) Mountain/Volcano\n(C) Hill\n(D) Tree" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.609375, "loss": 4.504016399383545 }, { "id": "identity_282321", "image": "PMC-VQA/images/PMC5664813_Fig5_233716.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the image provided?\nChoices:\n(A) A ripe wheat spike \n(B) A wheat grain \n(C) Isolated wheat grains \n(D) A wheat seed." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.265625, "loss": 4.361489772796631 }, { "id": "identity_315273", "image": "ScienceQA/images/15565.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which animal's skin is more effective as a warning sign to deter predators?\nChoices:\n(A) impala\n(B) crown-of-thorns sea star" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.609375, "loss": 4.144631385803223 }, { "id": "identity_269418", "image": "TQA/images/abc_question_images/human_system_digestive_16089.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is represented by the letter D in the given choices?\n(A) STOMACH\n(B) COLON\n(C) PANCREAS\n(D) LIVER" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.28125, "loss": 4.283334732055664 }, { "id": "identity_209619", "image": "VQA2.0/images/488775.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many books are leaning against the object?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 19.625, "loss": 4.3402509689331055 }, { "id": "identity_109535", "image": "VQA2.0/images/70749.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of shot is the tennis player executing?" }, { "from": "gpt", "value": "The answer is A serve" } ], "clip_score": 30.46875, "loss": 4.172362327575684 }, { "id": "identity_25888", "image": "PMC-VQA/images/PMC6417186_Fig1_450518.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of view is shown in the image?\nChoices:\n(A) Sagittal View \n(B) Anterior View \n(C) Lateral View \n(D) Posterior View" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.78125, "loss": 4.222945213317871 }, { "id": "identity_220658", "image": "VQA2.0/images/256683.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 17.328125, "loss": 4.406728267669678 }, { "id": "identity_292938", "image": "Super-CLEVR/images/superCLEVR_new_001804.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the shapes of the small shiny object and the red matte thing identical?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 17.0625, "loss": 5.348860263824463 }, { "id": "identity_314838", "image": "FigureQA/images/96894.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.734375, "loss": 4.049416542053223 }, { "id": "identity_26567", "image": "Super-CLEVR/images/superCLEVR_new_000897.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the small brown thing that is the same material as the sedan?" }, { "from": "gpt", "value": "The answer is chopper" } ], "clip_score": 24.234375, "loss": 4.35856294631958 }, { "id": "identity_127259", "image": "Super-CLEVR/images/superCLEVR_new_000666.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object is positioned closest to the center of the image?" }, { "from": "gpt", "value": "The answer is Green matte car" } ], "clip_score": 26.40625, "loss": 4.852802276611328 }, { "id": "identity_117196", "image": "VQA2.0/images/572469.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many rearview mirrors are visible on the truck?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.671875, "loss": 4.166295528411865 }, { "id": "identity_309975", "image": "IconQA/images/35125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a rocket. Which image is third?\nChoices:\n(A) star\n(B) rocket\n(C) moon" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.53125, "loss": 4.295126438140869 }, { "id": "identity_211841", "image": "VQA2.0/images/381163.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are visible in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 21.234375, "loss": 4.166342735290527 }, { "id": "identity_32949", "image": "PMC-VQA/images/PMC8155358_F2_452888.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which lobes of the brain show abnormal signals in the MRI image? \nChoices:\n(A) Temporal and Parietal lobes \n(B) Occipital and Insular lobes \n(C) Frontal and Insular lobes \n(D) Parietal and Frontal lobes " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 36.3125, "loss": 4.012093544006348 }, { "id": "identity_71494", "image": "AI2D/images/2070.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different food chains can be identified in Food Web R?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 33.8125, "loss": 4.480489253997803 }, { "id": "identity_3694", "image": "A-OKVQA/images/238448.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on the right and left of the clock?\nChoices:\n(A) cats\n(B) reptiles\n(C) birds\n(D) statues" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.671875, "loss": 4.036860466003418 }, { "id": "identity_273776", "image": "ScienceQA/images/8125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Alaska?\nChoices:\n(A) Seattle\n(B) Juneau\n(C) Saint Paul\n(D) Anchorage" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.46875, "loss": 4.220038414001465 }, { "id": "identity_276077", "image": "AI2D/images/946.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which animal is represented by the letter D in the image?\nChoices:\n(A) Eagle\n(B) Parrot\n(C) Duck\n(D) Finch" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 36.15625, "loss": 4.624657154083252 }, { "id": "identity_37026", "image": "ScienceQA/images/14966.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) smooth\n(B) stretchy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.4453125, "loss": 4.416879177093506 }, { "id": "identity_252918", "image": "TQA/images/abc_question_images/human_system_nervous_16172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the letter labels for the brainstem and the cerebellum, and then determine the label for the structure located directly above the cerebellum" }, { "from": "gpt", "value": "The answer is J" } ], "clip_score": 31.171875, "loss": 4.618819236755371 }, { "id": "identity_308485", "image": "AI2D/images/2079.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the provided food web, what is the primary energy source?\nChoices:\n(A) sun\n(B) bird\n(C) algae\n(D) crab" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.375, "loss": 4.199465751647949 }, { "id": "identity_107185", "image": "VQA2.0/images/445503.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are holding a beverage?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 17.359375, "loss": 5.366385459899902 }, { "id": "identity_322050", "image": "PMC-VQA/images/PMC4149355_pone-0099655-g001_316615.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the white line in the image indicate?\nChoices:\n(A) Beginning of the observation\n(B) Ending of the observation\n(C) Middle of the observation\n(D) None of the above." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.921875, "loss": 4.334399223327637 }, { "id": "identity_327802", "image": "PMC-VQA/images/PMC6398457_BIO036939F3_443769.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrow indicate in this image?\nChoices:\n(A) The presence of a ladder-like structure between two cyst cells.\n(B) The presence of a septa-like pattern between two cyst cells.\n(C) The presence of magnified spermatid heads.\n(D) No indication is given." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.890625, "loss": 4.22251033782959 }, { "id": "identity_27851", "image": "PMC-VQA/images/PMC7252828_fg003_179226.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the left image in comparison to the right image? \nChoices:\n(A) Successful ablation in the left image and unsuccessful ablation in the right image. \n(B) Successful ablation in the right image and unsuccessful ablation in the left image. \n(C) Successful ablation in both images. \n(D) Unsuccessful ablation in both images. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.921875, "loss": 4.110276699066162 }, { "id": "identity_287631", "image": "PMC-VQA/images/PMC4450475_Fig5_391776.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the intended use of the images depicted in the figure?\nChoices:\n(A) To illustrate the location of the carotid artery \n(B) To illustrate the location of the cochlea \n(C) To illustrate the bony overhang around the round window \n(D) To illustrate the bony overhang around the oval window" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.0625, "loss": 4.095237731933594 }, { "id": "identity_298092", "image": "A-OKVQA/images/578945.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is Tapestry situated?\nChoices:\n(A) California\n(B) London\n(C) None\n(D) New York" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 8.9296875, "loss": 4.194046974182129 }, { "id": "identity_284390", "image": "A-OKVQA/images/540.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the intended action of the large vehicle in preparation?\nChoices:\n(A) fire missiles\n(B) race cars\n(C) race camels\n(D) fly" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.234375, "loss": 4.512782573699951 }, { "id": "identity_221950", "image": "A-OKVQA/images/580082.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are walking in the background?" }, { "from": "gpt", "value": "The answer is Six people" } ], "clip_score": 23.28125, "loss": 4.4635772705078125 }, { "id": "identity_291420", "image": "DocVQA/images/jybx0223_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How is the industry largely viewing the GST on cigarettes?" }, { "from": "gpt", "value": "The answer is tax on tax" } ], "clip_score": 16.09375, "loss": 5.341980934143066 }, { "id": "identity_33371", "image": "PMC-VQA/images/PMC7825226_Fig1_349343.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique used in the figure? \nChoices:\n(A) Computed tomography (CT) scan \n(B) Positron emission tomography (PET) scan \n(C) Magnetic resonance imaging (MRI) \n(D) X-ray scan " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.421875, "loss": 4.157225608825684 }, { "id": "identity_77587", "image": "ScienceQA/images/4815.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image are primarily used as a dessert or sweet treat?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.53125, "loss": 4.840681552886963 }, { "id": "identity_73144", "image": "AI2D/images/801.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What role do the Deer and Rabbit play in this ecosystem?" }, { "from": "gpt", "value": "The answer is Primary consumers" } ], "clip_score": 27.3125, "loss": 4.619826316833496 }, { "id": "identity_215104", "image": "A-OKVQA/images/333978.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the pie container on the bottom shelf?" }, { "from": "gpt", "value": "The answer is Clear" } ], "clip_score": 21.328125, "loss": 4.650854587554932 }, { "id": "identity_111929", "image": "VQA2.0/images/33687.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people can be seen walking on the sidewalk?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 20.28125, "loss": 4.7296624183654785 }, { "id": "identity_285048", "image": "A-OKVQA/images/68231.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term used to refer to the planes near the dock?\nChoices:\n(A) airbus\n(B) jet\n(C) commuter\n(D) seaplane" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.96875, "loss": 4.128130912780762 }, { "id": "identity_31444", "image": "A-OKVQA/images/60387.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are people doing?\nChoices:\n(A) waiting\n(B) smoking\n(C) drinking\n(D) eating" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.796875, "loss": 4.24080228805542 }, { "id": "identity_35794", "image": "A-OKVQA/images/508962.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What character does the man looking at his cell phone play?\nChoices:\n(A) mary poppins\n(B) jesus\n(C) sweeny todd\n(D) santa" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.609375, "loss": 4.085463047027588 }, { "id": "identity_296669", "image": "ScienceQA/images/5635.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) Massachusetts\n(B) North Carolina\n(C) New Jersey\n(D) Ohio" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.140625, "loss": 4.347777366638184 }, { "id": "identity_318689", "image": "Super-CLEVR/images/superCLEVR_new_000685.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a big motorbike behind a yellow motorbike behind a green double bus?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 24.21875, "loss": 5.002958297729492 }, { "id": "identity_328852", "image": "TextVQA/images/f6b2edd96154413b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the building on the right say?" }, { "from": "gpt", "value": "The answer is rackhams " } ], "clip_score": 23.953125, "loss": 4.44526481628418 }, { "id": "identity_73789", "image": "TQA/images/question_images/human_system_digestive_3713.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many salivary glands are indicated in the diagram?" }, { "from": "gpt", "value": "The answer is One label (implying one pair)" } ], "clip_score": 28.703125, "loss": 4.96481466293335 }, { "id": "identity_325710", "image": "AI2D/images/2866.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which region in the diagram represents this part of the ear?\nChoices:\n(A) b\n(B) l\n(C) r\n(D) i" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.46875, "loss": 4.465237617492676 }, { "id": "identity_35245", "image": "PMC-VQA/images/PMC4458878_f4_394192.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What structures indicate the actin-rich nodule core in the image? \nChoices:\n(A) talin \n(B) vinculin \n(C) both A and B \n(D) none of the above " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.15625, "loss": 4.242118835449219 }, { "id": "identity_219612", "image": "A-OKVQA/images/57940.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main activity taking place in the image?" }, { "from": "gpt", "value": "The answer is Skiing" } ], "clip_score": 23.75, "loss": 4.102204322814941 }, { "id": "identity_105345", "image": "Super-CLEVR/images/superCLEVR_new_001192.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is to the left of the purple object and has two wheels?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 22.75, "loss": 5.081570625305176 }, { "id": "identity_38464", "image": "TextVQA/images/233a9ff6b3175939.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the model of this aircraft?" }, { "from": "gpt", "value": "The answer is 17 gc mcrexf" } ], "clip_score": 22.890625, "loss": 4.867476463317871 }, { "id": "identity_321236", "image": "AI2D/images/3382.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the anther?\nChoices:\n(A) D\n(B) C\n(C) A\n(D) B" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 8.7265625, "loss": 4.03898811340332 }, { "id": "identity_319698", "image": "TQA/images/question_images/nuclear_energy_8120.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What stage follows N?\nChoices:\n(A) Rb-92\n(B) Cs-140\n(C) U-235\n(D) 200 MeV" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.0625, "loss": 4.038698673248291 }, { "id": "identity_323507", "image": "A-OKVQA/images/362005.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the area they are skating in made of?\nChoices:\n(A) metal\n(B) plastic\n(C) concrete\n(D) wood" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.015625, "loss": 4.208854675292969 }, { "id": "identity_30107", "image": "PMC-VQA/images/PMC6737727_F3_36291.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the appearance of the slab-like signal in the image? \nChoices:\n(A) High-signal \n(B) Low-signal \n(C) Irregular \n(D) Circular " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.203125, "loss": 4.717700004577637 }, { "id": "identity_314990", "image": "PMC-VQA/images/PMC7797455_FIG3_339777.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which lobes are more extensively affected on these images?\nChoices:\n(A) Upper lobes \n(B) Middle lobes \n(C) Lower lobes \n(D) All lobes are equally affected" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.359375, "loss": 4.5282301902771 }, { "id": "identity_284674", "image": "PMC-VQA/images/PMC6945702_Fig1_95195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this case, which section of the liver is impacted?\nChoices:\n(A) Right lobe\n(B) Left lobe\n(C) Caudate lobe\n(D) Quadrate lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.4375, "loss": 4.4873809814453125 }, { "id": "identity_129715", "image": "VQA2.0/images/205443.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the sign attached to the blue table?" }, { "from": "gpt", "value": "The answer is WE PEN" } ], "clip_score": 19.921875, "loss": 4.153468132019043 }, { "id": "identity_313194", "image": "FigureQA/images/77421.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Light Seafoam?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.9375, "loss": 4.1135358810424805 }, { "id": "identity_299092", "image": "AI2D/images/4197.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term used to describe the substances from which an object is made or can be made?\nChoices:\n(A) Cycle\n(B) Material\n(C) Cooling\n(D) Molten" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.15625, "loss": 4.538769721984863 }, { "id": "identity_322381", "image": "ScienceQA/images/11836.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What property is associated with this item?\nChoices:\n(A) flexible\n(B) translucent" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.5, "loss": 4.243828773498535 }, { "id": "identity_28365", "image": "TextVQA/images/ded539a3c4ce7f58.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what does it say under the very first coffee pot?" }, { "from": "gpt", "value": "The answer is bev 4" } ], "clip_score": 27.78125, "loss": 4.775345325469971 }, { "id": "identity_187771", "image": "A-OKVQA/images/206394.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many seats are visible inside the boat?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.859375, "loss": 4.305103778839111 }, { "id": "identity_265058", "image": "Super-CLEVR/images/superCLEVR_new_000898.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the green object that shares the same shape as the large gray rubber thing?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 16.484375, "loss": 4.079949378967285 }, { "id": "identity_184266", "image": "PMC-VQA/images/PMC6758606_F0003_41055.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct ligand molecules are visible in the binding pocket?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 25.203125, "loss": 4.455124855041504 }, { "id": "identity_324773", "image": "PMC-VQA/images/PMC4693018_fig2_456954.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of window images were used to identify these lesions?\nChoices:\n(A) Coronal bone window images.\n(B) Sagittal bone window images.\n(C) Axial bone window images.\n(D) Oblique bone window images." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.65625, "loss": 4.1833319664001465 }, { "id": "identity_37198", "image": "A-OKVQA/images/431092.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the man doing with the blue wand?\nChoices:\n(A) waving\n(B) digging\n(C) scratching\n(D) playing" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.015625, "loss": 4.088345527648926 }, { "id": "identity_291038", "image": "ScienceQA/images/17615.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being pointed out in the highlighted area? \nOptions:\n(A) Saint Lucia\n(B) Haiti\n(C) Saint Vincent and the Grenadines\n(D) Barbados" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.546875, "loss": 4.21347713470459 }, { "id": "identity_220497", "image": "VQA2.0/images/162578.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are wearing blue jerseys?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.875, "loss": 4.2540764808654785 }, { "id": "identity_271516", "image": "PMC-VQA/images/PMC8122658_ijms-22-04325-f005_440908.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the substances depicted in the image. Are they:\n(A) Polymers\n(B) Crystals\n(C) Fibers\n(D) Metals" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.59375, "loss": 5.221655368804932 }, { "id": "identity_29938", "image": "PMC-VQA/images/PMC2553761_F5_28181.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color of the EGFP fluorescence in the merged image? \nChoices:\n(A) Magenta \n(B) Green \n(C) Yellow \n(D) Blue " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.1875, "loss": 4.09670877456665 }, { "id": "identity_333453", "image": "PMC-VQA/images/PMC3900399_pone-0082707-g005_260972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the T-bar in this represent?\nChoices:\n(A) Blood Flow\n(B) Electrical Activity\n(C) Volume Difference\n(D) Cellular Activity" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.484375, "loss": 4.9452314376831055 }, { "id": "identity_325807", "image": "DocVQA/images/nsyg0024_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of help is IBM providing?" }, { "from": "gpt", "value": "The answer is stress testing the application" } ], "clip_score": 28.375, "loss": 4.78336238861084 }, { "id": "identity_276745", "image": "PMC-VQA/images/PMC5627011_F1_223853.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique is utilized in this case? \nOptions:\n(A) Computer Tomography \n(B) Magnetic Resonance Imaging \n(C) X-ray \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.5625, "loss": 4.1152544021606445 }, { "id": "identity_289356", "image": "ScienceQA/images/12805.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Arizona?\nChoices:\n(A) Tucson\n(B) Fairbanks\n(C) Phoenix\n(D) Boise" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.109375, "loss": 4.146311283111572 }, { "id": "identity_281040", "image": "TextVQA/images/4ec44c66f6a886d5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the machine's name inside the blue box?" }, { "from": "gpt", "value": "The answer is shopmaster" } ], "clip_score": 22.109375, "loss": 4.593224048614502 }, { "id": "identity_3759", "image": "AI2D/images/3470.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the letter M represent?\nChoices:\n(A) Pharynx\n(B) Heart\n(C) Esophagus\n(D) Dorsal Aorta" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.015625, "loss": 4.6653642654418945 }, { "id": "identity_300384", "image": "TabMWP/images/12032.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is it linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.921875, "loss": 4.043912410736084 }, { "id": "identity_325584", "image": "A-OKVQA/images/477807.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action by the rider puts the dogs at risk of falling off?\nChoices:\n(A) stops\n(B) speeds\n(C) yells\n(D) sings" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.40625, "loss": 4.279932498931885 }, { "id": "identity_69739", "image": "AI2D/images/511.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many animals in this food web are prey for the coyote?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 27.4375, "loss": 4.6961350440979 }, { "id": "identity_313801", "image": "DVQA/images/bar_train_00018876.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are these bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 20.125, "loss": 4.109855651855469 }, { "id": "identity_322654", "image": "PMC-VQA/images/PMC7447497_F1_240040.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the superior sagittal sinus is thrombosed in this patient?\nChoices:\n(A) Anterior segment\n(B) Middle segment\n(C) Parietal segment\n(D) Occipital segment" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.625, "loss": 4.242118835449219 }, { "id": "identity_263467", "image": "A-OKVQA/images/107960.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what way does this vehicle establish contact with the ground?\nChoices:\n(A) rail\n(B) hooves\n(C) track\n(D) wheels" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.9140625, "loss": 4.198294639587402 }, { "id": "identity_285909", "image": "A-OKVQA/images/137108.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the target of the person who is swatting?\nChoices:\n(A) fly\n(B) hungry bear\n(C) ant\n(D) tennis ball" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.359375, "loss": 4.572228908538818 }, { "id": "identity_42814", "image": "DocVQA/images/hqvw0217_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the status of the manuscript with the article title \"WMC(2)\"?" }, { "from": "gpt", "value": "The answer is Sign-off process completed, manuscript is with DesignWrite to prepare submission package" } ], "clip_score": 14.109375, "loss": 4.692775726318359 }, { "id": "identity_189806", "image": "A-OKVQA/images/34624.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many potted plants are on the shelf?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 18.03125, "loss": 5.885935306549072 }, { "id": "identity_137673", "image": "DVQA/images/bar_train_00088904.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the bar with the highest value in the 'auto' group?" }, { "from": "gpt", "value": "The answer is suite" } ], "clip_score": 18.484375, "loss": 4.050233840942383 }, { "id": "identity_298486", "image": "FigureQA/images/2934.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Aqua the median with a high value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.1875, "loss": 4.434994697570801 }, { "id": "identity_201195", "image": "A-OKVQA/images/162753.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dodgeballs are on the court?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 28.796875, "loss": 4.047947883605957 }, { "id": "identity_311014", "image": "DocVQA/images/fykp0227_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the place?" }, { "from": "gpt", "value": "The answer is N.Y" } ], "clip_score": 15.375, "loss": 4.064298152923584 }, { "id": "identity_2462", "image": "VQA2.0/images/168417.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the woman's hands?" }, { "from": "gpt", "value": "The answer is pocket" } ], "clip_score": 11.90625, "loss": 4.063368797302246 }, { "id": "identity_108292", "image": "A-OKVQA/images/230522.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any humans in the image?" }, { "from": "gpt", "value": "The answer is Yes, one human" } ], "clip_score": 17.953125, "loss": 4.292850494384766 }, { "id": "identity_318110", "image": "IconQA/images/43362.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this shape?\nChoices:\n(A) slide\n(B) flip\n(C) turn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.40625, "loss": 4.3610053062438965 }, { "id": "identity_1937", "image": "IconQA/images/90979.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of hearts even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.421875, "loss": 4.222636699676514 }, { "id": "identity_281665", "image": "ScienceQA/images/1887.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Ohio?\nChoices:\n(A) Topeka\n(B) Cincinnati\n(C) Columbus\n(D) Fayetteville" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.40625, "loss": 4.064574718475342 }, { "id": "identity_288960", "image": "A-OKVQA/images/459185.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which type of event is this rider participating?\nChoices:\n(A) polo\n(B) dance\n(C) show jumping\n(D) race" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.25, "loss": 4.154814720153809 }, { "id": "identity_316585", "image": "PMC-VQA/images/PMC6375371_F1_436076.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the cyst located in the MRI image?\nChoices:\n(A) D5-D8\n(B) L2-L5\n(C) Cervical spine\n(D) Lower pole" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.3125, "loss": 4.934187412261963 }, { "id": "identity_261971", "image": "VQA-AS/images/12377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the weather warm today?" }, { "from": "gpt", "value": "The answer is yes it is" } ], "clip_score": 16.09375, "loss": 4.291332721710205 }, { "id": "identity_275683", "image": "TQA/images/question_images/atomic_mass_number_9015.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the number of neutrons present in this element?\nChoices:\n(A) 146\n(B) 238\n(C) 73\n(D) 92" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.4375, "loss": 4.417572975158691 }, { "id": "identity_328450", "image": "A-OKVQA/images/278823.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the yellow hose attached to?\nChoices:\n(A) drainage\n(B) sewers\n(C) outlet\n(D) fire hydrant" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.59375, "loss": 4.503050804138184 }, { "id": "identity_264983", "image": "AI2D/images/4823.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the label that represents Glandosum among the following choices: (A) G, (B) D, (C) E, or (D) H?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.203125, "loss": 4.4784698486328125 }, { "id": "identity_325638", "image": "PMC-VQA/images/PMC4008388_F2_285436.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was observed in the brain scan at various levels?\nChoices:\n(A) Left lateral ventricles\n(B) Right lateral ventricles\n(C) Third ventricle\n(D) Fourth ventricle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.296875, "loss": 4.676821708679199 }, { "id": "identity_308534", "image": "DVQA/images/bar_train_00161699.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many categories have at least one bar with a value greater than 4?" }, { "from": "gpt", "value": "The answer is three" } ], "clip_score": 25.4375, "loss": 4.0181756019592285 }, { "id": "identity_24428", "image": "ScienceQA/images/14251.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which material is this belt made of?\nChoices:\n(A) metal\n(B) rubber" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.515625, "loss": 4.173393726348877 }, { "id": "identity_8056", "image": "Super-CLEVR/images/superCLEVR_new_000428.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The matte fighter has what size?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 20.015625, "loss": 4.535238742828369 }, { "id": "identity_28624", "image": "PMC-VQA/images/PMC5002182_Fig2_67480.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in Panel c of the image? \nChoices:\n(A) Normal neurons, \n(B) Dysplastic region, \n(C) Magnified neurons, \n(D) Positive neurons" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.5625, "loss": 4.1380438804626465 }, { "id": "identity_27324", "image": "Super-CLEVR/images/superCLEVR_new_001319.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the small bus that is right of the tiny metallic double bus behind the small red matte bus on the right side of the large gray object?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 27.375, "loss": 4.056394577026367 }, { "id": "identity_308398", "image": "DocVQA/images/yncd0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is mentioned in the header of this document?" }, { "from": "gpt", "value": "The answer is chcop agenda" } ], "clip_score": 21.75, "loss": 5.580977439880371 }, { "id": "identity_28145", "image": "PMC-VQA/images/PMC5020733_f2_72526.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of view is shown in all the panels except panel h? \nChoices:\n(A) lateral view \n(B) anterior view \n(C) ventral view \n(D) dorsal view " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.7890625, "loss": 4.535111427307129 }, { "id": "identity_303297", "image": "Super-CLEVR/images/superCLEVR_new_000907.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the rubber object located behind the big yellow thing and to the right of the bus?" }, { "from": "gpt", "value": "The answer is tandem bike" } ], "clip_score": 26.390625, "loss": 4.149942398071289 }, { "id": "identity_3605", "image": "IconQA/images/99941.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the dotted line a line of symmetry?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.65625, "loss": 4.311800003051758 }, { "id": "identity_207839", "image": "Super-CLEVR/images/superCLEVR_new_000723.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is positioned directly behind the green car?" }, { "from": "gpt", "value": "The answer is A school bus" } ], "clip_score": 22.0, "loss": 4.188151836395264 }, { "id": "identity_325086", "image": "PMC-VQA/images/PMC5775363_Fig2_266010.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique used for this? \nChoices:\n(A) X-ray \n(B) CT scan \n(C) MRI \n(D) PET" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.3125, "loss": 4.83231258392334 }, { "id": "identity_296991", "image": "A-OKVQA/images/392851.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of environment is this categorized as?\nChoices:\n(A) urban\n(B) unknowns\n(C) wild\n(D) rural" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.0703125, "loss": 4.379391193389893 }, { "id": "identity_215803", "image": "VQA2.0/images/274715.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many surfboards are on top of the vehicle?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.828125, "loss": 4.215244293212891 }, { "id": "identity_287157", "image": "FigureQA/images/15237.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Deep Pink or Dark Gray?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.28125, "loss": 4.633284091949463 }, { "id": "identity_303515", "image": "Super-CLEVR/images/superCLEVR_new_000035.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the small rubber motorbike?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 25.296875, "loss": 4.450533866882324 }, { "id": "identity_269060", "image": "AI2D/images/3979.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the term for the wide part of the leaf. Is it:\n(A) petiole\n(B) blade\n(C) midrib\n(D) stipule" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 38.125, "loss": 4.907071590423584 }, { "id": "identity_315673", "image": "TQA/images/abc_question_images/parts_skeleton_17275.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option indicates the mandible?\nChoices:\n(A) X\n(B) W\n(C) E\n(D) D" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.359375, "loss": 4.260200023651123 }, { "id": "identity_108617", "image": "TextVQA/images/5cf7e1947a240b38.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the object carved into the book?" }, { "from": "gpt", "value": "The answer is A well or pit" } ], "clip_score": 27.1875, "loss": 4.115593910217285 }, { "id": "identity_275078", "image": "FigureQA/images/44146.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange Red the median with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.875, "loss": 4.164463043212891 }, { "id": "identity_132170", "image": "VizWiz/images/VizWiz_train_00019454.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many diagrams depict a full moon phase?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 31.625, "loss": 4.454537391662598 }, { "id": "identity_332814", "image": "A-OKVQA/images/265243.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What photographic technique was used to capture the movement of something on the road?\nChoices:\n(A) panorama\n(B) hdr\n(C) time-lapse\n(D) bokeh" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.171875, "loss": 4.037070274353027 }, { "id": "identity_10964", "image": "A-OKVQA/images/247909.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are the stuffed animals on display?\nChoices:\n(A) as trophies\n(B) to appreciate\n(C) to sell\n(D) as art" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.890625, "loss": 4.20957088470459 }, { "id": "identity_329731", "image": "FigureQA/images/37559.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.71875, "loss": 4.202336311340332 }, { "id": "identity_303363", "image": "ScienceQA/images/5374.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which place is highlighted?\nChoices:\n(A) Saint Vincent and the Grenadines\n(B) Trinidad and Tobago\n(C) Dominica\n(D) Grenada" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.34375, "loss": 4.4160332679748535 }, { "id": "identity_187166", "image": "PMC-VQA/images/PMC8511819_F1_82841.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the leg in the image?" }, { "from": "gpt", "value": "The answer is Lateral view" } ], "clip_score": 27.75, "loss": 4.155613422393799 }, { "id": "identity_272200", "image": "TQA/images/abc_question_images/parts_fish_12911.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label that represents the pelvic fins:\n(A) W\n(B) N\n(C) P\n(D) T" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.15625, "loss": 4.767434597015381 }, { "id": "identity_217602", "image": "A-OKVQA/images/29327.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many giraffes are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.046875, "loss": 4.127584934234619 }, { "id": "identity_329714", "image": "PMC-VQA/images/PMC7117604_FIG1_144076.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which side of the bronchus was affected by the lesion?\nChoices:\n(A) Left\n(B) Right\n(C) Both\n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.90625, "loss": 4.265395641326904 }, { "id": "identity_16819", "image": "TQA/images/question_images/human_system_respiratory_6198.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What lobe of the lung is between the superior lobe and the inferior lobe?\nChoices:\n(A) middle lobe\n(B) carina\n(C) diaphragm\n(D) trachea" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.6875, "loss": 4.674872398376465 }, { "id": "identity_185384", "image": "TextVQA/images/81cb69ea15958267.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many straws are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.828125, "loss": 4.149618148803711 }, { "id": "identity_68486", "image": "ScienceQA/images/2554.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many rubber toys are there in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 14.203125, "loss": 4.372884273529053 }, { "id": "identity_327077", "image": "A-OKVQA/images/390539.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the uniform has the most similar theme to the motorcycle?\nChoices:\n(A) shirt\n(B) gloves\n(C) belt\n(D) boots" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.421875, "loss": 4.565001010894775 }, { "id": "identity_292744", "image": "A-OKVQA/images/184201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of boat is this?\nChoices:\n(A) tow boat\n(B) fishing\n(C) transport\n(D) coast guard" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.296875, "loss": 4.429966449737549 }, { "id": "identity_289625", "image": "IconQA/images/70913.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture is the tenth in the sequence if the first picture is a crab? Is it a boat, bucket, or another crab?\nChoices:\n(A) boat\n(B) bucket\n(C) crab" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.125, "loss": 4.043709754943848 }, { "id": "identity_318288", "image": "Super-CLEVR/images/superCLEVR_new_001275.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the matte car that is behind the small matte car to the left of the matte minivan?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 27.796875, "loss": 5.2508039474487305 }, { "id": "identity_308717", "image": "AI2D/images/3995.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the center of this planet called?\nChoices:\n(A) solid inner core\n(B) none of the above\n(C) crust\n(D) mantle" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.3125, "loss": 4.15109395980835 }, { "id": "identity_9249", "image": "PMC-VQA/images/PMC5874954_acm20542-fig-0003_295138.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique is being referred to in the caption?\nChoices:\n(A) Computed tomography (CT)\n(B) Magnetic resonance imaging (MRI)\n(C) Positron emission tomography (PET)\n(D) X-ray radiography" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.328125, "loss": 4.4037604331970215 }, { "id": "identity_283913", "image": "PMC-VQA/images/PMC4312452_Fig2_354195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the right image, what is the representation of the purple color?\nChoices:\n(A) Dorsal valve \n(B) Spiculation \n(C) Endoskeleton of the plectolophous lophophore \n(D) Ventral valve" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.109375, "loss": 4.1710615158081055 }, { "id": "identity_37143", "image": "IconQA/images/67383.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner less likely to land?\nChoices:\n(A) white\n(B) yellow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.28125, "loss": 5.0311174392700195 }, { "id": "identity_272265", "image": "AI2D/images/560.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the correct term that refers to the stalk connecting a leaf to a stem from the given options:\n(A) Petiole\n(B) Margin\n(C) Lamina\n(D) Tip" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.90625, "loss": 4.055108070373535 }, { "id": "identity_333539", "image": "A-OKVQA/images/65488.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the person wearing the covering around their neck?\nChoices:\n(A) keeping warm\n(B) covering scar\n(C) vanity\n(D) style" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.609375, "loss": 4.539215087890625 }, { "id": "identity_269460", "image": "TQA/images/abc_question_images/aquifers_16517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter in the diagram represents the ASR borehole? \n(A) U\n(B) R\n(C) P\n(D) T" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.71875, "loss": 4.327237129211426 }, { "id": "identity_2327", "image": "TextVQA/images/440c4d4d441af2ec.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what restaurant is being advertised?" }, { "from": "gpt", "value": "The answer is pie house" } ], "clip_score": 21.84375, "loss": 4.308798313140869 }, { "id": "identity_17150", "image": "PMC-VQA/images/PMC7423083_F2_233126.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the new T2 lesion located? \nChoices:\n(A) Left side of the pons \n(B) Right side of the pons \n(C) Centrum semiovale \n(D) Cerebellum " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.859375, "loss": 4.675530433654785 }, { "id": "identity_327532", "image": "A-OKVQA/images/388955.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person doing to the wave?\nChoices:\n(A) splitting\n(B) carving\n(C) cutting\n(D) slicing" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.46875, "loss": 4.3924994468688965 }, { "id": "identity_321505", "image": "A-OKVQA/images/314904.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who is credited with the creation of this sport?\nChoices:\n(A) shaq o'neal\n(B) james naismith\n(C) michael jordan\n(D) wayne gretzky" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.421875, "loss": 4.492857933044434 }, { "id": "identity_252465", "image": "A-OKVQA/images/129438.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the likely purpose of the large cone-shaped object in the context of the scene, and how does it contribute to the activities of the people around it?" }, { "from": "gpt", "value": "The answer is The gramophone provides music for social interaction" } ], "clip_score": 24.703125, "loss": 4.75130558013916 }, { "id": "identity_292242", "image": "PMC-VQA/images/PMC6891027_jgh312213-fig-0001_78336.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which lobe of the liver is the abnormality observed?\nChoices:\n(A) Right lobe\n(B) Left lobe\n(C) Caudate lobe\n(D) Quadrate lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.03125, "loss": 4.48742151260376 }, { "id": "identity_2901", "image": "TQA/images/question_images/cycle_water_4974.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What process happens after surface runoff?\nChoices:\n(A) Infiltration\n(B) Precipitation on land\n(C) Evaporation from land\n(D) Evaporation from ocean" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.125, "loss": 4.269944667816162 }, { "id": "identity_72690", "image": "ScienceQA/images/3267.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many beans are visible in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 31.125, "loss": 4.29918909072876 }, { "id": "identity_285410", "image": "TabMWP/images/34492.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.421875, "loss": 4.182644367218018 }, { "id": "identity_19924", "image": "FigureQA/images/24207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Turquoise the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.4375, "loss": 4.233954429626465 }, { "id": "identity_305407", "image": "VQA-AS/images/14736.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is closest to the plant?" }, { "from": "gpt", "value": "The answer is window" } ], "clip_score": 22.5, "loss": 5.0639543533325195 }, { "id": "identity_108877", "image": "A-OKVQA/images/375086.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the plate?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 17.125, "loss": 4.964994430541992 }, { "id": "identity_267610", "image": "FigureQA/images/82791.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the roughest, Saddle Brown? \nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.859375, "loss": 4.087665557861328 }, { "id": "identity_332207", "image": "A-OKVQA/images/267885.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people doing on top of the elephants?\nChoices:\n(A) feeding them\n(B) riding them\n(C) selling them\n(D) cleaning them" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.625, "loss": 4.540582180023193 }, { "id": "identity_48903", "image": "DVQA/images/bar_train_00191642.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset has the highest accuracy for the 'sign' algorithm?" }, { "from": "gpt", "value": "The answer is cabin" } ], "clip_score": 31.03125, "loss": 4.0494537353515625 }, { "id": "identity_203649", "image": "PMC-VQA/images/PMC3640907_F1_202343.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is the white arrow pointing to?" }, { "from": "gpt", "value": "The answer is Blood vessel" } ], "clip_score": 24.53125, "loss": 4.62140417098999 }, { "id": "identity_199430", "image": "VQA2.0/images/237608.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wine glasses are hanging from the rack?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 17.296875, "loss": 4.701893329620361 }, { "id": "identity_32389", "image": "IconQA/images/48932.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 1/2\n(B) 1/4\n(C) 1/3\n(D) 1/5" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.375, "loss": 4.221484661102295 }, { "id": "identity_290643", "image": "A-OKVQA/images/298151.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which nut is placed on the plate among the following options: peanut, walnut, cashew, or pistachio?\nChoices:\n(A) peanut\n(B) walnut\n(C) cashew\n(D) pistachio" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.03125, "loss": 4.117656230926514 }, { "id": "identity_314741", "image": "A-OKVQA/images/390086.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of this dish stands out?\nChoices:\n(A) parsley\n(B) olives\n(C) vegetables\n(D) meat" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.265625, "loss": 4.1202287673950195 }, { "id": "identity_307062", "image": "FigureQA/images/84483.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.96875, "loss": 4.003022193908691 }, { "id": "identity_200036", "image": "TextVQA/images/cd83b6c8254e0f7b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the region code for this DVD?" }, { "from": "gpt", "value": "The answer is ALL" } ], "clip_score": 19.046875, "loss": 4.522524833679199 }, { "id": "identity_270611", "image": "PMC-VQA/images/PMC7961081_f2_392000.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging modality is utilized in panel (E) among the following options: MRI, Ultrasound, X-ray, or CT scan?\nChoices:\n(A) MRI \n(B) Ultrasound \n(C) X-ray \n(D) CT scan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.609375, "loss": 4.151579856872559 }, { "id": "identity_329996", "image": "PMC-VQA/images/PMC9153154_Fig5_298031.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the black circle indicate in this image?\nChoices:\n(A) Exos injection site\n(B) iRGD injection site\n(C) Tumor site\n(D) SPECT/CT machine location" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.390625, "loss": 4.613099098205566 }, { "id": "identity_262783", "image": "VQA2.0/images/13938.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the sky?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 14.3125, "loss": 4.567049503326416 }, { "id": "identity_12536", "image": "AI2D/images/4651.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What represents the hawk in the illustration?\nChoices:\n(A) none of the above\n(B) a\n(C) B\n(D) D" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.9375, "loss": 4.305511951446533 }, { "id": "identity_337322", "image": "A-OKVQA/images/448046.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the two people on the tandem bike just do?\nChoices:\n(A) lost bet\n(B) met santa\n(C) got married\n(D) won game" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.90625, "loss": 4.331060409545898 }, { "id": "identity_24868", "image": "IconQA/images/6159.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a sub. Which picture is third?\nChoices:\n(A) sub\n(B) fish\n(C) turtle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.296875, "loss": 4.419126033782959 }, { "id": "identity_187710", "image": "VQA2.0/images/8290.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many zebras are present in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 29.0, "loss": 4.28491735458374 }, { "id": "identity_214329", "image": "A-OKVQA/images/365271.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is advertised on the scoreboard to the left of the American flag?" }, { "from": "gpt", "value": "The answer is Amstron" } ], "clip_score": 24.578125, "loss": 5.736881256103516 }, { "id": "identity_8249", "image": "TQA/images/question_images/human_system_nervous_6172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the brain is the cerebrum connected to?\nChoices:\n(A) Hypothalamus\n(B) Brain stem\n(C) Corpus callosum\n(D) Ventricles" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.265625, "loss": 4.081334114074707 }, { "id": "identity_148511", "image": "DocVQA/images/zhhg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many reviews were received according to the records?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 18.421875, "loss": 4.594903945922852 }, { "id": "identity_311311", "image": "PMC-VQA/images/PMC5004802_pone.0162271.g001_67944.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the middle and bottom rows depict?\nChoices:\n(A) Visual acuity tests\n(B) The participant's brain activity\n(C) The participant's affected areas\n(D) Different sections of the brain" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.59375, "loss": 4.193526268005371 }, { "id": "identity_267672", "image": "IconQA/images/23748.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is it possible to draw a circle using this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.015625, "loss": 4.218047142028809 }, { "id": "identity_272908", "image": "IconQA/images/90610.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture follows the first image of a train in the sequence provided?\nChoices:\n(A) bear\n(B) train\n(C) plane" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.90625, "loss": 4.6129913330078125 }, { "id": "identity_1922", "image": "FigureQA/images/94176.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Seafoam less than Peru?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.828125, "loss": 4.010100841522217 }, { "id": "identity_19201", "image": "IconQA/images/40350.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) turn\n(B) flip\n(C) slide" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.5, "loss": 4.336421012878418 }, { "id": "identity_37472", "image": "PMC-VQA/images/PMC4730158_f4_466715.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the imaging technique used to capture this image? \nChoices:\n(A) SEM \n(B) TEM \n(C) AFM \n(D) XRD " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.78125, "loss": 4.355101108551025 }, { "id": "identity_307946", "image": "VizWiz/images/VizWiz_train_00006737.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this thing?" }, { "from": "gpt", "value": "The answer is can" } ], "clip_score": 18.3125, "loss": 4.159749507904053 }, { "id": "identity_26015", "image": "A-OKVQA/images/18370.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are the cars parked on the grass?\nChoices:\n(A) to show\n(B) to fix\n(C) to clean\n(D) to paint" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.7421875, "loss": 4.057711124420166 }, { "id": "identity_149682", "image": "DocVQA/images/mmlk0226_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the nationality of Mr. S. Stachenko?" }, { "from": "gpt", "value": "The answer is Canadian" } ], "clip_score": 21.1875, "loss": 4.040598392486572 }, { "id": "identity_267750", "image": "TQA/images/abc_question_images/human_system_reproductory_17042.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label that represents the Scrotum:\n(A) C\n(B) M\n(C) V\n(D) N" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.5625, "loss": 4.103337287902832 }, { "id": "identity_42280", "image": "DVQA/images/bar_train_00158190.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset has the lowest accuracy on the 'grip' category?" }, { "from": "gpt", "value": "The answer is axis" } ], "clip_score": 32.4375, "loss": 4.184866428375244 }, { "id": "identity_292731", "image": "PMC-VQA/images/PMC6843538_microorganisms-07-00397-f005_65528.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the activity taking place in the image shown?\nChoices:\n(A) Photosynthesis\n(B) Mitosis\n(C) Cellular respiration\n(D) None of the above" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.53125, "loss": 4.647332191467285 }, { "id": "identity_40186", "image": "FigureQA/images/9786.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the Dark Gold section to the Gold section?" }, { "from": "gpt", "value": "The answer is A ratio based on estimated angles" } ], "clip_score": 30.046875, "loss": 5.6478705406188965 }, { "id": "identity_185530", "image": "A-OKVQA/images/37514.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What geometric shape is the kite closest to the ground?" }, { "from": "gpt", "value": "The answer is A cube" } ], "clip_score": 24.9375, "loss": 4.683934688568115 }, { "id": "identity_187435", "image": "A-OKVQA/images/111406.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 15.921875, "loss": 4.549072265625 }, { "id": "identity_24817", "image": "TextVQA/images/3de5c3861d55a253.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what are the first 2 letters of the license plate?" }, { "from": "gpt", "value": "The answer is gmail" } ], "clip_score": 16.4375, "loss": 6.006765842437744 }, { "id": "identity_276674", "image": "VQA2.0/images/370250.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What makes the man's clothing unsafe?" }, { "from": "gpt", "value": "The answer is nothing" } ], "clip_score": 17.4375, "loss": 4.063441276550293 }, { "id": "identity_124117", "image": "VizWiz/images/VizWiz_train_00012122.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the inhaler's body?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 33.4375, "loss": 4.015085697174072 }, { "id": "identity_25688", "image": "A-OKVQA/images/427091.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How is the child crossing the street seating making progress?\nChoices:\n(A) being pulled\n(B) mechanized wheelchair\n(C) being pushed\n(D) sheer will" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.453125, "loss": 4.169147491455078 }, { "id": "identity_115562", "image": "A-OKVQA/images/334868.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many baggage carts can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.296875, "loss": 4.3156819343566895 }, { "id": "identity_204483", "image": "PMC-VQA/images/PMC7180539_ytaa024-F3_158057.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What measurement scale is used on the left side of the ultrasound image?" }, { "from": "gpt", "value": "The answer is Centimeter depth scale" } ], "clip_score": 35.1875, "loss": 4.267697334289551 }, { "id": "identity_331092", "image": "IconQA/images/31333.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is less likely to be landed on by the spinner?\nChoices:\n(A) yellow\n(B) white" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.234375, "loss": 4.453636646270752 }, { "id": "identity_30285", "image": "A-OKVQA/images/271417.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How is this boat powered?\nChoices:\n(A) gas\n(B) coal\n(C) wind\n(D) battery" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.875, "loss": 4.334157466888428 }, { "id": "identity_218314", "image": "TextVQA/images/cb37c61c0742a33e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many yellow taxis can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.59375, "loss": 4.2221574783325195 }, { "id": "identity_311976", "image": "FigureQA/images/51366.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.8515625, "loss": 4.241912841796875 }, { "id": "identity_326138", "image": "A-OKVQA/images/229149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What will the person in the blue sweatshirt do next?\nChoices:\n(A) take photograph\n(B) give object\n(C) throw object\n(D) eat object" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.625, "loss": 4.30729866027832 }, { "id": "identity_313225", "image": "AI2D/images/2475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which stage occurs during the winter?\nChoices:\n(A) Queen Foraging\n(B) Nest Making\n(C) Queen Hibernates\n(D) Queen and Males" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.390625, "loss": 4.401403903961182 }, { "id": "identity_124248", "image": "A-OKVQA/images/48066.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the stage backdrop?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 22.6875, "loss": 4.261905193328857 }, { "id": "identity_34567", "image": "ScienceQA/images/9027.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of North Carolina?\nChoices:\n(A) Tallahassee\n(B) Raleigh\n(C) Oklahoma City\n(D) Charlotte" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.625, "loss": 4.208601474761963 }, { "id": "identity_15270", "image": "ScienceQA/images/806.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these cities is marked on the map?\nChoices:\n(A) St. Louis\n(B) New Orleans\n(C) Houston\n(D) New York City" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.390625, "loss": 4.053309917449951 }, { "id": "identity_142160", "image": "DocVQA/images/hggk0023_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last event listed on the calendar?" }, { "from": "gpt", "value": "The answer is Read Caravan" } ], "clip_score": 22.421875, "loss": 6.541103839874268 }, { "id": "identity_282585", "image": "PMC-VQA/images/PMC6822922_FIG1_59149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the location of the osteoid osteoma in the patient's body? \nOptions:\n(A) Right tibia\n(B) Right femur\n(C) Left tibia\n(D) Left femur" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.0, "loss": 4.670884609222412 }, { "id": "identity_132107", "image": "Super-CLEVR/images/superCLEVR_new_001515.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the car that is furthest from the camera?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 19.703125, "loss": 4.663485527038574 }, { "id": "identity_203026", "image": "VQA2.0/images/399232.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrians can be seen in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.3125, "loss": 4.682311534881592 }, { "id": "identity_276878", "image": "A-OKVQA/images/490338.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of product is sold by the store with a sign featuring a blue arrow?\nChoices:\n(A) dvd\n(B) udon\n(C) sushi\n(D) vhs" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.3359375, "loss": 4.433838844299316 }, { "id": "identity_30183", "image": "PMC-VQA/images/PMC5121968_Fig1_98781.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What anatomical region is being imaged in the midline sagittal image?\nChoices:\n(A) Shoulder joint \n(B) Knee joint \n(C) Hip joint \n(D) Elbow joint " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.0625, "loss": 4.6713151931762695 }, { "id": "identity_31176", "image": "ScienceQA/images/4765.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Maine?\nChoices:\n(A) Charleston\n(B) Portland\n(C) Hartford\n(D) Augusta" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.71875, "loss": 4.424059867858887 }, { "id": "identity_318686", "image": "AI2D/images/2275.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the above diagram, which part indicates an adult turtle?\nChoices:\n(A) C\n(B) E\n(C) F\n(D) B" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.015625, "loss": 4.098695278167725 }, { "id": "identity_288097", "image": "A-OKVQA/images/177319.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which location are the bike riders currently riding? \n(A) school\n(B) park\n(C) bike path\n(D) trail" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.28125, "loss": 4.287173271179199 }, { "id": "identity_878", "image": "TextVQA/images/cace6f17ad0f4c31.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what organization is sponsoring this event that is on the women's dresses?" }, { "from": "gpt", "value": "The answer is change" } ], "clip_score": 21.953125, "loss": 4.510622978210449 }, { "id": "identity_323353", "image": "Super-CLEVR/images/superCLEVR_new_001411.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the red rubber jet the same size as the object behind the big blue matte aeroplane?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 25.875, "loss": 4.739448070526123 }, { "id": "identity_17874", "image": "PMC-VQA/images/PMC5075973_F2_87014.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which brain structure is affected by the lesion? \nChoices:\n(A) Cerebellum \n(B) Hippocampus \n(C) Globus Pallidus \n(D) Thalamus " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.984375, "loss": 4.106871604919434 }, { "id": "identity_9295", "image": "ScienceQA/images/2254.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) Cuba\n(B) Saint Lucia\n(C) Saint Kitts and Nevis\n(D) Antigua and Barbuda" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.3125, "loss": 4.159333229064941 }, { "id": "identity_303755", "image": "TQA/images/abc_question_images/seasons_10084.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which one indicates autumn?\nChoices:\n(A) X\n(B) P\n(C) Y\n(D) J" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.953125, "loss": 4.374129772186279 }, { "id": "identity_310931", "image": "PMC-VQA/images/PMC6545224_Fig2_488282.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the imaging technique used to detect this abnormality?\nChoices:\n(A) X-ray\n(B) CT scan\n(C) MRI\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.1875, "loss": 4.273955345153809 }, { "id": "identity_154696", "image": "AI2D/images/2142.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct stages are depicted in the jellyfish life cycle diagram?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 35.34375, "loss": 4.1179680824279785 }, { "id": "identity_9680", "image": "PMC-VQA/images/PMC6065269_F1_347427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of radiograph is shown in the second image? \nChoices:\n(A) Anteroposterior radiograph \n(B) Lateral radiograph \n(C) Oblique radiograph \n(D) CT scan " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.671875, "loss": 4.439369201660156 }, { "id": "identity_147572", "image": "DocVQA/images/pfjh0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the stock type for Envelopes?" }, { "from": "gpt", "value": "The answer is to match Cranes W-34" } ], "clip_score": 26.71875, "loss": 4.832379341125488 }, { "id": "identity_266075", "image": "FigureQA/images/93375.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Aqua's area under the curve the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.109375, "loss": 4.282248497009277 }, { "id": "identity_48472", "image": "MapQA/images/map_955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state appears to have the lowest number of hearings?" }, { "from": "gpt", "value": "The answer is a state name within ten words" } ], "clip_score": 36.9375, "loss": 4.465823650360107 }, { "id": "identity_138112", "image": "DVQA/images/bar_train_00101044.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the least preference for 'boss'?" }, { "from": "gpt", "value": "The answer is brain" } ], "clip_score": 25.015625, "loss": 4.247436046600342 }, { "id": "identity_329899", "image": "AI2D/images/1789.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the diagram of a food chain and your knowledge of science, if all the zooplankton were to die, the brittle star population would most likely\nChoices:\n(A) decrease\n(B) increase\n(C) remain the same\n(D) expire" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 34.28125, "loss": 4.365405082702637 }, { "id": "identity_109329", "image": "VizWiz/images/VizWiz_train_00012404.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the object to the right of the circular item?" }, { "from": "gpt", "value": "The answer is heart" } ], "clip_score": 14.9140625, "loss": 4.900259494781494 }, { "id": "identity_755", "image": "A-OKVQA/images/57617.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What will happen to the car on the back of the tow truck?\nChoices:\n(A) trashed\n(B) driven away\n(C) buffed out\n(D) repaired" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.015625, "loss": 4.362066745758057 }, { "id": "identity_281909", "image": "A-OKVQA/images/335824.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of trees can be seen in this location?\nChoices:\n(A) fir\n(B) palm\n(C) evergreen\n(D) deciduous" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 7.79296875, "loss": 4.152687072753906 }, { "id": "identity_34441", "image": "Super-CLEVR/images/superCLEVR_new_000042.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a yellow motorbike on the left side of the matte thing that is in front of the yellow thing; is there a tiny metallic dirtbike that is on the left side of it?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 30.265625, "loss": 5.3242268562316895 }, { "id": "identity_305528", "image": "DVQA/images/bar_train_00025232.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What category does the mediumpurple color indicate?" }, { "from": "gpt", "value": "The answer is date" } ], "clip_score": 19.8125, "loss": 4.060364246368408 }, { "id": "identity_29863", "image": "PMC-VQA/images/PMC4631848_fig2_440387.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the pain located in the patient described in the Scout film? \nChoices:\n(A) Left flank \n(B) Right flank \n(C) Both flanks \n(D) Abdomen. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.203125, "loss": 4.1841888427734375 }, { "id": "identity_293405", "image": "PMC-VQA/images/PMC4877350_Fig14_33157.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of pregnancy is being depicted in the caption? Is it:\n(A) Ovarian pregnancy\n(B) Cervical pregnancy\n(C) Tubal pregnancy\n(D) Uterine pregnancy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.484375, "loss": 5.182906150817871 }, { "id": "identity_14502", "image": "TextVQA/images/1ea65daf0d469bfa.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: who are the people that get it?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 14.6875, "loss": 4.975092887878418 }, { "id": "identity_265106", "image": "ScienceQA/images/8207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) Mississippi\n(B) Pennsylvania\n(C) New Jersey\n(D) Delaware" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.171875, "loss": 4.602044105529785 }, { "id": "identity_274466", "image": "A-OKVQA/images/311715.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of vehicle is the man in the gray hat riding, with the options being a wagon, four-wheeler, trike, or sports bike?\nChoices:\n(A) wagon\n(B) four wheeler\n(C) trike\n(D) sports bike" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.7890625, "loss": 4.298900127410889 }, { "id": "identity_23326", "image": "DVQA/images/bar_train_00058825.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which stack of bars contains the smallest valued individual element in the whole chart?" }, { "from": "gpt", "value": "The answer is output" } ], "clip_score": 29.203125, "loss": 4.249476432800293 }, { "id": "identity_280232", "image": "PMC-VQA/images/PMC8007494_Fig11_405725.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to identify the calcification: X-ray, CT scan, MRI, or Ultrasound?\nChoices:\n(A) X-ray \n(B) CT scan \n(C) MRI \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.90625, "loss": 4.309396266937256 }, { "id": "identity_223019", "image": "PMC-VQA/images/PMC6071436_rcr2357-fig-0001_349117.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiological sign visible in the right upper lobe of the lung?" }, { "from": "gpt", "value": "The answer is Halo sign" } ], "clip_score": 25.5, "loss": 4.0446953773498535 }, { "id": "identity_132388", "image": "A-OKVQA/images/48759.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of establishment is located on the left side of the street?" }, { "from": "gpt", "value": "The answer is A pub" } ], "clip_score": 19.59375, "loss": 4.12494421005249 }, { "id": "identity_138893", "image": "DocVQA/images/jzbn0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person who signed the letter?" }, { "from": "gpt", "value": "The answer is Sarah Brozena" } ], "clip_score": 16.109375, "loss": 4.100884437561035 }, { "id": "identity_273200", "image": "Super-CLEVR/images/superCLEVR_new_000957.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the shapes of the small purple object and the large rubber object to the left of the big yellow motorbike the same?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 31.359375, "loss": 5.361522197723389 }, { "id": "identity_271704", "image": "PMC-VQA/images/PMC6325806_Fig11_420541.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How would you describe the outer margins of the postabdominal segments in terms of appearance?\nChoices:\n(A) Overlap \n(B) Disarticulation \n(C) Telescoping \n(D) Constriction" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.421875, "loss": 4.607468605041504 }, { "id": "identity_307417", "image": "PMC-VQA/images/PMC8861992_Fig1_199327.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What observations were made on the patient's CT chest scan?\nChoices:\n(A) Bilateral anterior upper lobe consolidations\n(B) Bilateral posterior lower lobe consolidations\n(C) Unilateral middle lobe consolidation\n(D) No consolidations seen." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.09375, "loss": 4.265902042388916 }, { "id": "identity_274635", "image": "ScienceQA/images/4295.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being indicated by the highlight among the following options: Kiribati, Nauru, the Marshall Islands, or Vanuatu?\nChoices:\n(A) Kiribati\n(B) Nauru\n(C) the Marshall Islands\n(D) Vanuatu" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.5625, "loss": 4.230687141418457 }, { "id": "identity_322616", "image": "DocVQA/images/lpgn0226_15.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which state is this company located?" }, { "from": "gpt", "value": "The answer is IN" } ], "clip_score": 18.46875, "loss": 4.384293556213379 }, { "id": "identity_284372", "image": "IconQA/images/76995.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there a circle as a face in this shape?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.984375, "loss": 4.020273208618164 }, { "id": "identity_330376", "image": "A-OKVQA/images/3602.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who should an incident be reported to?\nChoices:\n(A) pedestrian\n(B) police division\n(C) motorcyclist\n(D) car driver" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.484375, "loss": 4.421875953674316 }, { "id": "identity_313916", "image": "ScienceQA/images/10105.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Mature fern plants have flat leaves. What are these leaves called?\nChoices:\n(A) fronds\n(B) spore cases" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.328125, "loss": 4.0565290451049805 }, { "id": "identity_288630", "image": "PMC-VQA/images/PMC4902502_F3_40168.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the DWI sequences in the right hippocampus?\nChoices:\n(A) Fluid accumulation \n(B) Decreased signal \n(C) Increased signal \n(D) No signal change" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.890625, "loss": 4.818441390991211 }, { "id": "identity_322559", "image": "Super-CLEVR/images/superCLEVR_new_000634.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there the same number of tiny yellow metal tandem bikes that are to the left of the gray bicycle and vehicles behind the gray rubber bus?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 31.234375, "loss": 4.9103899002075195 }, { "id": "identity_32808", "image": "AI2D/images/4110.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If there are no more trees what will happen to air?\nChoices:\n(A) Nothing will happen\n(B) No oxygen for air\n(C) Not sure\n(D) Air will be good" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.796875, "loss": 4.451494216918945 }, { "id": "identity_133150", "image": "Super-CLEVR/images/superCLEVR_new_001586.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that has two wheels and is located to the left of the yellow sports car?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 22.328125, "loss": 4.525141716003418 }, { "id": "identity_264008", "image": "ScienceQA/images/19125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repelling effect on each other?\nChoices:\n(A) attract\n(B) repel" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.875, "loss": 4.0469207763671875 }, { "id": "identity_274428", "image": "AI2D/images/4449.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, which label represents flat foot?\nChoices:\n(A) A\n(B) C\n(C) B\n(D) None" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.78125, "loss": 4.376893043518066 }, { "id": "identity_271641", "image": "PMC-VQA/images/PMC4619722_pone.0140537.g002_437031.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the meaning of the white arrow shown in the figure?\nChoices:\n(A) The distribution of IED from frontal to parietal \n(B) The distribution of IED from frontotemporal to temporoparietal \n(C) The distribution of IED from occipital to temporal \n(D) The distribution of IED from parietal to temporal" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.59375, "loss": 4.354930877685547 }, { "id": "identity_315541", "image": "ScienceQA/images/18626.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this scarf made of?\nChoices:\n(A) wool\n(B) asphalt" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.15625, "loss": 4.341732501983643 }, { "id": "identity_219118", "image": "A-OKVQA/images/345702.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible behind the barrier on the right side of the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 17.328125, "loss": 4.540574073791504 }, { "id": "identity_232612", "image": "DVQA/images/bar_train_00137903.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest total number of people across all objects?" }, { "from": "gpt", "value": "The answer is return" } ], "clip_score": 27.890625, "loss": 4.20567512512207 }, { "id": "identity_14319", "image": "A-OKVQA/images/264828.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The largest item here is usually found where?\nChoices:\n(A) ocean\n(B) hangar\n(C) office building\n(D) cave" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.3125, "loss": 4.532108306884766 }, { "id": "identity_25915", "image": "TQA/images/question_images/human_system_reproductory_7015.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What cannot occur when the vas deferens is cut?\nChoices:\n(A) sperm do not mature\n(B) erection can not occur\n(C) sperm can not leave the epididymis or be ejaculated\n(D) erection can not occur" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.328125, "loss": 4.235167026519775 }, { "id": "identity_315072", "image": "A-OKVQA/images/575222.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on the front of this?\nChoices:\n(A) bear\n(B) clown\n(C) elephant\n(D) zebra" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.1796875, "loss": 4.020806789398193 }, { "id": "identity_330113", "image": "TextVQA/images/0173c483a5f56dc9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does this red sign say?" }, { "from": "gpt", "value": "The answer is b + plus" } ], "clip_score": 16.25, "loss": 5.3853840827941895 }, { "id": "identity_304719", "image": "AI2D/images/2613.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letters in the provided diagram indicate the Sun?\nChoices:\n(A) H & C\n(B) E & F\n(C) D & I\n(D) J & G" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.28125, "loss": 4.128911972045898 }, { "id": "identity_295278", "image": "IconQA/images/10968.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) slide\n(B) flip\n(C) turn" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.171875, "loss": 4.238228797912598 }, { "id": "identity_70728", "image": "AI2D/images/860.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many levels of consumers are depicted in this food web?" }, { "from": "gpt", "value": "The answer is Four levels" } ], "clip_score": 30.046875, "loss": 4.5162353515625 }, { "id": "identity_21777", "image": "PMC-VQA/images/PMC7162474_pone.0231523.g002_152962.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the approach shown in the second image for Color Doppler US imaging? \nChoices:\n(A) Transabdominal approach \n(B) Transvaginal approach \n(C) Transrectal approach \n(D) Supraclavicular approach " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.046875, "loss": 4.60312032699585 }, { "id": "identity_12983", "image": "TextVQA/images/bdbf26933c859932.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is written on the box to the left of the shirt?" }, { "from": "gpt", "value": "The answer is nowc-zipper-blck 25.5" } ], "clip_score": 19.03125, "loss": 4.720195770263672 }, { "id": "identity_197218", "image": "VQA-AS/images/14914.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many clouds are visible in the sky?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 17.875, "loss": 4.138339996337891 }, { "id": "identity_283636", "image": "PMC-VQA/images/PMC5023728_Fig1_73075.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the second image of case 3, which brain region is depicted in greater detail?\nChoices:\n(A) cerebellum\n(B) thalamus\n(C) hippocampus\n(D) substantia nigra" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.421875, "loss": 4.162498474121094 }, { "id": "identity_31752", "image": "PMC-VQA/images/PMC3128610_pone-0021749-g004_101311.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the arrowheads indicate in panel B and C?\nChoices:\n(A) thick bundled pilus structures \n(B) membrane blebs \n(C) higher magnification \n(D) negative staining " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.15625, "loss": 4.753817081451416 }, { "id": "identity_247969", "image": "PMC-VQA/images/PMC5455666_materials-08-04287-f005_173123.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the SEM image, determine the material of the fractured surface and explain the key microstructural features that led to this identification" }, { "from": "gpt", "value": "The answer is Titanium" } ], "clip_score": 32.84375, "loss": 4.254574775695801 }, { "id": "identity_287676", "image": "TextVQA/images/8994f0a839f29131.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is displayed on the purple banner located on the yellow cans?" }, { "from": "gpt", "value": "The answer is zanzibar" } ], "clip_score": 20.125, "loss": 4.075446128845215 }, { "id": "identity_251308", "image": "ScienceQA/images/13823.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the state highlighted in the image and then name its capital" }, { "from": "gpt", "value": "The answer is Columbia" } ], "clip_score": 28.875, "loss": 4.695821285247803 }, { "id": "identity_302137", "image": "Super-CLEVR/images/superCLEVR_new_000953.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the big chopper left of the small thing that is behind the matte bicycle made of?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 29.34375, "loss": 5.274658203125 }, { "id": "identity_3584", "image": "FigureQA/images/3824.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Khaki the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.875, "loss": 4.029407501220703 }, { "id": "identity_278900", "image": "TextVQA/images/22c32952b17e0cbe.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the brand of the wines that are being showcased?" }, { "from": "gpt", "value": "The answer is cadus" } ], "clip_score": 28.375, "loss": 4.8097944259643555 }, { "id": "identity_194923", "image": "PMC-VQA/images/PMC6712706_Fig2_29631.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What might the red lines suggest about the condition of the foot?" }, { "from": "gpt", "value": "The answer is Possible deviation or abnormality in foot structure" } ], "clip_score": 30.890625, "loss": 4.036176681518555 }, { "id": "identity_204104", "image": "PMC-VQA/images/PMC6327415_Fig13_420938.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant tone of the image?" }, { "from": "gpt", "value": "The answer is Mid to dark gray tones" } ], "clip_score": 20.421875, "loss": 4.513768196105957 }, { "id": "identity_301801", "image": "DVQA/images/bar_train_00046694.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the bottom element in each stack of bars?" }, { "from": "gpt", "value": "The answer is degree" } ], "clip_score": 24.203125, "loss": 4.0843024253845215 }, { "id": "identity_57631", "image": "DocVQA/images/xggn0226_16.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the patient's last name as per the form?" }, { "from": "gpt", "value": "The answer is Not legible" } ], "clip_score": 23.65625, "loss": 4.249909400939941 }, { "id": "identity_298238", "image": "AI2D/images/4016.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the layer located between the inner core and mantle.\nChoices:\n(A) Continental crust\n(B) Lithosphere\n(C) Outer core\n(D) Asthenosphere" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.171875, "loss": 4.5541510581970215 }, { "id": "identity_262668", "image": "TextVQA/images/bb62f2d28c307de2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which language is used in the advertisement?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 15.828125, "loss": 4.496147155761719 }, { "id": "identity_312933", "image": "PMC-VQA/images/PMC2361926_fig1_21606.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the body is showing abnormal uptake in this image?\nChoices:\n(A) Left cervical region and left premolar area.\n(B) Right cervical region and right premolar area.\n(C) Abdomen.\n(D) Brain." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.0625, "loss": 4.209264278411865 }, { "id": "identity_282810", "image": "A-OKVQA/images/497261.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What function does Bank of America serve in relation to this game?\nChoices:\n(A) site provider\n(B) loan provider\n(C) sponsor\n(D) fund provider" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.09375, "loss": 4.378232002258301 }, { "id": "identity_44561", "image": "DocVQA/images/zxbw0217_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the classification of the project 'Paper PC(1)'?" }, { "from": "gpt", "value": "The answer is Efficacy" } ], "clip_score": 23.6875, "loss": 4.269011497497559 }, { "id": "identity_206164", "image": "VQA2.0/images/58343.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the soccer ball?" }, { "from": "gpt", "value": "The answer is Select" } ], "clip_score": 17.875, "loss": 4.6723504066467285 }, { "id": "identity_215177", "image": "A-OKVQA/images/315037.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the man's backpack placed next to?" }, { "from": "gpt", "value": "The answer is Laptop" } ], "clip_score": 25.640625, "loss": 4.023682117462158 }, { "id": "identity_335210", "image": "DocVQA/images/llcv0228_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the schedule at this time as mentioned in the given agenda?" }, { "from": "gpt", "value": "The answer is Introduction" } ], "clip_score": 23.984375, "loss": 5.623018741607666 }, { "id": "identity_219706", "image": "A-OKVQA/images/234938.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many picnic tables are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.109375, "loss": 4.681419372558594 }, { "id": "identity_209115", "image": "PMC-VQA/images/PMC6334890_FIG1_423706.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highlighted area in the images indicating?" }, { "from": "gpt", "value": "The answer is Cervical calcinosis" } ], "clip_score": 26.953125, "loss": 4.373032569885254 }, { "id": "identity_106674", "image": "TextVQA/images/84b3fc5b08f24dba.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many red stools are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 19.234375, "loss": 4.443757057189941 }, { "id": "identity_26996", "image": "PMC-VQA/images/PMC7939179_F4_385961.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the green arrow in the image indicate? \nChoices:\n(A) A fracture in the skull \n(B) A low-density shadow at frontal lobe \n(C) A high-density shadow at basal ganglia \n(D) A high-density shadow at cerebral cortex " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.34375, "loss": 4.6094889640808105 }, { "id": "identity_280053", "image": "ScienceQA/images/12736.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic is shared by these three objects? \nChoices:\n(A) fragile\n(B) blue\n(C) bouncy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.6015625, "loss": 4.184008598327637 }, { "id": "identity_296846", "image": "TextVQA/images/6bc176acb320908a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the name of the image?" }, { "from": "gpt", "value": "The answer is color coded" } ], "clip_score": 18.125, "loss": 4.025757312774658 }, { "id": "identity_302259", "image": "TextVQA/images/a1058ccb3c5ae651.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is this beer from?" }, { "from": "gpt", "value": "The answer is usquena" } ], "clip_score": 24.53125, "loss": 5.475425720214844 }, { "id": "identity_285044", "image": "IconQA/images/40221.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) flip\n(B) turn\n(C) slide" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.5625, "loss": 4.775874137878418 }, { "id": "identity_226489", "image": "VQA2.0/images/475019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of structures can you identify in the ski resort, and what are their purposes?" }, { "from": "gpt", "value": "The answer is Five different types of structures" } ], "clip_score": 29.515625, "loss": 4.5967230796813965 }, { "id": "identity_287277", "image": "A-OKVQA/images/149467.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For which team does the catcher play? \n(A) cubs\n(B) yankees\n(C) astros\n(D) mets" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.546875, "loss": 4.2951250076293945 }, { "id": "identity_261204", "image": "A-OKVQA/images/406949.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Considering the traffic light is green, what should the cars do if a pedestrian is about to cross the street from the left side of the image?" }, { "from": "gpt", "value": "The answer is wait" } ], "clip_score": 27.953125, "loss": 5.109419822692871 }, { "id": "identity_263126", "image": "A-OKVQA/images/132773.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which number is adjacent to Q on the sign? \n(A) three \n(B) eight \n(C) five \n(D) ten" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 5.25390625, "loss": 4.089169502258301 }, { "id": "identity_325631", "image": "PMC-VQA/images/PMC8148521_biology-10-00412-f005_450369.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the blue arrow indicate in this image?\nChoices:\n(A) Liver Parenchyma\n(B) Nodule\n(C) Artery\n(D) Vein" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.34375, "loss": 5.113005638122559 }, { "id": "identity_336278", "image": "FigureQA/images/78655.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.2890625, "loss": 4.001747131347656 }, { "id": "identity_316649", "image": "TQA/images/question_images/human_system_muscular_6158.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the body does the brachioradialis belong to?\nChoices:\n(A) leg\n(B) arm\n(C) shoulder\n(D) foot" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.078125, "loss": 4.317478179931641 }, { "id": "identity_29362", "image": "A-OKVQA/images/382715.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which lane does the skateboarder travel here?\nChoices:\n(A) bike\n(B) bus\n(C) passing\n(D) sidewalk" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.140625, "loss": 4.511507987976074 }, { "id": "identity_3173", "image": "FigureQA/images/11755.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Gold the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.328125, "loss": 4.228487968444824 }, { "id": "identity_269874", "image": "IconQA/images/76847.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the probability of selecting a black marble without looking?\nChoices:\n(A) unlikely\n(B) certain\n(C) probable\n(D) impossible" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.296875, "loss": 4.22531270980835 }, { "id": "identity_322589", "image": "PMC-VQA/images/PMC6647078_Fig2_12408.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique is used to visualize this mapping?\nChoices:\n(A) DCE-MRI \n(B) DWI \n(C) Mammography \n(D) PET-CT" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.640625, "loss": 4.783661842346191 }, { "id": "identity_203892", "image": "VQA2.0/images/98878.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tablecloth on the table?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 16.234375, "loss": 4.34233283996582 }, { "id": "identity_316319", "image": "A-OKVQA/images/298726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do they have to hit something over?\nChoices:\n(A) water\n(B) plate\n(C) net\n(D) sand" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.84375, "loss": 4.190852165222168 }, { "id": "identity_251329", "image": "A-OKVQA/images/473773.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the poles dragged around by the young child with his skis, and how many people are sitting in the background?" }, { "from": "gpt", "value": "The answer is Red, three" } ], "clip_score": 27.828125, "loss": 4.227175712585449 }, { "id": "identity_25745", "image": "DocVQA/images/yhfg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the heading of the document?" }, { "from": "gpt", "value": "The answer is Cleveland Health Goals Project" } ], "clip_score": 21.71875, "loss": 4.854694843292236 }, { "id": "identity_18302", "image": "TQA/images/abc_question_images/seasons_12629.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents Summer in the Northern Hemisphere?\nChoices:\n(A) K\n(B) S\n(C) N\n(D) C" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.921875, "loss": 4.367921352386475 }, { "id": "identity_280242", "image": "PMC-VQA/images/PMC5920263_fig01_306710.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What were the findings on the lateral ankle radiographs? \nChoices:\n(A) Only a fracture of the medial malleolus.\n(B) A fracture of the lateral malleolus.\n(C) A suspicious avulsion fracture on the anterior part of the lateral malleolus.\n(D) A suspicious avulsion fracture on the posterior part of the lateral malleolus." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.03125, "loss": 4.086928367614746 }, { "id": "identity_282674", "image": "VQA2.0/images/212901.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the distance of the clock from the ground?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 18.953125, "loss": 4.192740440368652 }, { "id": "identity_22292", "image": "PMC-VQA/images/PMC3552813_f4_180745.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the black arrows in the image indicate? \nChoices:\n(A) Areas of unfilled tunneling and resorption space \n(B) Fatty bone marrow changes \n(C) Cartilage bars in cortical bone \n(D) Areas of cortical bone loss " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.96875, "loss": 5.164916515350342 }, { "id": "identity_143314", "image": "DocVQA/images/qhjl0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the topic of agenda number 5?" }, { "from": "gpt", "value": "The answer is Switching charges" } ], "clip_score": 20.265625, "loss": 5.089209079742432 }, { "id": "identity_103542", "image": "VQA-AS/images/18367.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What object is placed on the left wall?" }, { "from": "gpt", "value": "The answer is Coat stand" } ], "clip_score": 28.0, "loss": 4.089440822601318 }, { "id": "identity_303879", "image": "FigureQA/images/67411.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Red the one with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.640625, "loss": 4.625784873962402 }, { "id": "identity_198343", "image": "VQA2.0/images/61488.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct types of food items are visible on the table covered with a white cloth?" }, { "from": "gpt", "value": "The answer is Three types" } ], "clip_score": 23.53125, "loss": 4.396586894989014 }, { "id": "identity_266883", "image": "VQA2.0/images/246099.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is located on the desk behind the currency?" }, { "from": "gpt", "value": "The answer is no money" } ], "clip_score": 12.9609375, "loss": 4.469302654266357 }, { "id": "identity_210515", "image": "PMC-VQA/images/PMC4330230_Fig8_358741.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arrows are present in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 18.171875, "loss": 4.462305545806885 }, { "id": "identity_292748", "image": "PMC-VQA/images/PMC5946363_F1_313143.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the CT scan results among the following options: (A) Decreased swelling, (B) Normal tissue, (C) Marked swelling, or (D) Calcification?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.8125, "loss": 4.021430969238281 }, { "id": "identity_283482", "image": "PMC-VQA/images/PMC6450963_Fig3_461892.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of images were taken of the proband?\nChoices:\n(A) XR images\n(B) ECG images\n(C) CT images\n(D) ER images" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.125, "loss": 4.028883457183838 }, { "id": "identity_263321", "image": "Super-CLEVR/images/superCLEVR_new_000389.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the object located to the left of the small car and in front of the big green shiny motorbike made of?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 33.125, "loss": 4.523294448852539 }, { "id": "identity_134786", "image": "DVQA/images/bar_train_00149717.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest \"luck\" value?" }, { "from": "gpt", "value": "The answer is wealth" } ], "clip_score": 22.046875, "loss": 4.267184257507324 }, { "id": "identity_283302", "image": "ScienceQA/images/17828.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What common property do these three objects share among the following choices: (A) sour, (B) blue, (C) salty?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.0, "loss": 4.2513556480407715 }, { "id": "identity_17263", "image": "DVQA/images/bar_train_00016058.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first bar from the bottom in each group?" }, { "from": "gpt", "value": "The answer is spot" } ], "clip_score": 21.71875, "loss": 4.382988929748535 }, { "id": "identity_212064", "image": "A-OKVQA/images/138956.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the buildings' facades in the image?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 19.5, "loss": 4.45200252532959 }, { "id": "identity_279424", "image": "DVQA/images/bar_train_00066326.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there stacked bars in the chart?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 28.578125, "loss": 4.091668128967285 }, { "id": "identity_291381", "image": "PMC-VQA/images/PMC3681594_F2_211112.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How does the tumor appear in the portal phase? \nOptions:\n(A) Solid \n(B) Cystic \n(C) Necrotic \n(D) Enhanced" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.046875, "loss": 4.691885948181152 }, { "id": "identity_191304", "image": "PMC-VQA/images/PMC7035375_Fig1_122618.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many brain images are displayed in the series?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.796875, "loss": 4.184516429901123 }, { "id": "identity_39038", "image": "MapQA/images/map_2264.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a value range between 46.2%-46.6%?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 20.9375, "loss": 4.180771350860596 }, { "id": "identity_12890", "image": "PMC-VQA/images/PMC8137781_Fig9_446471.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the abnormality specifically seen in the capitellum on the lateral radiograph? \nChoices:\n(A) Joint Dislocation \n(B) Osseous Sclerosis \n(C) Bone Fragmentation \n(D) Tendon Tear " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.84375, "loss": 4.379108428955078 }, { "id": "identity_336511", "image": "PMC-VQA/images/PMC5068705_pone.0164392.g003_84926.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the imaging system used here?\nChoices:\n(A) PET/CT hybrid imaging system.\n(B) PET/MR fusion image.\n(C) MRI.\n(D) CT." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.46875, "loss": 4.431894779205322 }, { "id": "identity_20846", "image": "IconQA/images/99219.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a moon. Which picture is third?\nChoices:\n(A) star\n(B) rocket\n(C) moon" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.875, "loss": 4.0514936447143555 }, { "id": "identity_325877", "image": "PMC-VQA/images/PMC6849189_Fig2_66115.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what plane is this image taken?\nChoices:\n(A) Axial plane \n(B) Sagittal plane \n(C) Coronal plane \n(D) Oblique plane" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.625, "loss": 4.23504114151001 }, { "id": "identity_279304", "image": "DocVQA/images/zhyc0227_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which areas were analyzed for the highest caloric deficiency in the population?" }, { "from": "gpt", "value": "The answer is Santiago and calama areas" } ], "clip_score": 22.640625, "loss": 5.299065113067627 }, { "id": "identity_22889", "image": "PMC-VQA/images/PMC7806504_Fig2_342165.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was visible in the image? \nChoices:\n(A) Bacteria only \n(B) Lymphocyte cell infiltration only \n(C) Both bacteria and lymphocyte cell infiltration \n(D) None. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.140625, "loss": 4.744259834289551 }, { "id": "identity_213034", "image": "TextVQA/images/63556323e590d867.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible in the background?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 14.6640625, "loss": 4.197603225708008 }, { "id": "identity_291833", "image": "IconQA/images/41351.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the quantity of ovals an even or odd number?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.171875, "loss": 4.051668167114258 }, { "id": "identity_111038", "image": "A-OKVQA/images/512838.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the obstacle the horse is jumping over?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 24.265625, "loss": 4.055210590362549 }, { "id": "identity_217266", "image": "TextVQA/images/049d516ccb244077.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cheerleaders are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 27.625, "loss": 4.627218246459961 }, { "id": "identity_14358", "image": "DocVQA/images/hkyp0000_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the type of contact?" }, { "from": "gpt", "value": "The answer is complaint" } ], "clip_score": 19.28125, "loss": 4.114930629730225 }, { "id": "identity_288157", "image": "PMC-VQA/images/PMC7446008_F12_239553.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What abnormality is typically observed on the chest CT of a patient with SARS-CoV infection?\nChoices:\n(A) Normal \n(B) Bilateral diffuse centrilobular nodules \n(C) Consolidations with air bronchograms \n(D) Pleural effusion" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.5, "loss": 4.124764919281006 }, { "id": "identity_65063", "image": "AI2D/images/3548.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label \"Vacuole\" refer to in the diagram?" }, { "from": "gpt", "value": "The answer is A storage organelle" } ], "clip_score": 32.46875, "loss": 4.004363059997559 }, { "id": "identity_265739", "image": "PMC-VQA/images/PMC5799328_Fig4_271947.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the labels I-III represent in the figures provided?\nChoices:\n(A) various imaging techniques\n(B) different types of stains\n(C) different positions along the aorta\n(D) different time points in the imaging process" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.859375, "loss": 4.370370388031006 }, { "id": "identity_212926", "image": "A-OKVQA/images/321886.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand name is advertised on the billboard in the center of the image?" }, { "from": "gpt", "value": "The answer is ABBA" } ], "clip_score": 25.921875, "loss": 4.205445766448975 }, { "id": "identity_2370", "image": "AI2D/images/3325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is represented by the label A in the above diagram?\nChoices:\n(A) Not in most plantcells\n(B) Ribosomes\n(C) Nucleus\n(D) Plasma membrane" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.84375, "loss": 4.021373748779297 }, { "id": "identity_285093", "image": "TextVQA/images/20babd71404bfa95.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is controlled by the knob on the left?" }, { "from": "gpt", "value": "The answer is feedback and rotate " } ], "clip_score": 16.984375, "loss": 5.178533554077148 }, { "id": "identity_323451", "image": "PMC-VQA/images/PMC4707006_Fig1_460672.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What animal is shown in the radiograph?\nChoices:\n(A) Cow\n(B) Sheep\n(C) Goat\n(D) Horse" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.75, "loss": 4.543371200561523 }, { "id": "identity_311781", "image": "A-OKVQA/images/257774.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the white item on the shelf to the left?\nChoices:\n(A) cat\n(B) dishwasher\n(C) washing machine\n(D) microwave" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.421875, "loss": 4.181588172912598 }, { "id": "identity_242382", "image": "ScienceQA/images/20422.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If two heterozygous plants (Ff) are crossed, what is the expected ratio of offspring with terminal flowers to offspring with axial flowers, and how many offspring would you expect to have terminal flowers if there are 16 offspring in total?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 25.53125, "loss": 4.5989670753479 }, { "id": "identity_289416", "image": "A-OKVQA/images/567683.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of weather is common in this area?\nChoices:\n(A) tundra\n(B) freezing\n(C) all\n(D) tropical" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 7.01953125, "loss": 4.012917995452881 }, { "id": "identity_269898", "image": "AI2D/images/1098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the central or principal vein of a leaf called?\nChoices:\n(A) lobes\n(B) midrib\n(C) None of above\n(D) petiole" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.109375, "loss": 4.002961158752441 }, { "id": "identity_332561", "image": "PMC-VQA/images/PMC3646783_pone-0063430-g001_203142.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of tissue is depicted in this image?\nChoices:\n(A) Adipose tissue,\n(B) Nervous tissue,\n(C) Connective tissue,\n(D) Muscle tissue." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.53125, "loss": 4.250613212585449 }, { "id": "identity_2335", "image": "ScienceQA/images/13301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these three objects have in common?\nChoices:\n(A) opaque\n(B) scratchy\n(C) transparent" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.078125, "loss": 4.244365692138672 }, { "id": "identity_31540", "image": "PMC-VQA/images/PMC4526028_f1-mmr-12-03-4029_411993.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the image?\nChoices:\n(A) Incomplete membrane \n(B) Complete membrane \n(C) Both incomplete and complete membrane \n(D) Neither incomplete nor complete membrane" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.15625, "loss": 4.125547885894775 }, { "id": "identity_288188", "image": "PMC-VQA/images/PMC7237671_Fig2_174224.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What category of images is depicted in the dataset provided?\nChoices:\n(A) CT scans\n(B) PET scans\n(C) MRI scans\n(D) X-ray scans" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.0, "loss": 4.059202194213867 }, { "id": "identity_267470", "image": "TabMWP/images/16586.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.25, "loss": 4.40500545501709 }, { "id": "identity_15173", "image": "Super-CLEVR/images/superCLEVR_new_001287.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a large purple thing; is its shape the same as the purple metal thing that is in front of the big green matte fighter?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 23.0, "loss": 4.877933025360107 }, { "id": "identity_263013", "image": "TQA/images/abc_question_images/waves_19292.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the letter that symbolizes a crest from the given choices. \n(A) P\n(B) V\n(C) F\n(D) E" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.4375, "loss": 4.304591655731201 }, { "id": "identity_217780", "image": "VQA2.0/images/491739.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many horses are harnessed together for plowing?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 32.03125, "loss": 4.1406097412109375 }, { "id": "identity_316221", "image": "TextVQA/images/028536373f325d0a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is this person?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 10.1171875, "loss": 4.328736305236816 }, { "id": "identity_128443", "image": "TextVQA/images/9fb79e28f8a6abb2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main color of the book's cover at the bottom of the display?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 15.6015625, "loss": 4.380843162536621 }, { "id": "identity_36496", "image": "PMC-VQA/images/PMC4641347_Fig5_443040.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of microscopy was used to image the leaves? \nChoices:\n(A) Transmission electron microscopy \n(B) Scanning electron microscopy \n(C) Confocal laser scanning microscopy \n(D) Atomic force microscopy " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.96875, "loss": 4.0226359367370605 }, { "id": "identity_286074", "image": "DocVQA/images/qtfg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: From whom is this letter?" }, { "from": "gpt", "value": "The answer is Ole Salthe" } ], "clip_score": 15.90625, "loss": 6.519712924957275 }, { "id": "identity_196417", "image": "A-OKVQA/images/272632.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible stuffed animals are there on the middle bunk?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 29.90625, "loss": 4.780071258544922 }, { "id": "identity_316943", "image": "PMC-VQA/images/PMC4432229_f02_386457.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the boxed areas in the central panels?\nChoices:\n(A) Distribution of nucleoporins \n(B) MAC-specific nucleoporins \n(C) Twi1p \n(D) Schematic diagram" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.203125, "loss": 4.542105674743652 }, { "id": "identity_190315", "image": "VQA2.0/images/58107.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many jump obstacles are in the picture?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 21.75, "loss": 4.089043140411377 }, { "id": "identity_302856", "image": "PMC-VQA/images/PMC7393149_Fig1_222964.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the green color in the three-dimensional view of this part of the eye?\nChoices:\n(A) Hyaloid vessels\n(B) Retinal tissues\n(C) Choroidal tissues\n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.3125, "loss": 4.549305438995361 }, { "id": "identity_253360", "image": "DVQA/images/bar_train_00028672.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold for each item across all stores, and which item has the highest total sales?" }, { "from": "gpt", "value": "The answer is sign" } ], "clip_score": 31.65625, "loss": 4.141561985015869 }, { "id": "identity_317226", "image": "PMC-VQA/images/PMC8019537_FIG2_409201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is observed in this CT chest scan?\nChoices:\n(A) Left upper lobe consolidation.\n(B) Right lower lobe consolidation.\n(C) Bilateral lung nodules.\n(D) No significant findings." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.90625, "loss": 4.470488548278809 }, { "id": "identity_320568", "image": "PMC-VQA/images/PMC7690394_brainsci-10-00762-f002_305414.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the abnormality related to this diffuse white matter?\nChoices:\n(A) Hypomyelination\n(B) Hypermyelination\n(C) Atrophy\n(D) None of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.828125, "loss": 4.026387691497803 }, { "id": "identity_60757", "image": "DocVQA/images/xznh0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the subject of the memorandum?" }, { "from": "gpt", "value": "The answer is Great Western '70 Magazine" } ], "clip_score": 24.515625, "loss": 4.071921348571777 }, { "id": "identity_303579", "image": "Super-CLEVR/images/superCLEVR_new_000103.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the object that is both left of a red rubber truck and behind a big shiny dirtbike?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 26.0625, "loss": 4.39299201965332 }, { "id": "identity_34741", "image": "PMC-VQA/images/PMC8064296_F3_423567.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the texture of the lipoma in the MRI image? \nChoices:\n(A) Hypointense \n(B) Hyperintense \n(C) Isointense \n(D) Not visible " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.96875, "loss": 4.077996730804443 }, { "id": "identity_274031", "image": "FigureQA/images/60774.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Light Seafoam or Pale Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.40625, "loss": 4.516313076019287 }, { "id": "identity_284478", "image": "PMC-VQA/images/PMC7980418_fig05_395998.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What category do the images in this caption belong to?\nChoices:\n(A) CT scans\n(B) MRI images\n(C) Ultrasound scans\n(D) X-rays" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.265625, "loss": 4.595734596252441 }, { "id": "identity_145931", "image": "DocVQA/images/qjkm0227_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which product seized by the FDA is mentioned in the article as being misbranded as effective in treating intestinal disorders?" }, { "from": "gpt", "value": "The answer is dried swiss whey" } ], "clip_score": 24.953125, "loss": 4.500229358673096 }, { "id": "identity_314313", "image": "ScienceQA/images/1947.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is a decomposer according to the arrows?\nChoices:\n(A) mushroom\n(B) lichen" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.34375, "loss": 4.676003456115723 }, { "id": "identity_41611", "image": "FigureQA/images/35252.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color represents the smallest proportion?" }, { "from": "gpt", "value": "The answer is Peru" } ], "clip_score": 27.34375, "loss": 4.329028129577637 }, { "id": "identity_148917", "image": "DocVQA/images/tlnf0227_38.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where was the print production of the annual report carried out?" }, { "from": "gpt", "value": "The answer is National Center" } ], "clip_score": 23.046875, "loss": 4.467299938201904 }, { "id": "identity_73740", "image": "ScienceQA/images/3405.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the north pole in this image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 18.578125, "loss": 4.032118797302246 }, { "id": "identity_329697", "image": "PMC-VQA/images/PMC7711629_jpm-10-00218-f001_313425.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the likely nature of the abnormality seen in the image?\nChoices:\n(A) Hemorrhage\n(B) Cyst\n(C) Edema\n(D) Tumor" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.828125, "loss": 4.557196617126465 }, { "id": "identity_299515", "image": "A-OKVQA/images/496362.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location are these two individuals currently positioned? \nOptions:\n(A) grass\n(B) street\n(C) trail\n(D) beach" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.34375, "loss": 4.476083755493164 }, { "id": "identity_31320", "image": "PMC-VQA/images/PMC5613789_F7_219958.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the location of the sagittal slice shown in the image? \nChoices:\n(A) 30 mm to the left of the AC \n(B) 56.5 mm to the right of the AC \n(C) 30 mm to the right of the AC \n(D) 56.5 mm to the left of the AC " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.171875, "loss": 4.172646999359131 }, { "id": "identity_275068", "image": "TabMWP/images/18882.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: With $487, can Jeanette afford to purchase both a coffee table and a desk?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 36.4375, "loss": 4.153255462646484 }, { "id": "identity_52326", "image": "FigureQA/images/42.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many colors have a smaller section than Medium Seafoam?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 31.078125, "loss": 4.723142147064209 }, { "id": "identity_331344", "image": "PMC-VQA/images/PMC7757331_F3_325476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique used to obtain these images?\nChoices:\n(A) X-ray \n(B) CT scan \n(C) MRI \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.1875, "loss": 4.406342506408691 }, { "id": "identity_324223", "image": "A-OKVQA/images/553640.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What event are the people in the middle of?\nChoices:\n(A) wedding\n(B) birthday\n(C) funeral\n(D) graduation" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.515625, "loss": 4.229187488555908 }, { "id": "identity_320820", "image": "PMC-VQA/images/PMC5690730_F3_241263.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the nodule located?\nChoices:\n(A) Left adductor magnus\n(B) Right adductor magnus\n(C) Left femur\n(D) Right femur" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.515625, "loss": 4.4609270095825195 }, { "id": "identity_16721", "image": "AI2D/images/1823.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do deer consume\nChoices:\n(A) plants\n(B) deer\n(C) rodents\n(D) snakes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.140625, "loss": 4.15555477142334 }, { "id": "identity_311198", "image": "TextVQA/images/047fc464e23ef72e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand of cosmetics are visible here?" }, { "from": "gpt", "value": "The answer is oppi" } ], "clip_score": 22.703125, "loss": 5.477261066436768 }, { "id": "identity_187235", "image": "PMC-VQA/images/PMC8100688_F1_434449.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many brain maps are depicted in the figure?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 28.03125, "loss": 4.362559795379639 }, { "id": "identity_295359", "image": "PMC-VQA/images/PMC3865638_fig1_252301.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was observed in case 1?\nOptions:\n(A) Subarachnoid hemorrhage\n(B) Cerebral microbleeds in corpus callosum\n(C) Hemorrhage in the frontal lobe\n(D) Normal brain imaging findings." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.796875, "loss": 4.1097917556762695 }, { "id": "identity_299912", "image": "PMC-VQA/images/PMC9341008_F4_361411.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the CSF filled clefts located?\nChoices:\n(A) Temporal lobe \n(B) Occipital lobe \n(C) Frontal lobe \n(D) Parietal lobe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.96875, "loss": 4.621026039123535 }, { "id": "identity_274816", "image": "AI2D/images/3035.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is surrounded by the Nucleus?\nChoices:\n(A) cell wall\n(B) nuclear pore\n(C) ribosome\n(D) Nucleolus" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.734375, "loss": 4.164495468139648 }, { "id": "identity_255468", "image": "AI2D/images/246.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the population of rats decreases, how will this affect the population of both the snakes and the hawks in the food chain?" }, { "from": "gpt", "value": "The answer is Both populations will decrease" } ], "clip_score": 24.59375, "loss": 4.334716320037842 }, { "id": "identity_304330", "image": "AI2D/images/4212.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does M indicate?\nChoices:\n(A) Mid-ocean ridge\n(B) Oceanic plate\n(C) Continental plate\n(D) CO2" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.421875, "loss": 4.8348870277404785 }, { "id": "identity_130782", "image": "A-OKVQA/images/235466.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible brooms are there in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 17.953125, "loss": 4.241278648376465 }, { "id": "identity_186635", "image": "VizWiz/images/VizWiz_train_00023765.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the side table near the wall?" }, { "from": "gpt", "value": "The answer is Round" } ], "clip_score": 27.03125, "loss": 4.0177483558654785 }, { "id": "identity_261354", "image": "ScienceQA/images/10111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which characteristic corresponds to this object?\nChoices:\n(A) rough\n(B) transparent" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.75, "loss": 4.698915958404541 }, { "id": "identity_188505", "image": "A-OKVQA/images/140843.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are wearing yellow boots?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.875, "loss": 4.360288143157959 }, { "id": "identity_29272", "image": "TextVQA/images/d383dd516697e184.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what number is referenced on the orange monitor?" }, { "from": "gpt", "value": "The answer is apple" } ], "clip_score": 20.84375, "loss": 4.274844169616699 }, { "id": "identity_58087", "image": "DocVQA/images/njnp0227_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the medium of the artwork \"Madama Butterfly\"?" }, { "from": "gpt", "value": "The answer is mixed media print" } ], "clip_score": 24.34375, "loss": 4.401460647583008 }, { "id": "identity_23683", "image": "TQA/images/question_images/human_system_excretory_6108.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The renal artery and vein branch off from which structure?\nChoices:\n(A) kidney\n(B) Vena cava\n(C) ureter\n(D) dorsal aorta" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.40625, "loss": 4.268853187561035 }, { "id": "identity_30927", "image": "PMC-VQA/images/PMC8801695_F6_180439.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which image has the blue areas that are most visible? \nChoices:\n(A) Original sharp-kernel \n(B) Median-filtered \n(C) Kernel-converted \n(D) Original soft-kernel " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.75, "loss": 4.810853004455566 }, { "id": "identity_329191", "image": "TQA/images/question_images/human_system_nervous_6191.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is found at the front of this brain?\nChoices:\n(A) Lateral sulcus\n(B) Parietal lobe\n(C) Frontal lobe\n(D) Occipital lobe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.21875, "loss": 4.1903276443481445 }, { "id": "identity_327205", "image": "A-OKVQA/images/370922.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which public agency uses the ATVs being carried on the flatbed truck?\nChoices:\n(A) fire department\n(B) police\n(C) city hall\n(D) health department" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.5625, "loss": 4.3522467613220215 }, { "id": "identity_291497", "image": "A-OKVQA/images/98892.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the main purpose of the lights located on the rear of bicycles?\nChoices:\n(A) fun\n(B) identification\n(C) safety\n(D) decoration" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.578125, "loss": 4.957226753234863 }, { "id": "identity_271824", "image": "TQA/images/question_images/human_system_ear_6098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: To what is the malleus connected? \n(A) staples\n(B) semicircular canals\n(C) cochlea\n(D) incus" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.859375, "loss": 4.17590856552124 }, { "id": "identity_282249", "image": "A-OKVQA/images/232546.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of business is being advertised on the white sign? Is it a pharmacy, car shop, route store, or grocer?\nChoices:\n(A) pharmacy\n(B) car shop\n(C) route store\n(D) grocer" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.09375, "loss": 4.264649391174316 }, { "id": "identity_223691", "image": "TextVQA/images/153f0d610312a736.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which game is placed directly on top of the 'Carcassonne' box?" }, { "from": "gpt", "value": "The answer is Zack & Pack" } ], "clip_score": 23.453125, "loss": 4.664539337158203 }, { "id": "identity_57752", "image": "DVQA/images/bar_train_00111921.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label on the x-axis has the highest value for 'motel'?" }, { "from": "gpt", "value": "The answer is head" } ], "clip_score": 30.234375, "loss": 4.243365287780762 }, { "id": "identity_28000", "image": "A-OKVQA/images/481596.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where would you usually see the king on the back of the yellow shirt?\nChoices:\n(A) restaurant\n(B) playing card\n(C) cookie box\n(D) english tea" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.484375, "loss": 4.406379222869873 }, { "id": "identity_107503", "image": "VizWiz/images/VizWiz_train_00011327.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What word is written on the blue tape near the water inlet?" }, { "from": "gpt", "value": "The answer is Hot" } ], "clip_score": 20.421875, "loss": 4.48898458480835 }, { "id": "identity_51834", "image": "DVQA/images/bar_train_00081066.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item has the second highest number of units sold?" }, { "from": "gpt", "value": "The answer is actor" } ], "clip_score": 23.28125, "loss": 4.008008003234863 }, { "id": "identity_182200", "image": "PMC-VQA/images/PMC5740132_Fig4_255954.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the red areas in the ADC images?" }, { "from": "gpt", "value": "The answer is Restricted diffusion regions" } ], "clip_score": 31.265625, "loss": 4.844152450561523 }, { "id": "identity_15831", "image": "A-OKVQA/images/116882.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does it look like the stuffed animal is doing?\nChoices:\n(A) eating\n(B) drinking\n(C) singing\n(D) attacking" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.40625, "loss": 4.6274871826171875 }, { "id": "identity_286102", "image": "PMC-VQA/images/PMC4232727_Fig2_336427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to capture the image of grape berry tissue regions provided?\nChoices:\n(A) X-ray imaging\n(B) CT scanning\n(C) Magnetic resonance imaging\n(D) Ultrasound imaging" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.515625, "loss": 4.2074103355407715 }, { "id": "identity_121992", "image": "Super-CLEVR/images/superCLEVR_new_000698.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object located to the left of the green motorcycle?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 28.765625, "loss": 4.274083614349365 }, { "id": "identity_300884", "image": "PMC-VQA/images/PMC4164002_F1_320230.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the abnormality observed in this MRI scan?\nChoices:\n(A) Edema over some hippocampus.\n(B) Edema over some hippocampus.\n(C) Hippocampal neurosis.\n(D) Cerebral atrophy." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.046875, "loss": 4.0681962966918945 }, { "id": "identity_21202", "image": "IconQA/images/67879.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner less likely to land?\nChoices:\n(A) white\n(B) pink" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.46875, "loss": 4.90364408493042 }, { "id": "identity_143388", "image": "DocVQA/images/qqjw0228_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the company that received the payment?" }, { "from": "gpt", "value": "The answer is HDR" } ], "clip_score": 22.90625, "loss": 5.210970401763916 }, { "id": "identity_315181", "image": "DocVQA/images/qfjg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where did he pass recently?" }, { "from": "gpt", "value": "The answer is Camp Miniwauca" } ], "clip_score": 17.6875, "loss": 4.195030212402344 }, { "id": "identity_52976", "image": "DocVQA/images/ssjw0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are listed under the \"Officers & Staff\" section?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 18.6875, "loss": 4.523745059967041 }, { "id": "identity_126090", "image": "TextVQA/images/fa8fed52f9fd7eb5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main topic of the presentation based on the slide content?" }, { "from": "gpt", "value": "The answer is Apple production" } ], "clip_score": 25.796875, "loss": 5.204402923583984 }, { "id": "identity_331363", "image": "PMC-VQA/images/PMC8834445_cells-11-00555-f005_190143.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted on the right side of the images from control (c,d)?\nChoices:\n(A) Views of the regions marked by squares.\n(B) Views of the regions marked by triangles.\n(C) Views of the regions marked by circles.\n(D) Views of the regions marked by pentagons." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.2421875, "loss": 4.4473066329956055 }, { "id": "identity_15726", "image": "A-OKVQA/images/317851.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the hood of the car open?\nChoices:\n(A) for repairs\n(B) capture rain\n(C) trap birds\n(D) loading luggage" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.421875, "loss": 4.184361457824707 }, { "id": "identity_151", "image": "PMC-VQA/images/PMC8312902_F1_20344.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the red and blue box insets in the image? \nChoices:\n(A) An electron microscope \n(B) Viral replication complex \n(C) RNA \n(D) A protein " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.09375, "loss": 4.588021755218506 }, { "id": "identity_328014", "image": "A-OKVQA/images/178505.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where in the world would you most likely find a place like this?\nChoices:\n(A) siberia\n(B) mexico\n(C) iceland\n(D) antarctica" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 4.36328125, "loss": 4.422187805175781 }, { "id": "identity_295328", "image": "PMC-VQA/images/PMC3789311_fig2_235557.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being depicted in the US image?\nChoices:\n(A) A homogeneous mass\n(B) A heterogeneous mass with solid tumor\n(C) A simple cystic structure\n(D) An anechoic cystic structure with slightly echogenic thick walls." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.40625, "loss": 4.015555381774902 }, { "id": "identity_22833", "image": "A-OKVQA/images/237210.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: These cars are stuck in what?\nChoices:\n(A) parade\n(B) car show\n(C) traffic jam\n(D) parking lot" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.875, "loss": 4.168753147125244 }, { "id": "identity_333026", "image": "A-OKVQA/images/188752.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can hanging these avoid?\nChoices:\n(A) being damaged\n(B) black spots\n(C) being touched\n(D) being stolen" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.8125, "loss": 4.308517932891846 }, { "id": "identity_204754", "image": "VQA2.0/images/511930.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the apron worn by the person on the right?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 20.453125, "loss": 4.369401931762695 }, { "id": "identity_105389", "image": "A-OKVQA/images/175831.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the gate to the building open or closed?" }, { "from": "gpt", "value": "The answer is Open" } ], "clip_score": 16.515625, "loss": 4.038678169250488 }, { "id": "identity_266785", "image": "VizWiz/images/VizWiz_train_00006736.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is currently displayed on the screen, and is there any action required to proceed further?" }, { "from": "gpt", "value": "The answer is red x" } ], "clip_score": 23.953125, "loss": 6.178932189941406 }, { "id": "identity_334285", "image": "PMC-VQA/images/PMC6104493_Figure1_357550.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was used to generate some of the images B, E, and H?\nChoices:\n(A) T1-weighted image\n(B) FLAIR image\n(C) T2-weighted image\n(D) Diffusion weighted image." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.75, "loss": 4.133593559265137 }, { "id": "identity_23040", "image": "A-OKVQA/images/557434.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of product is most probably being transported in the last and tallest box on the truck?\nChoices:\n(A) clothing\n(B) home appliance\n(C) food\n(D) auto parts" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.3515625, "loss": 4.959745407104492 }, { "id": "identity_20834", "image": "IconQA/images/8372.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a moon. Which picture is second?\nChoices:\n(A) star\n(B) moon\n(C) rocket" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.390625, "loss": 4.050201416015625 }, { "id": "identity_322239", "image": "PMC-VQA/images/PMC7988374_Fig2_398467.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this located?\nChoices:\n(A) Left pelvis\n(B) Abdomen\n(C) Right pelvis\n(D) Chest" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.875, "loss": 4.418051242828369 }, { "id": "identity_292888", "image": "VQA-AS/images/19300.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the paintings look the same?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 18.3125, "loss": 4.150445938110352 }, { "id": "identity_158142", "image": "ScienceQA/images/19280.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many hydrogen (H) atoms are present in the ethanol molecule?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 29.109375, "loss": 4.457906723022461 }, { "id": "identity_188900", "image": "PMC-VQA/images/PMC7694461_viruses-12-01258-f008_307266.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the text \"rED\\[ZIKAL\\]Vax-(I152, T156, H158)\" indicate about the image?" }, { "from": "gpt", "value": "The answer is Label for the sample or experiment" } ], "clip_score": 22.625, "loss": 4.753527641296387 }, { "id": "identity_221289", "image": "A-OKVQA/images/563713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many handles does the vase have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.234375, "loss": 4.109622955322266 }, { "id": "identity_34415", "image": "A-OKVQA/images/9542.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What letter is obscured by the sign after the PHO?\nChoices:\n(A) n\n(B) t\n(C) m\n(D) g" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.09375, "loss": 4.0852274894714355 }, { "id": "identity_38551", "image": "DocVQA/images/hkxw0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Program Name ?" }, { "from": "gpt", "value": "The answer is Evergreen" } ], "clip_score": 19.109375, "loss": 4.227341175079346 }, { "id": "identity_149729", "image": "DocVQA/images/psxw0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of item VII on the agenda?" }, { "from": "gpt", "value": "The answer is Bank Authorization" } ], "clip_score": 20.296875, "loss": 5.41017484664917 }, { "id": "identity_21250", "image": "PMC-VQA/images/PMC5585196_F2_211657.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which view was used to classify iNPH and HC groups? \nChoices:\n(A) Bottom view \n(B) Front view \n(C) Left view \n(D) Top view " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.71875, "loss": 4.994111061096191 }, { "id": "identity_129120", "image": "VQA-AS/images/1934.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the pants of the person standing?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 19.125, "loss": 4.971004486083984 }, { "id": "identity_204123", "image": "VQA2.0/images/244293.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many yellow cabs are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 27.234375, "loss": 4.702476978302002 }, { "id": "identity_108835", "image": "Super-CLEVR/images/superCLEVR_new_001039.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels are visible on the gold vehicle?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 28.5625, "loss": 4.192723274230957 }, { "id": "identity_202590", "image": "A-OKVQA/images/308892.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many panniers are attached to the bike?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 29.953125, "loss": 4.21712589263916 }, { "id": "identity_26600", "image": "PMC-VQA/images/PMC8212978_F1_470952.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the leaf was avoided during laser targeting? \nChoices:\n(A) Leaf tip \n(B) Leaf stomata \n(C) Leaf veins \n(D) Leaf center " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.75, "loss": 4.108561038970947 }, { "id": "identity_263069", "image": "Super-CLEVR/images/superCLEVR_new_000652.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the color of the big mountain bike the same as that of the double bus?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 24.4375, "loss": 5.068500995635986 }, { "id": "identity_24105", "image": "A-OKVQA/images/223612.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of loads are the trucks probably used to haul?\nChoices:\n(A) gravel\n(B) metal\n(C) trash\n(D) furniture" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.1328125, "loss": 4.2093825340271 }, { "id": "identity_183170", "image": "A-OKVQA/images/134870.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the pole closest to the camera?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 22.59375, "loss": 4.1912970542907715 }, { "id": "identity_22647", "image": "A-OKVQA/images/97667.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which leavening was used most here?\nChoices:\n(A) none\n(B) salt\n(C) yeast\n(D) vegemite" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.6328125, "loss": 4.4695281982421875 }, { "id": "identity_13798", "image": "Super-CLEVR/images/superCLEVR_new_000357.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the material of the cyan aeroplane the same as the yellow chopper?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 29.375, "loss": 5.0428032875061035 }, { "id": "identity_110733", "image": "VQA2.0/images/324823.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the woman doing at the table?" }, { "from": "gpt", "value": "The answer is Reading" } ], "clip_score": 25.28125, "loss": 4.140622138977051 }, { "id": "identity_228978", "image": "A-OKVQA/images/137748.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the traffic light, and how many cars are moving?" }, { "from": "gpt", "value": "The answer is Green, one" } ], "clip_score": 19.140625, "loss": 4.08579158782959 }, { "id": "identity_302384", "image": "FigureQA/images/51778.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Light Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.59375, "loss": 4.072787284851074 }, { "id": "identity_33466", "image": "A-OKVQA/images/401132.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How is the woman in the grey shirt feeling?\nChoices:\n(A) depressed\n(B) hostile\n(C) mad\n(D) excited" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.3203125, "loss": 4.052428245544434 }, { "id": "identity_17847", "image": "A-OKVQA/images/201141.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the green stalk for?\nChoices:\n(A) sale\n(B) decoration\n(C) growing bananas\n(D) hanging bananas" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.75, "loss": 4.098679542541504 }, { "id": "identity_187825", "image": "TextVQA/images/4966c6ae95fc4ca7.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the letter \"Y\" on the tail of the aircraft?" }, { "from": "gpt", "value": "The answer is Squadron identifier" } ], "clip_score": 19.53125, "loss": 5.18170690536499 }, { "id": "identity_318582", "image": "Super-CLEVR/images/superCLEVR_new_001179.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the purple metallic object next to the blue vehicle and the suv in front of the purple bike have the same size?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 35.75, "loss": 5.24722957611084 }, { "id": "identity_14509", "image": "A-OKVQA/images/468471.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What space is this bed located in?\nChoices:\n(A) furniture store\n(B) doctors office\n(C) waiting room\n(D) video store" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.890625, "loss": 4.530335426330566 }, { "id": "identity_147954", "image": "DocVQA/images/qndd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the second person mentioned in the document?" }, { "from": "gpt", "value": "The answer is Verna Schneider" } ], "clip_score": 21.921875, "loss": 5.136885643005371 }, { "id": "identity_14069", "image": "A-OKVQA/images/524255.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What entertainment do these people have to amuse them?\nChoices:\n(A) opera\n(B) tv\n(C) horseback tricks\n(D) singing" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.109375, "loss": 4.342824459075928 }, { "id": "identity_315295", "image": "PMC-VQA/images/PMC7291746_Fig1_191574.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of imaging method was used to show exudative retinal detachment?\nChoices:\n(A) Fluorescein angiography \n(B) Optical coherence tomography \n(C) Ocular ultrasonography \n(D) X-ray scan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.640625, "loss": 4.167459964752197 }, { "id": "identity_10128", "image": "DocVQA/images/nfnw0228_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Account Number?" }, { "from": "gpt", "value": "The answer is da0608" } ], "clip_score": 18.515625, "loss": 4.108895301818848 }, { "id": "identity_330680", "image": "IconQA/images/36592.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a train. What is the ninth image?\nChoices:\n(A) train\n(B) bear\n(C) plane" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.46875, "loss": 4.109183311462402 }, { "id": "identity_276177", "image": "VQA2.0/images/572789.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the girl's hair straight?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 13.5390625, "loss": 4.112192630767822 }, { "id": "identity_300228", "image": "ScienceQA/images/5908.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Tallahassee\n(B) Salem\n(C) Tampa\n(D) Orlando" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.0625, "loss": 4.301970958709717 }, { "id": "identity_35261", "image": "IconQA/images/3493.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner more likely to land?\nChoices:\n(A) white\n(B) blue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.6875, "loss": 4.143676280975342 }, { "id": "identity_276093", "image": "PMC-VQA/images/PMC7161170_Fig1_152846.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of scans were utilized in this study? \nOptions:\n(A) MRI\n(B) X-ray\n(C) CT scan\n(D) PET scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.265625, "loss": 4.005438804626465 }, { "id": "identity_21529", "image": "A-OKVQA/images/574215.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What sauce is on this pizza?\nChoices:\n(A) cheese\n(B) dyed\n(C) white\n(D) tomato" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.09375, "loss": 4.035833835601807 }, { "id": "identity_27695", "image": "PMC-VQA/images/PMC2940168_F0001_73732.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is seen in the image?\nChoices:\n(A) Bilateral abscess \n(B) Abscess deep in the left pectoralis major muscle \n(C) Abscess deep in the right pectoralis major muscle extending into the axilla \n(D) Abscess in the right arm muscles " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.015625, "loss": 4.153697490692139 }, { "id": "identity_103339", "image": "PMC-VQA/images/PMC7911842_diagnostics-11-00169-f001_375906.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many images show a cross-sectional view of the brain?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 26.0625, "loss": 4.082394599914551 }, { "id": "identity_157140", "image": "ScienceQA/images/20869.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many fins can be observed on the goldfish in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 31.078125, "loss": 4.445310115814209 }, { "id": "identity_72303", "image": "TQA/images/abc_question_images/parts_microscope_17189.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the part labeled with the letter K?" }, { "from": "gpt", "value": "The answer is Arm" } ], "clip_score": 19.90625, "loss": 4.492894172668457 }, { "id": "identity_213113", "image": "VizWiz/images/VizWiz_train_00010803.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many items are on the windowsill?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.9375, "loss": 4.192873954772949 }, { "id": "identity_221664", "image": "A-OKVQA/images/400477.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing hats in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.375, "loss": 4.622486114501953 }, { "id": "identity_327965", "image": "PMC-VQA/images/PMC7681948_Fig2_302948.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this preoperative B-scan ultrasonographic image indicate? \nChoices:\n(A) Detached retina \n(B) Detached choroid \n(C) Disruption of zonules \n(D) Subluxated lens" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.25, "loss": 4.09557580947876 }, { "id": "identity_308573", "image": "PMC-VQA/images/PMC5540294_Fig4_198895.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in c and d?\nChoices:\n(A) Normal detached cells \n(B) Ruptured detached cells \n(C) Merofusosomes \n(D) Absent merozoite formation" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.84375, "loss": 4.145913124084473 }, { "id": "identity_332801", "image": "A-OKVQA/images/424300.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of place is depicted?\nChoices:\n(A) field\n(B) jungle\n(C) beach\n(D) forest" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.3203125, "loss": 4.1745758056640625 }, { "id": "identity_285107", "image": "PMC-VQA/images/PMC4157140_fig1_318564.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be observed adjacent to the anterior horn of the right lateral ventricle?\nChoices:\n(A) numerous small, well-delineated cysts \n(B) signal abnormalities \n(C) nothing abnormal \n(D) additional minor abnormalities" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.28125, "loss": 4.60022497177124 }, { "id": "identity_29667", "image": "ScienceQA/images/20812.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following could Leon's test show?\nChoices:\n(A) how well the weather station would work when it was windy\n(B) if the weather station would work when the temperature was 50°C" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.953125, "loss": 4.365054130554199 }, { "id": "identity_326971", "image": "PMC-VQA/images/PMC3227162_F1_117676.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the thrombus located in the image?\nChoices:\n(A) Right atrium \n(B) Right ventricle \n(C) Left atrium \n(D) Left ventricle" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.0625, "loss": 4.257062911987305 }, { "id": "identity_282666", "image": "PMC-VQA/images/PMC6770061_ijms-20-04401-f001_43929.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the asterisk in the image represent in terms of structure?\nChoices:\n(A) IMCD \n(B) Thick limbs \n(C) Thin limbs \n(D) Expression of claudin-19 TJ in IMCD" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.5625, "loss": 4.690277099609375 }, { "id": "identity_316091", "image": "ScienceQA/images/18416.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the highlighted colony?\nChoices:\n(A) New Jersey\n(B) South Carolina\n(C) North Carolina\n(D) Delaware" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.265625, "loss": 4.184481143951416 }, { "id": "identity_223073", "image": "PMC-VQA/images/PMC4494931_F1_404044.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arrows are pointing to the abnormality?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 18.65625, "loss": 4.127853870391846 }, { "id": "identity_182232", "image": "VQA2.0/images/578137.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of bird is standing on the grass?" }, { "from": "gpt", "value": "The answer is Egret" } ], "clip_score": 25.890625, "loss": 4.291283130645752 }, { "id": "identity_283541", "image": "PMC-VQA/images/PMC6132558_Fig5_366058.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the red arrows shown in the images?\nChoices:\n(A) aneurysm in the proximal segment of the RCA. \n(B) stenosis in the mid segment of the RCA. \n(C) normal segment of the RCA. \n(D) none of the above." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.609375, "loss": 4.2436909675598145 }, { "id": "identity_293080", "image": "TextVQA/images/9aff1f3584844e37.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of axles?" }, { "from": "gpt", "value": "The answer is aviva stadium" } ], "clip_score": 10.3046875, "loss": 5.5670976638793945 }, { "id": "identity_289501", "image": "A-OKVQA/images/26448.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which material was utilized to represent the snow in this artwork? Was it:\n(A) bubbles\n(B) cotton\n(C) feathers\n(D) yarn" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.015625, "loss": 4.780524253845215 }, { "id": "identity_23508", "image": "A-OKVQA/images/547431.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The large blue plane has a logo that looks similar to the logo of what brand?\nChoices:\n(A) pepsi\n(B) green giant\n(C) nathan's\n(D) mcdonald's" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.9375, "loss": 4.141056537628174 }, { "id": "identity_9561", "image": "TextVQA/images/6028c49070ee2b28.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the name of the bus on the right?" }, { "from": "gpt", "value": "The answer is moritz" } ], "clip_score": 16.6875, "loss": 4.841648578643799 }, { "id": "identity_325493", "image": "FigureQA/images/76944.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Web Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.75, "loss": 4.298067092895508 }, { "id": "identity_300859", "image": "TextVQA/images/b190e374bbf24171.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the phone stand in this box?" }, { "from": "gpt", "value": "The answer is drivesafe" } ], "clip_score": 23.609375, "loss": 4.264162540435791 }, { "id": "identity_35483", "image": "A-OKVQA/images/494968.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What city might this bike be ridden in?\nChoices:\n(A) san francisco\n(B) new york\n(C) los angeles\n(D) chicago" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.4375, "loss": 4.009068012237549 }, { "id": "identity_330967", "image": "PMC-VQA/images/PMC4522994_F2_411360.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which structure is being compressed in this view?\nChoices:\n(A) renal artery\n(B) common bile duct\n(C) pulmonary trunk\n(D) inferior vena cava" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.828125, "loss": 4.862013816833496 }, { "id": "identity_305880", "image": "AI2D/images/202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the diagram below, which shows a partial food web, which organism provides energy to the mouse?\nChoices:\n(A) a fox\n(B) an eagle\n(C) an owl\n(D) the grass" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.1875, "loss": 4.132625102996826 }, { "id": "identity_289180", "image": "DocVQA/images/fmjm0223_48.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is responsible for providing the accurate information?" }, { "from": "gpt", "value": "The answer is Author's" } ], "clip_score": 25.078125, "loss": 4.1310505867004395 }, { "id": "identity_312246", "image": "TQA/images/abc_question_images/layers_of_atmosphere_17067.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents a part of the atmosphere?\nChoices:\n(A) M\n(B) N\n(C) W\n(D) X" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.75, "loss": 4.1097283363342285 }, { "id": "identity_25202", "image": "A-OKVQA/images/436127.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which state is this street located?\nChoices:\n(A) idaho\n(B) illinois\n(C) ohio\n(D) michigan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.96875, "loss": 4.234918117523193 }, { "id": "identity_310319", "image": "TQA/images/abc_question_images/human_system_excretory_16117.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of U linking J to M?\nChoices:\n(A) FALLOPIAN TUBE\n(B) BRAIN\n(C) URETHRA \n(D) OVARIES" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.4375, "loss": 4.461200714111328 }, { "id": "identity_276728", "image": "ScienceQA/images/9878.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Nevada?\n(A) Frankfort\n(B) Carson City\n(C) Helena\n(D) Denver" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.578125, "loss": 4.184230804443359 }, { "id": "identity_303990", "image": "FigureQA/images/20172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this color greater than Gray?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.734375, "loss": 4.274431228637695 }, { "id": "identity_327026", "image": "AI2D/images/4472.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which image shows Cavus foot?\nChoices:\n(A) B\n(B) Both\n(C) None of above\n(D) A" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.015625, "loss": 4.011464595794678 }, { "id": "identity_205554", "image": "A-OKVQA/images/484273.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bowl to the right of the stove?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 19.140625, "loss": 4.885851860046387 }, { "id": "identity_31887", "image": "PMC-VQA/images/PMC4174767_F6_322464.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which brain surfaces are displayed in the GWC imaging modality? \nChoices:\n(A) Anterior and Posterior \n(B) Medial and Lateral \n(C) Interior and Exterior \n(D) Superior and Inferior " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.453125, "loss": 4.01281213760376 }, { "id": "identity_290254", "image": "A-OKVQA/images/215213.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are these individuals sharing this room?\nChoices:\n(A) punishment\n(B) convention\n(C) emergency evacuation\n(D) imprisonment" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.265625, "loss": 4.297245979309082 }, { "id": "identity_204218", "image": "TextVQA/images/10e32d2a0fa3eefc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the spacecraft depicted on the cover?" }, { "from": "gpt", "value": "The answer is HOLO X" } ], "clip_score": 26.359375, "loss": 4.493912220001221 }, { "id": "identity_10980", "image": "FigureQA/images/15704.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Seafoam the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.84375, "loss": 4.750059604644775 }, { "id": "identity_116211", "image": "A-OKVQA/images/272807.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 25.40625, "loss": 4.079182147979736 }, { "id": "identity_182355", "image": "PMC-VQA/images/PMC3912456_F1_263736.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image?" }, { "from": "gpt", "value": "The answer is Axial plane" } ], "clip_score": 22.875, "loss": 4.025061130523682 }, { "id": "identity_142690", "image": "DocVQA/images/rzxv0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the growth regulator mentioned in the document?" }, { "from": "gpt", "value": "The answer is Ortho-nil" } ], "clip_score": 22.0625, "loss": 4.331509590148926 }, { "id": "identity_271865", "image": "AI2D/images/3264.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label correctly identifies b among the following choices: smooth er, rough er, golgi apparatus, or lysosome?\nChoices:\n(A) smooth er\n(B) rough er\n(C) golgi apparatus\n(D) lysosome" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.609375, "loss": 4.412755489349365 }, { "id": "identity_326638", "image": "A-OKVQA/images/507927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what type of setting are these people?\n\nChoices:\n(A) city\n(B) collegiate\n(C) mall\n(D) farm" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.390625, "loss": 4.495189189910889 }, { "id": "identity_16897", "image": "ScienceQA/images/10928.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of rock is tillite?\nChoices:\n(A) igneous\n(B) sedimentary\n(C) metamorphic" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.125, "loss": 4.08669900894165 }, { "id": "identity_41730", "image": "DocVQA/images/zqld0217_21.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the document attached to the Neurontin 1997 Tactical Plan?" }, { "from": "gpt", "value": "The answer is AED interactions dosage card" } ], "clip_score": 37.15625, "loss": 4.989434242248535 }, { "id": "identity_124038", "image": "A-OKVQA/images/451679.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 15.4921875, "loss": 4.598117828369141 }, { "id": "identity_128228", "image": "TextVQA/images/cd9cc90434be259b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are seated in the audience area directly behind the boxer?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 23.765625, "loss": 4.440354347229004 }, { "id": "identity_258619", "image": "PMC-VQA/images/PMC7003341_Fig3_111945.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the radiographic features indicating the presence of effusion in the right chest, and how do they differ from the left chest?" }, { "from": "gpt", "value": "The answer is Fluid accumulation, white area on right, clearer left lung" } ], "clip_score": 26.734375, "loss": 4.001761436462402 }, { "id": "identity_24839", "image": "Super-CLEVR/images/superCLEVR_new_001358.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the object that is right of the large matte bicycle have the same shape as the small blue thing?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 33.5625, "loss": 5.474880218505859 }, { "id": "identity_49910", "image": "DocVQA/images/txdw0217_44.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the treatment mentioned in the alternative medicines section?" }, { "from": "gpt", "value": "The answer is Red clover" } ], "clip_score": 17.609375, "loss": 4.269872188568115 }, { "id": "identity_296727", "image": "A-OKVQA/images/153692.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following objects shares a color with the frame of the bike? \n(A) blueberries\n(B) oranges\n(C) bananas\n(D) watermelon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.90625, "loss": 4.024874687194824 }, { "id": "identity_155959", "image": "ScienceQA/images/13711.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the reptile's skin?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 30.8125, "loss": 4.100511074066162 }, { "id": "identity_335757", "image": "A-OKVQA/images/508962.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What character is the person looking at their cell phone portraying?\nChoices:\n(A) mary poppins\n(B) jesus\n(C) sweeny todd\n(D) santa" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.1875, "loss": 4.425987243652344 }, { "id": "identity_222701", "image": "TextVQA/images/64453069f751284d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand are the watches in the image?" }, { "from": "gpt", "value": "The answer is MomoDesign" } ], "clip_score": 32.71875, "loss": 4.195364475250244 }, { "id": "identity_93886", "image": "IconQA/images/95299.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of number display does the digital clock use?" }, { "from": "gpt", "value": "The answer is Digital" } ], "clip_score": 30.96875, "loss": 4.152850151062012 }, { "id": "identity_295883", "image": "TabMWP/images/11825.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the data provided in the table, what was the rate of change in the number of straight-A students from 2020 to 2021 according to the school administrator concerned about grade inflation?" }, { "from": "gpt", "value": "The answer is -5" } ], "clip_score": 31.078125, "loss": 4.250957489013672 }, { "id": "identity_337514", "image": "PMC-VQA/images/PMC2488350_F4_25949.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the three parts of the midgut called?\nChoices:\n(A) Front, Middle, Back \n(B) Anterior, Posterior, Dorsal \n(C) Superior, Inferior, Anterior \n(D) Cranial, Caudal, Medial" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.6171875, "loss": 4.115179538726807 }, { "id": "identity_14669", "image": "A-OKVQA/images/538057.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What might be taking place in the building to the right?\nChoices:\n(A) baseball game\n(B) roller derby\n(C) worship service\n(D) property auction" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.796875, "loss": 4.568075656890869 }, { "id": "identity_207793", "image": "PMC-VQA/images/PMC4875637_Fig2_32540.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What might the blue areas in the image indicate?" }, { "from": "gpt", "value": "The answer is Lower brain activity or function" } ], "clip_score": 32.125, "loss": 4.46148681640625 }, { "id": "identity_197789", "image": "A-OKVQA/images/385709.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the team in the field?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 22.78125, "loss": 4.4611358642578125 }, { "id": "identity_145998", "image": "DocVQA/images/tzwm0227_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the position of Willard Krehl?" }, { "from": "gpt", "value": "The answer is Community Medicine, Jefferson Medical College" } ], "clip_score": 22.984375, "loss": 4.012991905212402 }, { "id": "identity_31056", "image": "PMC-VQA/images/PMC8150050_RSOS210644F3_450987.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the cyan color indicate in the LSCM images? \nChoices:\n(A) Dead coral tissue \n(B) Healthy coral tissue \n(C) Algae \n(D) Water " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.203125, "loss": 4.658085823059082 }, { "id": "identity_331727", "image": "PMC-VQA/images/PMC7456424_Fig3_242275.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of photoreceptor is shown in the image?\nChoices:\n(A) Rods\n(B) Cones\n(C) Bipolar cells\n(D) Ganglion cells" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.84375, "loss": 4.299108028411865 }, { "id": "identity_154982", "image": "ScienceQA/images/19050.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible legs does the beaver have in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 29.734375, "loss": 4.165582180023193 }, { "id": "identity_285989", "image": "PMC-VQA/images/PMC5745994_Fig1_257987.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which area of the hand was impacted by the pathological tissue? \nOptions:\n(A) Dorsum\n(B) Hypothenar eminence\n(C) Thenar eminence\n(D) Palm of the hand" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.484375, "loss": 4.453486442565918 }, { "id": "identity_58250", "image": "PlotQA/images/132246.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the X-axis?" }, { "from": "gpt", "value": "The answer is Years" } ], "clip_score": 16.90625, "loss": 4.044423580169678 }, { "id": "identity_140418", "image": "DocVQA/images/ygyk0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the letter addressed to?" }, { "from": "gpt", "value": "The answer is Jennifer Joistead-Collins" } ], "clip_score": 21.78125, "loss": 4.7778000831604 }, { "id": "identity_321620", "image": "AI2D/images/103.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At what stage can it fly?\nChoices:\n(A) Adult\n(B) Eggs\n(C) Pupa\n(D) Nymph" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.9375, "loss": 4.101001262664795 }, { "id": "identity_278303", "image": "TQA/images/question_images/lewis_dot_diagrams_9144.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which image represents the atom of Hydrogen?\nChoices:\n(A) both\n(B) image on the right\n(C) left image\n(D) none" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.09375, "loss": 4.274095058441162 }, { "id": "identity_20847", "image": "PMC-VQA/images/PMC7429820_Fig1_234904.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the vertical position of the text indicate in the figure? \nChoices:\n(A) Frequency \n(B) Amplitude \n(C) Time \n(D) None of the above. " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.5390625, "loss": 4.974139213562012 }, { "id": "identity_148630", "image": "DocVQA/images/xrvg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last name of the Rehab Nurse with the initials \"F.R.\"?" }, { "from": "gpt", "value": "The answer is Robinson" } ], "clip_score": 20.421875, "loss": 4.5947113037109375 }, { "id": "identity_301035", "image": "Super-CLEVR/images/superCLEVR_new_000351.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a large shiny object of the same shape as the small one?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 15.09375, "loss": 5.05903959274292 }, { "id": "identity_303415", "image": "AI2D/images/2884.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the dentin?\nChoices:\n(A) E\n(B) G\n(C) C\n(D) F" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.296875, "loss": 4.372708320617676 }, { "id": "identity_203873", "image": "VizWiz/images/VizWiz_train_00018049.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the logo in the top left corner?" }, { "from": "gpt", "value": "The answer is Swoosh or tick" } ], "clip_score": 18.140625, "loss": 4.03883171081543 }, { "id": "identity_297028", "image": "A-OKVQA/images/360399.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the visual distortion under the skateboard?\nChoices:\n(A) heat\n(B) sound\n(C) light\n(D) wind" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.828125, "loss": 4.61371374130249 }, { "id": "identity_16279", "image": "PMC-VQA/images/PMC6108967_Fig6_358725.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which side shows pleural effusion?\nChoices:\n(A) Right \n(B) Left \n(C) Both sides \n(D) No pleural effusion " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.703125, "loss": 4.411124229431152 }, { "id": "identity_25765", "image": "ScienceQA/images/13775.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Arizona?\nChoices:\n(A) Tucson\n(B) Phoenix\n(C) Providence\n(D) Anchorage" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.296875, "loss": 4.373635292053223 }, { "id": "identity_16755", "image": "VQA2.0/images/338258.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: From those whose whole bodies are visible, are they all the same gender?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 14.3125, "loss": 4.1012797355651855 }, { "id": "identity_108407", "image": "A-OKVQA/images/71608.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cyclists can be seen wearing helmets?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 29.125, "loss": 4.497835636138916 }, { "id": "identity_192471", "image": "PMC-VQA/images/PMC6384036_FIG2_438696.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the general shape of the abnormality indicated by the top arrow?" }, { "from": "gpt", "value": "The answer is Irregular" } ], "clip_score": 24.828125, "loss": 4.533433437347412 }, { "id": "identity_298322", "image": "PMC-VQA/images/PMC3954266_fig2_273904.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the content of the incisional hernia observed in the CT scan? \nChoices:\n(A) Bowel\n(B) Ovarian cyst\n(C) Lymph node\n(D) Omentum" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.15625, "loss": 4.79227352142334 }, { "id": "identity_306476", "image": "A-OKVQA/images/208408.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What phrase is most fitting?\nChoices:\n(A) airplane race\n(B) conflagration\n(C) downpour\n(D) earthquake" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.890625, "loss": 5.08054256439209 }, { "id": "identity_202091", "image": "A-OKVQA/images/162792.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tracks are visible in the left image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 18.28125, "loss": 4.632901668548584 }, { "id": "identity_309962", "image": "TQA/images/abc_question_images/parts_microscope_17202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter in the image represents the lens?\nChoices:\n(A) E\n(B) J\n(C) M\n(D) N" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.140625, "loss": 4.294345855712891 }, { "id": "identity_154673", "image": "AI2D/images/456.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different food chains can you identify in this food web that include the sparrow?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 27.59375, "loss": 4.509415626525879 }, { "id": "identity_272401", "image": "ScienceQA/images/13010.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being pointed out in the highlighted area?\nChoices:\n(A) Saint Kitts and Nevis\n(B) Antigua and Barbuda\n(C) Cuba\n(D) Jamaica" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.625, "loss": 4.098443984985352 }, { "id": "identity_283567", "image": "VQA2.0/images/65880.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the signs that indicate the men are warming up?" }, { "from": "gpt", "value": "The answer is only 1 team" } ], "clip_score": 16.625, "loss": 4.308590412139893 }, { "id": "identity_309043", "image": "TQA/images/question_images/human_system_eye_6124.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the outermost structure at the front?\nChoices:\n(A) Sclera\n(B) Cornea\n(C) Iris\n(D) Lens" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.390625, "loss": 4.30801248550415 }, { "id": "identity_291634", "image": "VQA2.0/images/430885.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the picture contain a clock?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 17.78125, "loss": 4.131929874420166 }, { "id": "identity_277456", "image": "TextVQA/images/1c37dca03d3afa3e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What message is the person trying to convey through the words on the coffee mug today?" }, { "from": "gpt", "value": "The answer is awesome" } ], "clip_score": 34.59375, "loss": 5.34560489654541 }, { "id": "identity_72307", "image": "TQA/images/abc_question_images/human_system_respiratory_16202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label corresponds to the right lung?" }, { "from": "gpt", "value": "The answer is Label V" } ], "clip_score": 30.359375, "loss": 4.2239251136779785 }, { "id": "identity_179848", "image": "TabMWP/images/20944.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Ellen wants to buy an equal number of small teddy bears and rubber ducks with $5.00, what is the maximum number of each she can buy?" }, { "from": "gpt", "value": "The answer is 5 pairs" } ], "clip_score": 32.75, "loss": 4.144123077392578 }, { "id": "identity_71026", "image": "TQA/images/question_images/tides_2606.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What phase of the moon is depicted in the image?" }, { "from": "gpt", "value": "The answer is New moon" } ], "clip_score": 26.453125, "loss": 4.032249927520752 }, { "id": "identity_337856", "image": "A-OKVQA/images/22652.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity are these individuals going to participate in?\nChoices:\n(A) olympic race\n(B) zoo visit\n(C) ski lessons\n(D) recordings" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.328125, "loss": 4.130370140075684 }, { "id": "identity_116325", "image": "Super-CLEVR/images/superCLEVR_new_000628.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the smallest object in the image?" }, { "from": "gpt", "value": "The answer is The darts" } ], "clip_score": 15.46875, "loss": 4.136741638183594 }, { "id": "identity_34411", "image": "ScienceQA/images/18341.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) North Carolina\n(B) Iowa\n(C) Virginia\n(D) New Hampshire" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.03125, "loss": 4.148029327392578 }, { "id": "identity_298123", "image": "FigureQA/images/95371.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Hot Pink the color with the largest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.28125, "loss": 4.054994583129883 }, { "id": "identity_261820", "image": "FigureQA/images/91176.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Maroon the color with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.375, "loss": 4.191068649291992 }, { "id": "identity_38435", "image": "PMC-VQA/images/PMC5906535_F6_303486.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which direction is the rostral end facing in the images?\nChoices:\n(A) Right \n(B) Up \n(C) Left \n(D) Down" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.7421875, "loss": 4.01929235458374 }, { "id": "identity_321428", "image": "TabMWP/images/31287.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows something. Is it linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.375, "loss": 4.623645782470703 }, { "id": "identity_289400", "image": "PMC-VQA/images/PMC5020733_f2_72526.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In all panels except panel h, what type of view is depicted? \nChoices:\n(A) lateral view\n(B) anterior view\n(C) ventral view\n(D) dorsal view" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.8125, "loss": 4.383784294128418 }, { "id": "identity_34883", "image": "A-OKVQA/images/89882.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the man hoping to do by skating between the two green cones?\nChoices:\n(A) get exercise\n(B) stop fall\n(C) perfect stunt\n(D) avoid traffic" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.125, "loss": 4.413989067077637 }, { "id": "identity_301486", "image": "A-OKVQA/images/114025.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why does the person need string?\nChoices:\n(A) knit\n(B) fly kite\n(C) sew\n(D) tie ends" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.9375, "loss": 4.348876476287842 }, { "id": "identity_333575", "image": "TQA/images/question_images/human_system_muscular_2851.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is found in the arm?\nChoices:\n(A) Biceps\n(B) Tibialis anterior\n(C) Frontalis\n(D) Quadriceps" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.796875, "loss": 4.0574116706848145 }, { "id": "identity_300667", "image": "A-OKVQA/images/252929.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these letters used for?\nChoices:\n(A) child's candy\n(B) writing messages\n(C) mark problems\n(D) luck" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.8984375, "loss": 4.547953128814697 }, { "id": "identity_249989", "image": "TQA/images/question_images/solar_system_6296.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Arrange the planets in order from closest to farthest from the sun, and identify the fourth planet in this sequence" }, { "from": "gpt", "value": "The answer is Mars" } ], "clip_score": 29.890625, "loss": 4.045368671417236 }, { "id": "identity_306888", "image": "FigureQA/images/28521.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.015625, "loss": 4.018784523010254 }, { "id": "identity_19958", "image": "A-OKVQA/images/214385.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What age group is most represented at this location?\nChoices:\n(A) seniors\n(B) teens\n(C) children\n(D) adults" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.5703125, "loss": 4.544827461242676 }, { "id": "identity_268453", "image": "DVQA/images/bar_train_00108380.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which element is symbolized by the color sky blue?" }, { "from": "gpt", "value": "The answer is play" } ], "clip_score": 21.40625, "loss": 4.385534286499023 }, { "id": "identity_300991", "image": "VizWiz/images/VizWiz_train_00015169.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this object?" }, { "from": "gpt", "value": "The answer is dustin" } ], "clip_score": 20.15625, "loss": 5.205806732177734 }, { "id": "identity_304258", "image": "TQA/images/abc_question_images/human_system_eye_16130.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label corresponds to the iris in this diagram?\nChoices:\n(A) P\n(B) D\n(C) J\n(D) S" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.484375, "loss": 4.150872707366943 }, { "id": "identity_275362", "image": "PMC-VQA/images/PMC8140552_FIG6_447381.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the abnormality shown in the image:\nOptions:\n(A) Lung nodule\n(B) Pneumothorax\n(C) Pleural effusion\n(D) Pulmonary embolism" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.203125, "loss": 4.379581928253174 }, { "id": "identity_335977", "image": "A-OKVQA/images/370602.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is not allowed here?\nChoices:\n(A) eating\n(B) busses\n(C) taxis\n(D) boarding" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.9375, "loss": 4.1110758781433105 }, { "id": "identity_3544", "image": "ScienceQA/images/3090.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Select the fish below.\nChoices:\n(A) American bullfrog\n(B) whale shark\n(C) sea turtle\n(D) yak" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.8125, "loss": 4.445679664611816 }, { "id": "identity_13905", "image": "ScienceQA/images/15015.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is highlighted?\nChoices:\n(A) South America\n(B) Antarctica\n(C) Europe\n(D) Africa" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.125, "loss": 4.711022853851318 }, { "id": "identity_11077", "image": "Super-CLEVR/images/superCLEVR_new_001198.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the blue thing that is behind the big red motorbike made of?" }, { "from": "gpt", "value": "The answer is rubber" } ], "clip_score": 16.78125, "loss": 4.149049282073975 }, { "id": "identity_36921", "image": "ScienceQA/images/4449.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Arizona?\nChoices:\n(A) Honolulu\n(B) Phoenix\n(C) Biloxi\n(D) Tucson" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.328125, "loss": 4.356167793273926 }, { "id": "identity_23026", "image": "TextVQA/images/6ac9c8857df1241d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what brand of soccer ball are they kicking?" }, { "from": "gpt", "value": "The answer is volt" } ], "clip_score": 19.640625, "loss": 5.482439041137695 }, { "id": "identity_311009", "image": "ScienceQA/images/20291.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During this period, thermal energy was transferred from () to ().\nChoices:\n(A) each cake . . . the surroundings\n(B) the surroundings . . . each cake" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.421875, "loss": 4.022724628448486 }, { "id": "identity_297568", "image": "A-OKVQA/images/136928.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the likely reason for the man standing near the buses?\nChoices:\n(A) get directions\n(B) travel\n(C) drive\n(D) sight-see" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.84375, "loss": 4.963757038116455 }, { "id": "identity_297200", "image": "A-OKVQA/images/199764.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What item is being sold under the tent by the man? Is it:\n(A) tables\n(B) peanuts\n(C) overalls\n(D) apples" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.953125, "loss": 4.166380405426025 }, { "id": "identity_241155", "image": "Super-CLEVR/images/superCLEVR_new_001848.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more large metal things on the left side of the large red thing than metallic bicycles and motorcycles combined?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 21.578125, "loss": 4.997611999511719 }, { "id": "identity_196114", "image": "PMC-VQA/images/PMC4008275_fig4_285384.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many black arrows are pointing to the aorta?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 28.640625, "loss": 4.863525867462158 }, { "id": "identity_305143", "image": "FigureQA/images/58308.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Dark Red?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.625, "loss": 4.018582820892334 }, { "id": "identity_327001", "image": "A-OKVQA/images/345644.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the coldest thing here?\nChoices:\n(A) car engine\n(B) snow\n(C) fan\n(D) refrigerator" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.7734375, "loss": 4.401926040649414 }, { "id": "identity_334898", "image": "DVQA/images/bar_train_00141358.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there stacked bars in this chart?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 28.65625, "loss": 4.019858360290527 }, { "id": "identity_274396", "image": "TQA/images/abc_question_images/faults_11724.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the letter H represent in the following choices: (A) HANGING WALL, (B) FOOT WALL, (C) NORMAL FAULT, (D) WALL?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.171875, "loss": 4.104710578918457 }, { "id": "identity_19576", "image": "A-OKVQA/images/472676.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are they doing with the surfboards?\nChoices:\n(A) selling them\n(B) taking home\n(C) tossing them\n(D) riding them" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.171875, "loss": 4.334901332855225 }, { "id": "identity_291832", "image": "VQA2.0/images/157958.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of the man who is resting on the board?" }, { "from": "gpt", "value": "The answer is security" } ], "clip_score": 16.78125, "loss": 4.017175197601318 }, { "id": "identity_26431", "image": "PMC-VQA/images/PMC7284614_plants-09-00604-f003_188543.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the yellow arrowhead in the image indicate? \nChoices:\n(A) The position of the fs laser beam. \n(B) The location of the cell nucleus. \n(C) The appearance of a new organelle. \n(D) The location of Hechtian strands. " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.8125, "loss": 4.507004261016846 }, { "id": "identity_249211", "image": "Super-CLEVR/images/superCLEVR_new_001240.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of vehicles in the image, and how many of them are either blue or brown?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 22.609375, "loss": 4.116109371185303 }, { "id": "identity_25178", "image": "AI2D/images/3356.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are ladybug spots located?\nChoices:\n(A) Leg\n(B) Antennae\n(C) Pronotum\n(D) Elytra" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 35.3125, "loss": 4.01035737991333 }, { "id": "identity_346", "image": "DVQA/images/bar_train_00107775.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the third group of bars from the left?" }, { "from": "gpt", "value": "The answer is cold" } ], "clip_score": 15.9375, "loss": 4.920519828796387 }, { "id": "identity_338558", "image": "PMC-VQA/images/PMC6550046_DMM039065F2_489391.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does each row in the image indicate?\nChoices:\n(A) Glass brain projection \n(B) Statistical analyses \n(C) Transverse section level \n(D) Coordinates relative to Bregma" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.5625, "loss": 4.121768951416016 }, { "id": "identity_280733", "image": "AI2D/images/3983.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the diagram is represented by the center portion?\nChoices:\n(A) xylem\n(B) phloem\n(C) none of the above\n(D) pith" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.8125, "loss": 4.061017036437988 }, { "id": "identity_299988", "image": "IconQA/images/14795.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there a triangle as a face in this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.703125, "loss": 4.3594465255737305 }, { "id": "identity_12535", "image": "DocVQA/images/xfmd0217_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the position of Leigh A. Beck?" }, { "from": "gpt", "value": "The answer is senior director" } ], "clip_score": 23.90625, "loss": 4.42238187789917 }, { "id": "identity_148579", "image": "DocVQA/images/nfdd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the recipient of the letter?" }, { "from": "gpt", "value": "The answer is Frank Konishi" } ], "clip_score": 20.40625, "loss": 4.060311794281006 }, { "id": "identity_306291", "image": "FigureQA/images/98784.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Web Maroon greater than this color?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.578125, "loss": 4.1134114265441895 }, { "id": "identity_31580", "image": "A-OKVQA/images/537139.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What might cause the property values of the houses to stay low in this location?\nChoices:\n(A) wind\n(B) weather\n(C) jobs\n(D) train noise" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 12.5703125, "loss": 4.4066972732543945 }, { "id": "identity_328272", "image": "PMC-VQA/images/PMC5963950_f3_317361.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do these plots indicate?\nChoices:\n(A) Random values \n(B) Parameter estimates \n(C) Cluster peaks \n(D) Standard deviation" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.78125, "loss": 4.054457664489746 }, { "id": "identity_6556", "image": "AI2D/images/3248.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the central part of the cell?\nChoices:\n(A) cytoplasm\n(B) nucleus\n(C) mitochondrion\n(D) golgi apparatus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.015625, "loss": 4.375692367553711 }, { "id": "identity_333464", "image": "PMC-VQA/images/PMC7044387_ccr32665-fig-0001_125308.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging sequence was utilized to identify the dilation of certain ventricles?\nChoices:\n(A) T2 \n(B) FLAIR \n(C) T1 \n(D) DWI" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.515625, "loss": 4.133998394012451 }, { "id": "identity_318304", "image": "TextVQA/images/165bdc1b0193a27a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does this sign say?" }, { "from": "gpt", "value": "The answer is boteler st" } ], "clip_score": 12.4296875, "loss": 6.18900203704834 }, { "id": "identity_7294", "image": "TQA/images/abc_question_images/human_system_respiratory_11327.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is the main respiratory muscle?\nChoices:\n(A) Y\n(B) S\n(C) N\n(D) L" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.546875, "loss": 4.002381324768066 }, { "id": "identity_157358", "image": "AI2D/images/3198.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of the \"Lateral roots\" in the plant's growth?" }, { "from": "gpt", "value": "The answer is Absorption and stabilization" } ], "clip_score": 34.0, "loss": 4.304960250854492 }, { "id": "identity_294371", "image": "VizWiz/images/VizWiz_train_00018462.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of mushroom is this, and is it toxic or safe for consumption?" }, { "from": "gpt", "value": "The answer is wild" } ], "clip_score": 31.859375, "loss": 6.2037811279296875 }, { "id": "identity_285514", "image": "DVQA/images/bar_train_00159429.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset is represented by the color red?" }, { "from": "gpt", "value": "The answer is plenty" } ], "clip_score": 24.828125, "loss": 5.404393196105957 }, { "id": "identity_25213", "image": "VizWiz/images/VizWiz_train_00019951.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Why isn't my computer starting?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 31.84375, "loss": 4.077940464019775 }, { "id": "identity_56846", "image": "DVQA/images/bar_train_00138240.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item had the highest sales in the ring store?" }, { "from": "gpt", "value": "The answer is pause" } ], "clip_score": 32.4375, "loss": 4.3963494300842285 }, { "id": "identity_282133", "image": "IconQA/images/25762.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nOptions:\n(A) rotate\n(B) invert\n(C) shift" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.21875, "loss": 4.1659393310546875 }, { "id": "identity_18552", "image": "IconQA/images/69552.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can you trace a square with this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.4375, "loss": 4.001010894775391 }, { "id": "identity_286262", "image": "PMC-VQA/images/PMC4506045_pone.0133375.g006_407017.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the insert in the image represent?\nOptions:\n(A) Sections along the Medial-Lateral axis\n(B) Sections along the Anterior-Posterior axis\n(C) Sections along the Proximal-Distal axis\n(D) Sections along the Dorsal-Ventral axis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.21875, "loss": 4.0969438552856445 }, { "id": "identity_304448", "image": "FigureQA/images/84614.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.953125, "loss": 4.045228004455566 }, { "id": "identity_26677", "image": "ScienceQA/images/15312.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) stretchy\n(B) hard" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.5, "loss": 4.177257537841797 }, { "id": "identity_288621", "image": "IconQA/images/31236.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the likelihood of selecting a black marble without looking?\nChoices:\n(A) probable\n(B) unlikely\n(C) impossible\n(D) certain" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.890625, "loss": 4.280655860900879 }, { "id": "identity_120155", "image": "Super-CLEVR/images/superCLEVR_new_000701.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bus located on the left side of the image?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 24.328125, "loss": 5.731713771820068 }, { "id": "identity_905", "image": "TQA/images/abc_question_images/nuclear_energy_17105.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the part represented by label M?\nChoices:\n(A) BORON CONTROL RODS\n(B) URANIUM FUEL RODS\n(C) PUMP\n(D) CORE" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.765625, "loss": 5.267022609710693 }, { "id": "identity_334054", "image": "FigureQA/images/4508.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.96875, "loss": 4.055440902709961 }, { "id": "identity_267027", "image": "VQA-RAD/images/synpic34449.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the image, where can the calcifications be found?" }, { "from": "gpt", "value": "The answer is High on the image" } ], "clip_score": 25.53125, "loss": 4.2676591873168945 }, { "id": "identity_128238", "image": "VQA-AS/images/11957.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows are visible in the room?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.5625, "loss": 4.290091514587402 }, { "id": "identity_204577", "image": "PMC-VQA/images/PMC5775220_F4_265922.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the highlighted area in the image?" }, { "from": "gpt", "value": "The answer is Irregular oval" } ], "clip_score": 27.0625, "loss": 4.423290252685547 }, { "id": "identity_6508", "image": "Super-CLEVR/images/superCLEVR_new_001151.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there an equal number of red shiny motorbikes that are in front of the large green minivan and tiny red metal dirtbikes?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 27.203125, "loss": 4.7874040603637695 }, { "id": "identity_34135", "image": "TabMWP/images/8349.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Some teachers compared how many students are in their classes. What is the range of the numbers?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 31.25, "loss": 4.035449028015137 }, { "id": "identity_286468", "image": "VizWiz/images/VizWiz_train_00019951.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the reason for my computer not starting?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 32.875, "loss": 4.271549224853516 }, { "id": "identity_110259", "image": "TextVQA/images/cace6f17ad0f4c31.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the dresses worn by the women?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 22.421875, "loss": 4.041972637176514 }, { "id": "identity_203807", "image": "TextVQA/images/61e78e1529ef6d0e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are walking on the sidewalk?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 19.65625, "loss": 5.217329978942871 }, { "id": "identity_34826", "image": "ScienceQA/images/9075.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Illinois?\nChoices:\n(A) Springfield\n(B) Saint Paul\n(C) Cleveland\n(D) Topeka" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.984375, "loss": 4.184879779815674 }, { "id": "identity_332600", "image": "A-OKVQA/images/218701.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is offscreen to the bottom right and probably casting a shadow onto the snow?\nChoices:\n(A) sign\n(B) tree\n(C) fence\n(D) house" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.25, "loss": 4.01132869720459 }, { "id": "identity_129729", "image": "A-OKVQA/images/66825.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are touching the horse sculpture?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 27.21875, "loss": 4.425076961517334 }, { "id": "identity_29375", "image": "PMC-VQA/images/PMC5289170_Fig2_122235.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging modality was used to capture the image? \nChoices:\n(A) X-ray \n(B) CT scan \n(C) MRI \n(D) PET scan " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.0, "loss": 4.442074298858643 }, { "id": "identity_184907", "image": "PMC-VQA/images/PMC2494588_fig3_26203.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What part of the leg is predominantly shown in the image?" }, { "from": "gpt", "value": "The answer is Thigh region" } ], "clip_score": 28.59375, "loss": 4.1893630027771 }, { "id": "identity_305705", "image": "DVQA/images/bar_train_00143636.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of this group of bars?" }, { "from": "gpt", "value": "The answer is chip" } ], "clip_score": 16.703125, "loss": 4.829763889312744 }, { "id": "identity_273416", "image": "AI2D/images/614.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is the final stage in the life cycle of a mosquito? \n(A) pupa \n(B) egg \n(C) larva \n(D) adult" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 33.96875, "loss": 4.087701797485352 }, { "id": "identity_69470", "image": "AI2D/images/2095.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many levels of consumers are depicted in this food chain?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 34.96875, "loss": 4.087882041931152 }, { "id": "identity_18853", "image": "PMC-VQA/images/PMC7723371_f2_316772.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the MRI show in the territory of the artery on the left? \nChoices:\n(A) Hypersignal \n(B) Iso signal \n(C) Hyposignal \n(D) None of the above. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.09375, "loss": 4.137251377105713 }, { "id": "identity_15984", "image": "FigureQA/images/5140.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Saddle Brown less than Red?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.09375, "loss": 4.06014347076416 }, { "id": "identity_274297", "image": "FigureQA/images/48467.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange Red the median with a high value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.4375, "loss": 4.532970428466797 }, { "id": "identity_13710", "image": "ScienceQA/images/5810.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these four objects have in common?\nChoices:\n(A) stretchy\n(B) fragile\n(C) flexible" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.421875, "loss": 4.862699031829834 }, { "id": "identity_209793", "image": "TextVQA/images/bd75fd0264fdaf5b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many light sources are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 15.65625, "loss": 4.3468780517578125 }, { "id": "identity_6242", "image": "PMC-VQA/images/PMC6425224_F4_453946.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which side of the sacrum is the lesion located in? \nChoices:\n(A) Right \n(B) Left \n(C) Anterior \n(D) Posterior " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.171875, "loss": 4.25308895111084 }, { "id": "identity_198308", "image": "TextVQA/images/4282b101d2690f61.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many birds are visible in the image?" }, { "from": "gpt", "value": "The answer is Three birds" } ], "clip_score": 21.46875, "loss": 4.213480472564697 }, { "id": "identity_261740", "image": "FigureQA/images/61638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Steel Blue the color with the largest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.1875, "loss": 4.42059326171875 }, { "id": "identity_333864", "image": "PMC-VQA/images/PMC8007494_Fig11_405724.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the calcification located anatomically?\nChoices:\n(A) insertion of a muscle.\n(B) insertion of another muscle.\n(C) insertion of a different muscle.\n(D) insertion of yet another muscle." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.46875, "loss": 4.928905963897705 }, { "id": "identity_66832", "image": "ScienceQA/images/3537.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What part of the plant is directly below the petals in the given image?" }, { "from": "gpt", "value": "The answer is Sepal" } ], "clip_score": 30.46875, "loss": 4.023300647735596 }, { "id": "identity_191991", "image": "TextVQA/images/05189c0ddf2b5e94.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many fingers is the person holding the camera using to support it from underneath?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.125, "loss": 4.406948566436768 }, { "id": "identity_124690", "image": "A-OKVQA/images/396519.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the shorts of the player with the number 55?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 19.03125, "loss": 4.074933052062988 }, { "id": "identity_309002", "image": "DVQA/images/bar_train_00177913.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset is represented by the steelblue color?" }, { "from": "gpt", "value": "The answer is farm" } ], "clip_score": 24.859375, "loss": 4.099043369293213 }, { "id": "identity_262589", "image": "AI2D/images/2582.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following choices represents the sun rays in the diagram? \n(A) M\n(B) E\n(C) F\n(D) none of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.484375, "loss": 4.074790954589844 }, { "id": "identity_37060", "image": "TextVQA/images/034ab6f9af333146.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what sport are they watching?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 21.796875, "loss": 4.394800662994385 }, { "id": "identity_325420", "image": "AI2D/images/987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter on the diagram indicates the follicle?\nChoices:\n(A) R\n(B) H\n(C) D\n(D) E" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.203125, "loss": 4.408966541290283 }, { "id": "identity_292424", "image": "IconQA/images/22293.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture is the fourth in the sequence, following the first picture of a rocket? Is it the star, rocket, or moon?\nChoices:\n(A) star\n(B) rocket\n(C) moon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.59375, "loss": 4.514465808868408 }, { "id": "identity_31557", "image": "A-OKVQA/images/527248.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How is the roof shape of the building called?\nChoices:\n(A) pointy roof\n(B) conical roof\n(C) pavilion roof\n(D) tower roof" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 7.66015625, "loss": 4.284614562988281 }, { "id": "identity_50997", "image": "DocVQA/images/qhxj0037_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two developments worth mentioning in the context of PVOC?" }, { "from": "gpt", "value": "The answer is SUMMA passivated canister and replacing sampling bags" } ], "clip_score": 19.125, "loss": 5.725103855133057 }, { "id": "identity_127361", "image": "Super-CLEVR/images/superCLEVR_new_001996.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the car that is located between the yellow road bike and the purple car?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 26.9375, "loss": 4.589127063751221 }, { "id": "identity_208726", "image": "A-OKVQA/images/55467.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the tablecloth?" }, { "from": "gpt", "value": "The answer is Transparent" } ], "clip_score": 17.65625, "loss": 4.5385332107543945 }, { "id": "identity_2948", "image": "PMC-VQA/images/PMC3765512_F2_230058.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the notable finding in the sagittal T1-weighted subtracted dynamic image of the left breast? \nChoices:\n(A) irregular mass \n(B) homogeneous mass \n(C) ascites \n(D) irregular calcifications " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.0625, "loss": 4.295039176940918 }, { "id": "identity_315248", "image": "AI2D/images/238.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the herbivores in the above food chain disappear, which group will be most impacted?\nChoices:\n(A) Carnivore\n(B) Plant\n(C) Phytoplankton\n(D) Primary producers" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 36.46875, "loss": 4.262206554412842 }, { "id": "identity_185245", "image": "VQA2.0/images/62127.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 19.75, "loss": 4.507553577423096 }, { "id": "identity_268229", "image": "FigureQA/images/50890.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Saddle Brown or Coral?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.0625, "loss": 4.553173065185547 }, { "id": "identity_288106", "image": "A-OKVQA/images/517017.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country's flag is visible near the building among the following options: United Kingdom, Russia, United States, or France?\nChoices:\n(A) united kingdom\n(B) russia\n(C) united states\n(D) france" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.8046875, "loss": 4.006685256958008 }, { "id": "identity_311336", "image": "VizWiz/images/VizWiz_train_00008371.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on this shirt?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 25.828125, "loss": 4.4053544998168945 }, { "id": "identity_61315", "image": "DocVQA/images/fgbg0024_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which event location is repeated for more than one event?" }, { "from": "gpt", "value": "The answer is No location is repeated" } ], "clip_score": 16.65625, "loss": 4.152212619781494 }, { "id": "identity_270933", "image": "TextVQA/images/bba20d711a767294.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the text written on the top line of the left page?" }, { "from": "gpt", "value": "The answer is on a watch sent home to" } ], "clip_score": 18.265625, "loss": 4.372455596923828 }, { "id": "identity_244461", "image": "PMC-VQA/images/PMC6689493_FIG1_22340.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the distinguishing features of the tumor in the left kidney as shown in the CT scan, and how do these features differentiate it from a cyst, stone, or inflammation?" }, { "from": "gpt", "value": "The answer is The mass has irregular borders and heterogeneous density" } ], "clip_score": 34.625, "loss": 4.150708198547363 }, { "id": "identity_257157", "image": "VQA2.0/images/237886.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sinks are available for use, and how many soap dispensers are there in total?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 27.453125, "loss": 4.210186004638672 }, { "id": "identity_271177", "image": "FigureQA/images/58963.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Turquoise the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.96875, "loss": 4.626820087432861 }, { "id": "identity_337551", "image": "A-OKVQA/images/114414.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this player going to do now?\nChoices:\n(A) sit\n(B) return home\n(C) hit someone\n(D) run" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.890625, "loss": 4.267718315124512 }, { "id": "identity_223298", "image": "TextVQA/images/bf588ad3668bd625.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the jersey?" }, { "from": "gpt", "value": "The answer is Navy blue" } ], "clip_score": 23.484375, "loss": 4.332055568695068 }, { "id": "identity_25384", "image": "PMC-VQA/images/PMC7693558_F2_306672.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the image pointed by the star? \nChoices:\n(A) Proximal part \n(B) Caudal part \n(C) Distal part \n(D) None of the above. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.5, "loss": 4.789772987365723 }, { "id": "identity_262596", "image": "FigureQA/images/84747.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Khaki or Midnight Blue?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.265625, "loss": 4.627542972564697 }, { "id": "identity_37818", "image": "Super-CLEVR/images/superCLEVR_new_001330.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer green tandem bikes than tiny red rubber fighters?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 24.9375, "loss": 5.1113481521606445 }, { "id": "identity_326599", "image": "TextVQA/images/4a5645fbdf764cfd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of items are in the box?" }, { "from": "gpt", "value": "The answer is organ action parts" } ], "clip_score": 25.265625, "loss": 6.0036187171936035 }, { "id": "identity_313893", "image": "TextVQA/images/952fbe4c44de98fe.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is being advertised?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 8.34375, "loss": 4.49769401550293 }, { "id": "identity_329941", "image": "A-OKVQA/images/105865.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What device might you find near this object?\nChoices:\n(A) phone\n(B) kindle\n(C) microwave\n(D) tv" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.53125, "loss": 4.410242557525635 }, { "id": "identity_126625", "image": "A-OKVQA/images/40635.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of establishment is shown on the sign above the fruit stalls?" }, { "from": "gpt", "value": "The answer is Netike Kreativa" } ], "clip_score": 25.59375, "loss": 6.612578868865967 }, { "id": "identity_33611", "image": "ScienceQA/images/14926.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) New York\n(B) Massachusetts\n(C) Mississippi\n(D) New Jersey" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.546875, "loss": 4.5724945068359375 }, { "id": "identity_334425", "image": "ScienceQA/images/9850.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic do these items share?\nChoices:\n(A) blue\n(B) sour\n(C) salty" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.046875, "loss": 4.223830699920654 }, { "id": "identity_69020", "image": "TQA/images/question_images/parts_microscope_7197.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objective lenses are present on this microscope?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 34.125, "loss": 4.157220840454102 }, { "id": "identity_336115", "image": "FigureQA/images/5062.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Light Coral?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.359375, "loss": 4.106694221496582 }, { "id": "identity_36079", "image": "PMC-VQA/images/PMC5992769_Fig1_326031.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which muscle is being scanned in the AbH image? \nChoices:\n(A) PB \n(B) Biceps \n(C) AbH \n(D) Triceps " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.078125, "loss": 4.020802974700928 }, { "id": "identity_295336", "image": "PMC-VQA/images/PMC7272461_Fig7_184439.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During the diagnostic tests, in which part of the spine was the lesion found?\nChoices:\n(A) Lumbar spine\n(B) Cervical spine\n(C) Thoracic spine\n(D) Sacral spine" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.78125, "loss": 4.1152472496032715 }, { "id": "identity_296084", "image": "A-OKVQA/images/556176.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of remotes are being held by the individuals?\nChoices:\n(A) stereo\n(B) air conditioner\n(C) TV\n(D) video game" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.46875, "loss": 4.059390544891357 }, { "id": "identity_47686", "image": "DocVQA/images/yfnp0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the director who signed the letter?" }, { "from": "gpt", "value": "The answer is David Deutsch" } ], "clip_score": 23.921875, "loss": 4.135189056396484 }, { "id": "identity_202786", "image": "VQA2.0/images/319682.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many horses are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.578125, "loss": 4.198129653930664 }, { "id": "identity_114344", "image": "A-OKVQA/images/104309.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pots are visible on the shelf above the sink?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 29.84375, "loss": 4.556558609008789 }, { "id": "identity_9401", "image": "ScienceQA/images/17953.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Oklahoma?\nChoices:\n(A) Oklahoma City\n(B) Tallahassee\n(C) Jefferson City\n(D) Atlanta" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.546875, "loss": 4.239870071411133 }, { "id": "identity_313808", "image": "PMC-VQA/images/PMC4455327_Fig4_393478.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what plane were these MR images of rat livers taken?\nChoices:\n(A) transverse plane \n(B) sagittal plane \n(C) coronal plane \n(D) vertical plane" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.515625, "loss": 4.320255279541016 }, { "id": "identity_333853", "image": "PMC-VQA/images/PMC7328697_FIG1_202923.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What condition is suggested by cholelithiasis and thickening of the gallbladder wall?\nChoices:\n(A) Acute cholecystitis \n(B) Gallbladder cancer \n(C) Chronic cholecystitis \n(D) Inflammatory bowel disease" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.609375, "loss": 4.097075462341309 }, { "id": "identity_266479", "image": "ScienceQA/images/15649.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following correctly describes the transfer of thermal energy during this time: from the surroundings to each greenhouse or from each greenhouse to the surroundings?\nChoices:\n(A) the surroundings . . . each greenhouse\n(B) each greenhouse . . . the surroundings" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.734375, "loss": 4.084478855133057 }, { "id": "identity_314343", "image": "AI2D/images/1080.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the diagram above?\nChoices:\n(A) Cell\n(B) None of the above\n(C) Plant\n(D) Volcano" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.453125, "loss": 4.044315814971924 }, { "id": "identity_263090", "image": "PMC-VQA/images/PMC4141340_Fig12_314910.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the structures indicated by the arrows in the image? Are they:\n(A) Blood vessels\n(B) Nerves\n(C) Bones\n(D) Mass and deposit" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.671875, "loss": 4.0262041091918945 }, { "id": "identity_6624", "image": "AI2D/images/3147.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the bottom-most part?\nChoices:\n(A) none of the above\n(B) vein\n(C) stem\n(D) blade" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.75, "loss": 4.317214488983154 }, { "id": "identity_324440", "image": "A-OKVQA/images/340594.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the individuals at the ends of each side of the table share in common?\nChoices:\n(A) glasses\n(B) coats\n(C) hats\n(D) backpacks" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.375, "loss": 4.252162933349609 }, { "id": "identity_288706", "image": "IconQA/images/76169.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the quantity of rectangles an even number or an odd number?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.21875, "loss": 4.189661979675293 }, { "id": "identity_2326", "image": "AI2D/images/3002.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the region that represents the leaflet?\nChoices:\n(A) b\n(B) a\n(C) d\n(D) c" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.109375, "loss": 4.280797481536865 }, { "id": "identity_285268", "image": "A-OKVQA/images/398142.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country's flag is currently being displayed? France, Italy, Switzerland, or England?\nChoices:\n(A) france\n(B) italy\n(C) switzerland\n(D) england" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.796875, "loss": 4.0754899978637695 }, { "id": "identity_313682", "image": "ScienceQA/images/5810.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic do these items share?\nChoices:\n(A) stretchy\n(B) fragile\n(C) flexible" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.046875, "loss": 5.06063985824585 }, { "id": "identity_14119", "image": "FigureQA/images/13170.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Slate the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.28125, "loss": 4.38193416595459 }, { "id": "identity_216924", "image": "A-OKVQA/images/134888.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible in the dugout?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.140625, "loss": 4.851916313171387 }, { "id": "identity_284075", "image": "PMC-VQA/images/PMC4671957_fig5_451194.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is represented by the second column in the figure?\nChoices:\n(A) The simulated maps in gray \n(B) The BP assembly by GA \n(C) The resolution of the maps \n(D) The actual maps at resolution 10." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.921875, "loss": 4.0450286865234375 }, { "id": "identity_253181", "image": "IconQA/images/31333.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the probability of the spinner landing on each color, and which color has the highest probability?" }, { "from": "gpt", "value": "The answer is white" } ], "clip_score": 25.359375, "loss": 4.711308479309082 }, { "id": "identity_215696", "image": "A-OKVQA/images/155312.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bat used by the player?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 22.375, "loss": 4.174017429351807 }, { "id": "identity_20852", "image": "A-OKVQA/images/305667.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are the two holding the umbrellas standing in the tunnel?\nChoices:\n(A) to hide\n(B) keeping dry\n(C) to kiss\n(D) boarding train" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.015625, "loss": 4.3111467361450195 }, { "id": "identity_156613", "image": "AI2D/images/1405.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term for the process of soil formation as shown in the diagram?" }, { "from": "gpt", "value": "The answer is Soil horizonation" } ], "clip_score": 31.796875, "loss": 4.130644798278809 }, { "id": "identity_261502", "image": "TabMWP/images/3931.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: With $92.00, does Kylie have sufficient funds to purchase both a hammock and a lawn chair?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.0, "loss": 4.141998291015625 }, { "id": "identity_248347", "image": "VQA2.0/images/524019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many items on the desk are primarily white in color?" }, { "from": "gpt", "value": "The answer is five" } ], "clip_score": 29.640625, "loss": 4.135331153869629 }, { "id": "identity_263112", "image": "PMC-VQA/images/PMC4709269_fig7_461017.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the dashed lines shown in the image?\nChoices:\n(A) Regions of high activity in the brain \n(B) The boundaries of different brain regions \n(C) The location of identical spots within the brain \n(D) None of the above." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.0, "loss": 4.470126152038574 }, { "id": "identity_334420", "image": "PMC-VQA/images/PMC8069390_diagnostics-11-00686-f002_425897.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What area of the image is the dilated element intended to cover in the DRL segmentation?\nChoices:\n(A) Bright center\n(B) Left side\n(C) Dark rim\n(D) Top edge" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.359375, "loss": 4.46099853515625 }, { "id": "identity_268464", "image": "FigureQA/images/73685.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cyan the highest in value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.21875, "loss": 4.614222526550293 }, { "id": "identity_197091", "image": "TextVQA/images/5693c81c6f066fca.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many posters can be seen on the table?" }, { "from": "gpt", "value": "The answer is Two posters" } ], "clip_score": 14.6015625, "loss": 4.269429683685303 }, { "id": "identity_299720", "image": "Super-CLEVR/images/superCLEVR_new_000240.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer SUVs than buses?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 21.734375, "loss": 5.215139865875244 }, { "id": "identity_330066", "image": "TextVQA/images/d8399cc5e867952d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this place?" }, { "from": "gpt", "value": "The answer is republican hotel" } ], "clip_score": 18.0, "loss": 4.631290912628174 }, { "id": "identity_45519", "image": "DVQA/images/bar_train_00182776.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which title has the highest value for the category 'plot'?" }, { "from": "gpt", "value": "The answer is stairs" } ], "clip_score": 24.40625, "loss": 4.481198787689209 }, { "id": "identity_308885", "image": "IconQA/images/60431.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a plane. What is the ninth image?\nChoices:\n(A) bear\n(B) train\n(C) plane" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.59375, "loss": 4.760465621948242 }, { "id": "identity_327255", "image": "PlotQA/images/49330.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label or title of this axis?" }, { "from": "gpt", "value": "The answer is Year" } ], "clip_score": 18.765625, "loss": 4.385543346405029 }, { "id": "identity_32512", "image": "IconQA/images/39160.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you select a marble without looking, which color are you less likely to pick?\nChoices:\n(A) green\n(B) black" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.25, "loss": 4.496657371520996 }, { "id": "identity_269582", "image": "TextVQA/images/a505c1868bd97294.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which brand of camera is being held by the girl on the right?" }, { "from": "gpt", "value": "The answer is canon" } ], "clip_score": 29.640625, "loss": 4.103443145751953 }, { "id": "identity_32847", "image": "A-OKVQA/images/408624.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the small floats on the boats sides here meant to prevent the boats doing?\nChoices:\n(A) getting lost\n(B) soaring\n(C) bumping\n(D) sinking" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.8828125, "loss": 4.480626583099365 }, { "id": "identity_322735", "image": "ScienceQA/images/5784.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Springfield\n(B) Provo\n(C) Chicago\n(D) Madison" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.625, "loss": 4.296077728271484 }, { "id": "identity_317316", "image": "VQA-RAD/images/synpic60703.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of artifact is depicted in this image?" }, { "from": "gpt", "value": "The answer is Motion" } ], "clip_score": 22.65625, "loss": 4.754310131072998 }, { "id": "identity_61760", "image": "DocVQA/images/lfpf0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last word in the document?" }, { "from": "gpt", "value": "The answer is MINUS" } ], "clip_score": 18.328125, "loss": 4.142992973327637 }, { "id": "identity_336495", "image": "A-OKVQA/images/505469.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the bird standing above?\nChoices:\n(A) fruit\n(B) baby\n(C) egg\n(D) cardboard box" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.71875, "loss": 4.47792911529541 }, { "id": "identity_19558", "image": "A-OKVQA/images/469605.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is behind the man that is standing?\nChoices:\n(A) fish\n(B) dogs\n(C) tents\n(D) barbed wire" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.46875, "loss": 4.5583295822143555 }, { "id": "identity_14003", "image": "A-OKVQA/images/216449.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What large substance will the youngest child be ingesting?\nChoices:\n(A) burrito\n(B) tortilla\n(C) taco\n(D) pizza" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.46875, "loss": 4.204714298248291 }, { "id": "identity_6848", "image": "ScienceQA/images/17034.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Rhode Island\n(B) Vermont\n(C) Virginia\n(D) Maine" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.8125, "loss": 4.201807975769043 }, { "id": "identity_205703", "image": "TextVQA/images/16148c308de6d401.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bottles in the image have a visible number '12' on their labels?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.296875, "loss": 4.322780609130859 }, { "id": "identity_296823", "image": "PMC-VQA/images/PMC6023823_F1_334814.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was revealed in the abdomen x-ray?\nChoices:\n(A) soft tissue mass \n(B) bone fracture \n(C) gaseous distention of bowel with air-fluid levels \n(D) brain hemorrhage" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.140625, "loss": 4.0777082443237305 }, { "id": "identity_266406", "image": "VQA2.0/images/543642.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the current location of Snow White?" }, { "from": "gpt", "value": "The answer is book" } ], "clip_score": 17.796875, "loss": 4.688408851623535 }, { "id": "identity_325087", "image": "ScienceQA/images/20882.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Boise\n(B) Helena\n(C) Louisville\n(D) Salem" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.5, "loss": 4.446637153625488 }, { "id": "identity_335736", "image": "A-OKVQA/images/250192.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this animal doing?\nChoices:\n(A) drinking\n(B) painting\n(C) playing soccer\n(D) eating" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.078125, "loss": 4.201068878173828 }, { "id": "identity_296151", "image": "PMC-VQA/images/PMC7061816_FIG1_130555.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which anatomical structure is responsible for the increased mass observed in the images?\nOptions:\n(A) Hippocampus\n(B) Caudate nucleus\n(C) Septum pellucidum and corpus callosum\n(D) Putamen" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.515625, "loss": 4.560331344604492 }, { "id": "identity_30180", "image": "A-OKVQA/images/390515.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the man to the right of the black vehicle laying on?\nChoices:\n(A) box\n(B) sofa\n(C) bench\n(D) case" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.6875, "loss": 4.420579433441162 }, { "id": "identity_337637", "image": "PMC-VQA/images/PMC7567400_f2_276956.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the top-left corner of the axial view?\nChoices:\n(A) The technician-defined tumor regions \n(B) The DeepMedic-defined tumor regions \n(C) The mean neuroradiologist score \n(D) The exam date." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.203125, "loss": 4.419747352600098 }, { "id": "identity_128455", "image": "Super-CLEVR/images/superCLEVR_new_000234.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 19.25, "loss": 4.79545259475708 }, { "id": "identity_319736", "image": "VQA2.0/images/448607.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the colors of the umbrellas?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 21.96875, "loss": 4.10178279876709 }, { "id": "identity_327956", "image": "A-OKVQA/images/378090.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time of day is depicted here?\nChoices:\n(A) morning\n(B) dawn\n(C) evening\n(D) afternoon" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 6.6953125, "loss": 4.234216213226318 }, { "id": "identity_289998", "image": "ScienceQA/images/10532.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Nevada?\n(A) Carson City\n(B) Phoenix\n(C) Chicago\n(D) Sacramento" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.359375, "loss": 4.084122657775879 }, { "id": "identity_37067", "image": "TQA/images/question_images/blastocyst_9034.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Considering a Blastocyst, what cell portion will develop into the new human being?\nChoices:\n(A) Embryoblast\n(B) Trophoblast\n(C) Blastocyst Cavity\n(D) Zygote" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 36.5, "loss": 5.167971611022949 }, { "id": "identity_298554", "image": "A-OKVQA/images/490739.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of group is the man on the left most likely a member of?\nChoices:\n(A) politician\n(B) teacher\n(C) farmer\n(D) clergy" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.4375, "loss": 4.523061752319336 }, { "id": "identity_146005", "image": "DocVQA/images/nsbm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the article mentioned in the letter?" }, { "from": "gpt", "value": "The answer is Frontispieces with Hidden Meanings" } ], "clip_score": 16.5625, "loss": 4.061585426330566 }, { "id": "identity_13472", "image": "PMC-VQA/images/PMC6639972_Fig3_11423.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which hemisphere of the brain is shown on the left side of the image? \nChoices:\n(A) Left hemisphere \n(B) Right hemisphere \n(C) Both hemispheres \n(D) Cannot be determined " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.21875, "loss": 4.043593883514404 }, { "id": "identity_215381", "image": "A-OKVQA/images/265243.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any vehicles visible in the image that are not in motion?" }, { "from": "gpt", "value": "The answer is No stationary vehicles visible" } ], "clip_score": 20.890625, "loss": 4.229355335235596 }, { "id": "identity_145215", "image": "DocVQA/images/qrgf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the sender of this letter?" }, { "from": "gpt", "value": "The answer is Esther F. Philpand" } ], "clip_score": 25.15625, "loss": 4.103085041046143 }, { "id": "identity_325564", "image": "PMC-VQA/images/PMC3766138_F1_230254.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique shown?\nChoices:\n(A) CT-scan\n(B) MRI\n(C) X-ray\n(D) PET-scan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.0625, "loss": 4.283189296722412 }, { "id": "identity_19185", "image": "PMC-VQA/images/PMC4937708_F1_49450.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which region can be filled with the radioactive solution? \nChoices:\n(A) Skull \n(B) Scalp \n(C) Cerebrospinal fluid space \n(D) Grey matter " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.96875, "loss": 5.051600456237793 }, { "id": "identity_311338", "image": "FigureQA/images/84242.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this color have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.4375, "loss": 4.055054664611816 }, { "id": "identity_310871", "image": "IconQA/images/38312.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there enough lids for each of these?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.8125, "loss": 4.005306243896484 }, { "id": "identity_263798", "image": "DVQA/images/bar_train_00018012.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In any given category, which object is the most favored?" }, { "from": "gpt", "value": "The answer is regime" } ], "clip_score": 26.15625, "loss": 4.520982265472412 }, { "id": "identity_200176", "image": "PMC-VQA/images/PMC7417738_Fig2_232399.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the structure labeled with white dots surrounding the yellow area?" }, { "from": "gpt", "value": "The answer is OTS" } ], "clip_score": 21.515625, "loss": 4.243223667144775 }, { "id": "identity_9009", "image": "PMC-VQA/images/PMC5803221_Fig3_272997.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of microscopy was used to capture the images? \nChoices:\n(A) Electron microscopy \n(B) Ordinary light microscopy \n(C) Fluorescence microscopy \n(D) Scanning probe microscopy " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.265625, "loss": 4.225441932678223 }, { "id": "identity_126888", "image": "PMC-VQA/images/PMC7604333_F7_289139.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the abbreviation \"IBS\" stand for in the context of this image?" }, { "from": "gpt", "value": "The answer is Interbundle septum" } ], "clip_score": 31.265625, "loss": 4.213153839111328 }, { "id": "identity_283542", "image": "DocVQA/images/jsbn0226_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which title appears first in the document?" }, { "from": "gpt", "value": "The answer is liabilities" } ], "clip_score": 25.96875, "loss": 4.56411600112915 }, { "id": "identity_289036", "image": "A-OKVQA/images/343322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is the name of the cantina?\n(A) gary's\n(B) fred's\n(C) george's\n(D) bill's" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.65625, "loss": 4.253714561462402 }, { "id": "identity_337748", "image": "Super-CLEVR/images/superCLEVR_new_000883.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the number of vehicles in front of the matte car the same as the number of planes?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 27.1875, "loss": 5.024656772613525 }, { "id": "identity_118788", "image": "TextVQA/images/4db6d43d361105c5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are wearing white and red uniforms?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.9375, "loss": 4.300364017486572 }, { "id": "identity_12650", "image": "FigureQA/images/15088.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Green the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.03125, "loss": 4.245242118835449 }, { "id": "identity_220641", "image": "VQA-AS/images/1652.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows are visible in the room?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.34375, "loss": 4.470035552978516 }, { "id": "identity_268755", "image": "AI2D/images/1026.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The image displays various types of ___________ of conifer trees.\nOptions:\n(A) stems\n(B) Roots\n(C) Leaves\n(D) Thorns" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.40625, "loss": 4.106988906860352 }, { "id": "identity_311993", "image": "AI2D/images/4368.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of label E in the image?\nChoices:\n(A) pecan\n(B) brazil\n(C) hazelnut\n(D) almond" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.765625, "loss": 4.148198127746582 }, { "id": "identity_123302", "image": "A-OKVQA/images/563542.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of establishment is located on the ground floor of the yellow building?" }, { "from": "gpt", "value": "The answer is A restaurant" } ], "clip_score": 20.484375, "loss": 4.152410984039307 }, { "id": "identity_113614", "image": "VQA-AS/images/12623.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many plates are visible on the picnic blanket?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 31.046875, "loss": 4.47635555267334 }, { "id": "identity_198776", "image": "A-OKVQA/images/233112.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What pattern is on the shower curtain?" }, { "from": "gpt", "value": "The answer is Abstract geometric shapes" } ], "clip_score": 21.265625, "loss": 4.83603572845459 }, { "id": "identity_298457", "image": "A-OKVQA/images/97097.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What risk is associated with engaging in this activity without wearing a jacket?\nChoices:\n(A) starvation\n(B) hypothermia\n(C) bear attack\n(D) dehydration" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.9609375, "loss": 4.445620536804199 }, { "id": "identity_183573", "image": "PMC-VQA/images/PMC7513663_F1_260666.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bones can be identified in the image?" }, { "from": "gpt", "value": "The answer is Five bones" } ], "clip_score": 23.140625, "loss": 4.020150184631348 }, { "id": "identity_116615", "image": "PMC-VQA/images/PMC4619311_pone.0140106.g004_436645.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the largest cavity depicted in the image?" }, { "from": "gpt", "value": "The answer is Irregular and rounded" } ], "clip_score": 23.0, "loss": 4.387592792510986 }, { "id": "identity_271521", "image": "A-OKVQA/images/388177.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the original color of this truck? Was it:\n(A) red\n(B) green\n(C) yellow\n(D) blue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.71875, "loss": 4.87451171875 }, { "id": "identity_29616", "image": "A-OKVQA/images/445503.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the people located?\nChoices:\n(A) restaurant\n(B) garage\n(C) stadium\n(D) office building" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.109375, "loss": 4.364499568939209 }, { "id": "identity_197741", "image": "A-OKVQA/images/158713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many passengers are being carried by the vehicle?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.140625, "loss": 4.016339302062988 }, { "id": "identity_10836", "image": "TextVQA/images/1a625079edf146ff.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what kind of suits are these?" }, { "from": "gpt", "value": "The answer is mobile" } ], "clip_score": 13.390625, "loss": 4.144515037536621 }, { "id": "identity_295354", "image": "PMC-VQA/images/PMC6206383_Fig24_387536.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the winding structures seen in the head CT images indicate?\nChoices:\n(A) Arteries\n(B) Bones\n(C) Venous channels\n(D) Nerves" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.390625, "loss": 4.577003479003906 }, { "id": "identity_276943", "image": "PMC-VQA/images/PMC4151864_fig5_317318.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the pattern observed in the image, what was the initial suspicion regarding the condition shown?\nChoices:\n(A) Benignity \n(B) Infection \n(C) Malignancy \n(D) None of the Above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.296875, "loss": 4.023498058319092 }, { "id": "identity_217274", "image": "A-OKVQA/images/258249.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the umbrella?" }, { "from": "gpt", "value": "The answer is Transparent with white border" } ], "clip_score": 29.03125, "loss": 4.127665996551514 }, { "id": "identity_110680", "image": "TextVQA/images/5328eb74f18893d1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant activity of the people in the image?" }, { "from": "gpt", "value": "The answer is Beach-going or strolling" } ], "clip_score": 19.265625, "loss": 4.112874507904053 }, { "id": "identity_292219", "image": "PMC-VQA/images/PMC6392861_F2_442004.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the body was the inflammation located?\nChoices:\n(A) Left cerebral hemisphere \n(B) Spinal cord \n(C) Right cerebral hemisphere \n(D) Cerebellum" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.125, "loss": 4.485450744628906 }, { "id": "identity_262250", "image": "AI2D/images/359.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What comes immediately after the egg stage in the life cycle diagram shown below?\nChoices:\n(A) death\n(B) pupa\n(C) egg\n(D) larva" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.0, "loss": 4.166823387145996 }, { "id": "identity_232809", "image": "TextVQA/images/05ff38279bc0d7d6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main message on the billboard, and which TV show is being advertised alongside it?" }, { "from": "gpt", "value": "The answer is The truth will spread, Helix on Syfy" } ], "clip_score": 19.71875, "loss": 4.642027378082275 }, { "id": "identity_326808", "image": "PMC-VQA/images/PMC5608210_pone.0183979.g003_218259.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which two particular images should be compared to see the artifacts?\nChoices:\n(A) A and B \n(B) C and F \n(C) D and G \n(D) E and H" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.328125, "loss": 4.147581577301025 }, { "id": "identity_319984", "image": "TextVQA/images/0f2e50dac6fcb058.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is wearing number 14?" }, { "from": "gpt", "value": "The answer is lorance" } ], "clip_score": 11.4375, "loss": 4.886151313781738 }, { "id": "identity_305013", "image": "PMC-VQA/images/PMC7848076_F6_356972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of echocardiography was utilized?\nChoices:\n(A) Transesophageal echocardiography\n(B) M-mode echocardiography\n(C) Doppler echocardiography\n(D) 3D echocardiography" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.59375, "loss": 4.711911201477051 }, { "id": "identity_27917", "image": "PMC-VQA/images/PMC9293144_dc24875-fig-0002_343806.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the staining procedure used to visualize the cell?\nChoices:\n(A) H&E stain \n(B) Giemsa stain \n(C) Papanicolaou stain \n(D) Hematoxylin stain " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.671875, "loss": 4.428746700286865 }, { "id": "identity_321963", "image": "A-OKVQA/images/102506.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the function of these colored lights?\nChoices:\n(A) decoration\n(B) decoration\n(C) traffic control\n(D) illumination" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.171875, "loss": 4.708378791809082 }, { "id": "identity_128583", "image": "VQA2.0/images/159601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the building on the left?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 13.7421875, "loss": 4.645725250244141 }, { "id": "identity_260518", "image": "A-OKVQA/images/300631.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What steps must the man follow to replace the tube on the tire, and what tools are visible in the image that he might use for this task?" }, { "from": "gpt", "value": "The answer is Tire levers and wrench" } ], "clip_score": 19.96875, "loss": 4.834214687347412 }, { "id": "identity_279641", "image": "PMC-VQA/images/PMC3914336_fig2_264315.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is the pseudocyst situated?\nChoices:\n(A) Anterior vaginal wall\n(B) Posterior vaginal wall\n(C) Cervix\n(D) Uterus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.21875, "loss": 4.0629963874816895 }, { "id": "identity_326057", "image": "Super-CLEVR/images/superCLEVR_new_001320.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the motorbike that is in front of a large gray metallic object to the left of the metal fighter made of?" }, { "from": "gpt", "value": "The answer is rubber" } ], "clip_score": 27.25, "loss": 4.354213714599609 }, { "id": "identity_269236", "image": "ScienceQA/images/20534.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Maryland?\n(A) Annapolis\n(B) Athens\n(C) Davenport\n(D) Dallas" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.28125, "loss": 4.114058017730713 }, { "id": "identity_44679", "image": "FigureQA/images/37082.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 30.78125, "loss": 4.143476963043213 }, { "id": "identity_338438", "image": "TQA/images/question_images/greenhouse_effect_6944.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What causes the greenhouse effect?\nChoices:\n(A) Incoming Energy.\n(B) Energy trapped in atmosphere.\n(C) Reflected Energy.\n(D) Outgoing Energy." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.921875, "loss": 4.05682897567749 }, { "id": "identity_291927", "image": "VQA-AS/images/4328.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the sofa colored red?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 24.796875, "loss": 4.106034755706787 }, { "id": "identity_272420", "image": "A-OKVQA/images/525024.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for their activity of flying kites?\nChoices:\n(A) are bored\n(B) want money\n(C) showing off\n(D) great weather" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.296875, "loss": 4.2193756103515625 }, { "id": "identity_3655", "image": "PMC-VQA/images/PMC5703880_Fig8_244525.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of microscope was used to capture the images? \nChoices:\n(A) SEM \n(B) TEM \n(C) Confocal Microscopy \n(D) Fluorescence Microscopy " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.046875, "loss": 4.383542060852051 }, { "id": "identity_43882", "image": "DocVQA/images/zhfg0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the transmission mode used for this report?" }, { "from": "gpt", "value": "The answer is XMR" } ], "clip_score": 24.0625, "loss": 4.773271083831787 }, { "id": "identity_204342", "image": "VQA2.0/images/426663.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible legs does the horse have?" }, { "from": "gpt", "value": "The answer is three" } ], "clip_score": 19.484375, "loss": 4.028409004211426 }, { "id": "identity_14495", "image": "AI2D/images/3089.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the tips of the hair like parts of the nerve called?\nChoices:\n(A) Dendrites\n(B) Axon\n(C) Soma\n(D) Cell" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.5, "loss": 4.090348720550537 }, { "id": "identity_295885", "image": "A-OKVQA/images/555684.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: By whom was the dog in the image placed there?\nChoices:\n(A) self\n(B) cat\n(C) mortal enemy\n(D) owner" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.1015625, "loss": 4.185378074645996 }, { "id": "identity_337796", "image": "TabMWP/images/26915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is it linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.453125, "loss": 4.368975639343262 }, { "id": "identity_14932", "image": "ScienceQA/images/3321.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of North Carolina?\nChoices:\n(A) Charlotte\n(B) Raleigh\n(C) Frankfort\n(D) Jefferson City" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.5625, "loss": 4.37801456451416 }, { "id": "identity_277839", "image": "TextVQA/images/61408ab56f2fb616.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which program can assist you in advancing your dreams?" }, { "from": "gpt", "value": "The answer is quickmemo" } ], "clip_score": 20.609375, "loss": 4.6202898025512695 }, { "id": "identity_249207", "image": "PMC-VQA/images/PMC3755418_fig4_227731.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the characteristics of the lesion in the left breast that suggest it is metastasis rather than benign tissue, lymphadenopathy, or an abscess?" }, { "from": "gpt", "value": "The answer is Irregular, spiculated mass with heterogeneous density" } ], "clip_score": 29.96875, "loss": 4.015435695648193 }, { "id": "identity_299706", "image": "PMC-VQA/images/PMC4718303_F1_463166.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What were the findings of the imaging in the patient?\nChoices:\n(A) Normal brain function \n(B) Hypometabolism in the right hemisphere \n(C) No observable changes \n(D) Hypometabolism in the left hemisphere." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.796875, "loss": 4.025822162628174 }, { "id": "identity_206849", "image": "VQA2.0/images/173196.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 14.8515625, "loss": 4.262728691101074 }, { "id": "identity_191203", "image": "TextVQA/images/9aff1f3584844e37.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the fare for a two-axle vehicle according to the coin?" }, { "from": "gpt", "value": "The answer is One fare" } ], "clip_score": 24.15625, "loss": 4.517316818237305 }, { "id": "identity_271805", "image": "FigureQA/images/5733.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange Red the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.296875, "loss": 4.465695858001709 }, { "id": "identity_33275", "image": "TextVQA/images/9c53c69ab5c39f21.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what brand of shoes are these?" }, { "from": "gpt", "value": "The answer is alysa" } ], "clip_score": 28.21875, "loss": 4.10552978515625 }, { "id": "identity_148978", "image": "DocVQA/images/tgdd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the editor who signed the letter?" }, { "from": "gpt", "value": "The answer is Lin Richter" } ], "clip_score": 23.40625, "loss": 5.383930683135986 }, { "id": "identity_275812", "image": "A-OKVQA/images/298051.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which term describes a band with this number of members: quartet, cinqtet, duet, or sextet?\nChoices:\n(A) quartet\n(B) cinqtet\n(C) duet\n(D) sextet" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.0625, "loss": 4.393853664398193 }, { "id": "identity_273237", "image": "FigureQA/images/14632.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Red not the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.578125, "loss": 4.060170650482178 }, { "id": "identity_285098", "image": "PMC-VQA/images/PMC4928250_Fig1_46722.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of image was utilized to identify the defect among the following choices: \n(A) Axial, T2-weighted image \n(B) Sagittal, T1-weighted image \n(C) Coronal, T2-weighted image \n(D) Sagittal, proton density-weighted image" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.125, "loss": 4.1295928955078125 }, { "id": "identity_314321", "image": "VQA2.0/images/40407.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the individual directly underneath the kite male or female?" }, { "from": "gpt", "value": "The answer is female" } ], "clip_score": 19.359375, "loss": 4.1071882247924805 }, { "id": "identity_183315", "image": "VQA2.0/images/222036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible traffic cones are there on the sand?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.421875, "loss": 4.380051612854004 }, { "id": "identity_122439", "image": "PMC-VQA/images/PMC8129905_Ch1.F1_444244.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the likely clinical indication for this imaging study?" }, { "from": "gpt", "value": "The answer is Suspected shoulder injury/pathology" } ], "clip_score": 32.34375, "loss": 4.219336986541748 }, { "id": "identity_293904", "image": "A-OKVQA/images/199437.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the function of the item located on the far right among the following options: (A) slices meat, (B) warms room, (C) destroys DNA, (D) cuts cake?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.1875, "loss": 4.244525909423828 }, { "id": "identity_47283", "image": "DocVQA/images/hqvf0065_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person who approved the visit?" }, { "from": "gpt", "value": "The answer is Frank Buck" } ], "clip_score": 23.421875, "loss": 5.60992431640625 }, { "id": "identity_336131", "image": "PMC-VQA/images/PMC6562476_cells-08-00422-f009_492685.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where were the confocal images taken from in the medaka?\nChoices:\n(A) Tail fin\n(B) Pectoral fin\n(C) Gills\n(D) Brain" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.921875, "loss": 4.075111389160156 }, { "id": "identity_27068", "image": "TextVQA/images/7e12d937e6e90065.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what time is it according to the stove's clock?" }, { "from": "gpt", "value": "The answer is thursdays until 9pm" } ], "clip_score": 29.8125, "loss": 4.073914051055908 }, { "id": "identity_5641", "image": "Super-CLEVR/images/superCLEVR_new_000617.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a bicycle; is it the same size as the gray airliner to the left of the motorbike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 27.734375, "loss": 4.803529262542725 }, { "id": "identity_281767", "image": "TextVQA/images/7a805588525a71e4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which year was the wine produced?" }, { "from": "gpt", "value": "The answer is colon" } ], "clip_score": 20.6875, "loss": 6.5809173583984375 }, { "id": "identity_265159", "image": "FigureQA/images/67411.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Red the color with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.59375, "loss": 4.724829196929932 }, { "id": "identity_266415", "image": "FigureQA/images/17087.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Turquoise considered the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.484375, "loss": 4.254458427429199 }, { "id": "identity_217838", "image": "Super-CLEVR/images/superCLEVR_new_001353.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is to the left of the golden motorcycle and in front of the blue car?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 33.84375, "loss": 4.771237373352051 }, { "id": "identity_299937", "image": "A-OKVQA/images/247057.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what position is the center point of the snowboard on the rail for this player?\nChoices:\n(A) flat\n(B) parallel\n(C) right turn\n(D) curved" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.828125, "loss": 4.478297233581543 }, { "id": "identity_9055", "image": "FigureQA/images/6522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Saddle Brown greater than Red?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.875, "loss": 4.068846702575684 }, { "id": "identity_317953", "image": "AI2D/images/713.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What applies pressure to the Rock?\nChoices:\n(A) Ocean\n(B) Lake\n(C) River\n(D) Pond" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.75, "loss": 4.0696330070495605 }, { "id": "identity_337426", "image": "PMC-VQA/images/PMC6159166_f1_373056.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the black arrowheads in this image?\nChoices:\n(A) The lesion.\n(B) The peripheral rim of enhancement.\n(C) The central area of enhancement.\n(D) A different lesion." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.84375, "loss": 4.371938228607178 }, { "id": "identity_312541", "image": "A-OKVQA/images/108075.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person using the computer to do?\nChoices:\n(A) game\n(B) watch movie\n(C) film\n(D) type" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.015625, "loss": 4.132845401763916 }, { "id": "identity_308422", "image": "PMC-VQA/images/PMC6032467_F8_337932.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of images were used to examine this patient?\nChoices:\n(A) MRI\n(B) CT scan\n(C) X-ray\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.84375, "loss": 4.1915740966796875 }, { "id": "identity_276402", "image": "FigureQA/images/88829.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly describes the smoothness of Dark Salmon? \n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.265625, "loss": 4.040825366973877 }, { "id": "identity_241192", "image": "Super-CLEVR/images/superCLEVR_new_001796.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image have the same shape as the gray object on the left side of the tiny gray matte object, and what are their colors?" }, { "from": "gpt", "value": "The answer is Two objects, gray and blue" } ], "clip_score": 20.984375, "loss": 4.16995096206665 }, { "id": "identity_311117", "image": "PMC-VQA/images/PMC7546424_F7_269083.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of algae is being examined in the confocal images?\nChoices:\n(A) Chlorella sp.\n(B) Spirulina sp.\n(C) Halothece sp.\n(D) Dunaliella sp." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.34375, "loss": 4.159715175628662 }, { "id": "identity_273676", "image": "FigureQA/images/43176.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Gold or Orange Red? \nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.546875, "loss": 4.701987266540527 }, { "id": "identity_62160", "image": "DocVQA/images/fqbg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the conditions for becoming an emeritus member?" }, { "from": "gpt", "value": "The answer is Retires at age limit or retired because of illness" } ], "clip_score": 18.375, "loss": 4.3112473487854 }, { "id": "identity_5071", "image": "ScienceQA/images/10517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these two objects have in common?\nChoices:\n(A) blue\n(B) bouncy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.09375, "loss": 4.582183837890625 }, { "id": "identity_134011", "image": "TextVQA/images/0b2f523a4e734bec.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the structure above the aircraft made of?" }, { "from": "gpt", "value": "The answer is Metal" } ], "clip_score": 24.59375, "loss": 4.10044527053833 }, { "id": "identity_4203", "image": "ScienceQA/images/19522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which solution has a higher concentration of blue particles?\nChoices:\n(A) Solution B\n(B) Solution A\n(C) neither; their concentrations are the same" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.6875, "loss": 4.148240089416504 }, { "id": "identity_7920", "image": "FigureQA/images/22661.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Periwinkle the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.015625, "loss": 4.092437744140625 }, { "id": "identity_318004", "image": "FigureQA/images/10093.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Chartreuse the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.46875, "loss": 4.019742012023926 }, { "id": "identity_328928", "image": "PMC-VQA/images/PMC7332069_pone.0234881.g001_204196.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which side of the patient is shown on the right-hand side of this?\nChoices:\n(A) right side\n(B) left side\n(C) front side\n(D) back side" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.921875, "loss": 4.469015598297119 }, { "id": "identity_304845", "image": "TQA/images/question_images/sun_layers_6308.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the cap that covers the core of this layer?\nChoices:\n(A) sunspot\n(B) corona\n(C) prominence\n(D) radiative zone" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.046875, "loss": 4.291827201843262 }, { "id": "identity_119125", "image": "A-OKVQA/images/39928.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible street lamps are there in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.578125, "loss": 4.5984930992126465 }, { "id": "identity_129652", "image": "VQA2.0/images/120052.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing hats?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 15.328125, "loss": 4.526468753814697 }, { "id": "identity_201752", "image": "PMC-VQA/images/PMC5963950_f3_317361.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different views of the brain are presented in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.9375, "loss": 4.5723700523376465 }, { "id": "identity_297455", "image": "PMC-VQA/images/PMC7744532_Fig3_322190.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the blue areas in the graph?\nChoices:\n(A) Longer durations \n(B) Shorter durations \n(C) Higher volume in the lower MI group \n(D) Lower volume in the lower MI group." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.84375, "loss": 5.147305488586426 }, { "id": "identity_301965", "image": "ScienceQA/images/20123.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Columbia\n(B) Bismarck\n(C) Jackson\n(D) Nashville" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.09375, "loss": 4.196153163909912 }, { "id": "identity_227309", "image": "AI2D/images/877.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: From the above food web diagram, if the population of seagulls increases, what will be the effect on the population of sea anemones?" }, { "from": "gpt", "value": "The answer is Decreases" } ], "clip_score": 39.375, "loss": 4.066387176513672 }, { "id": "identity_28299", "image": "PMC-VQA/images/PMC5963950_f3_317361.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the plots show?\nChoices:\n(A) Random values \n(B) Parameter estimates \n(C) Cluster peaks \n(D) Standard deviation" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.9765625, "loss": 4.3530683517456055 }, { "id": "identity_297148", "image": "PMC-VQA/images/PMC6077554_fig1_350465.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following structures is not visible in the cerebral CT scan? \nChoices:\n(A) White matter\n(B) Gray matter\n(C) Lateral ventricles\n(D) Sylvian fissures" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.71875, "loss": 4.139106750488281 }, { "id": "identity_266474", "image": "A-OKVQA/images/181367.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the object located near the woman on the left in the image?\nChoices:\n(A) monkey\n(B) bicycle\n(C) egg\n(D) bear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.6328125, "loss": 4.551466941833496 }, { "id": "identity_308196", "image": "PMC-VQA/images/PMC5380283_F3_149141.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the new lesions found?\nChoices:\n(A) Frontal cerebral ventricle.\n(B) Middle cerebral ventricle.\n(C) Lateral cerebral ventricle.\n(D) Fourth ventricle." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.28125, "loss": 4.429171562194824 }, { "id": "identity_282363", "image": "ScienceQA/images/2863.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which chemical formula corresponds to this molecule? \nChoices:\n(A) HCl\n(B) HClN\n(C) HC\n(D) H2Cl" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.5625, "loss": 4.005858898162842 }, { "id": "identity_336588", "image": "PMC-VQA/images/PMC6957557_Fig3_99164.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrow indicate? \nChoices:\n(A) A previous gastrostomy site \n(B) A probe insertion site \n(C) An ulcer area after ESD \n(D) Healthy tissue around an ulcer area." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.921875, "loss": 4.684521675109863 }, { "id": "identity_9209", "image": "PMC-VQA/images/PMC6206386_Fig10_387600.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique was used to obtain the images shown? \nChoices:\n(A) CT scan \n(B) MRI \n(C) X-ray \n(D) PET scan " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.078125, "loss": 4.016270160675049 }, { "id": "identity_14773", "image": "ScienceQA/images/1126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does pollen help a plant do?\nChoices:\n(A) grow bigger\n(B) make seeds\n(C) grow new leaves" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.46875, "loss": 4.1848626136779785 }, { "id": "identity_110965", "image": "PMC-VQA/images/PMC5880870_Fig2_296385.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct red areas can be counted in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 23.015625, "loss": 4.5978169441223145 }, { "id": "identity_284385", "image": "DVQA/images/bar_train_00005221.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the bars run horizontally?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 18.6875, "loss": 4.41296911239624 }, { "id": "identity_189920", "image": "A-OKVQA/images/515660.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the green item next to the bread?" }, { "from": "gpt", "value": "The answer is A lime" } ], "clip_score": 13.2578125, "loss": 4.2234416007995605 }, { "id": "identity_314547", "image": "A-OKVQA/images/516174.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the blue piece on the end of this?\n\nChoices:\n(A) slats\n(B) wing\n(C) spoiler\n(D) vertical stabilizer" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.484375, "loss": 4.261539936065674 }, { "id": "identity_247642", "image": "AI2D/images/2531.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Arrange the moon phases in the correct order starting from the new moon phase and identify the label of the phase that comes immediately before the full moon phase" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.65625, "loss": 4.38590145111084 }, { "id": "identity_303666", "image": "TQA/images/question_images/human_system_reproductory_7016.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is directly below this organ?\nChoices:\n(A) prostate\n(B) rectum\n(C) seminal vesicles\n(D) anus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.9375, "loss": 4.55255126953125 }, { "id": "identity_2939", "image": "VizWiz/images/VizWiz_train_00011327.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Okay, I need to know where the arrow is and where the heavy load is?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 16.515625, "loss": 4.039050579071045 }, { "id": "identity_119066", "image": "PMC-VQA/images/PMC3366891_F8_140377.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image shown?" }, { "from": "gpt", "value": "The answer is Coronal plane" } ], "clip_score": 23.40625, "loss": 4.097681045532227 }, { "id": "identity_208587", "image": "A-OKVQA/images/48034.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many slalom gates can be seen in the image?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 22.140625, "loss": 4.392469882965088 }, { "id": "identity_269319", "image": "FigureQA/images/80571.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly identifies whether Turquoise is the smoothest?\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.328125, "loss": 4.163139343261719 }, { "id": "identity_19649", "image": "IconQA/images/97910.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of diamonds even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.046875, "loss": 4.174979209899902 }, { "id": "identity_28375", "image": "A-OKVQA/images/331680.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the animals here being used as?\nChoices:\n(A) police\n(B) guides\n(C) pets\n(D) pack animals" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.4140625, "loss": 4.0331292152404785 }, { "id": "identity_258759", "image": "VQA2.0/images/84819.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the different types of objects visible in the image, and how many distinct types can you identify?" }, { "from": "gpt", "value": "The answer is six distinct types" } ], "clip_score": 13.71875, "loss": 5.00430154800415 }, { "id": "identity_263985", "image": "DVQA/images/bar_train_00007198.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store is represented by the color red?" }, { "from": "gpt", "value": "The answer is beam" } ], "clip_score": 28.8125, "loss": 5.622570514678955 }, { "id": "identity_13187", "image": "PMC-VQA/images/PMC4416458_F1_382605.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which side of the insular cortex was affected by the lesion?\nChoices:\n(A) Left \n(B) Right \n(C) Both \n(D) Neither" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.78125, "loss": 4.177857398986816 }, { "id": "identity_49941", "image": "DVQA/images/bar_train_00045868.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset has the highest accuracy on the 'worth' algorithm?" }, { "from": "gpt", "value": "The answer is notice" } ], "clip_score": 36.9375, "loss": 4.589421272277832 }, { "id": "identity_32588", "image": "DVQA/images/bar_train_00105380.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first bar from the bottom?" }, { "from": "gpt", "value": "The answer is score" } ], "clip_score": 18.390625, "loss": 4.094729423522949 }, { "id": "identity_304158", "image": "AI2D/images/3245.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is in the center of this structure?\nChoices:\n(A) nucleolus\n(B) cytoplasm\n(C) ribosome\n(D) none of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.625, "loss": 4.422953128814697 }, { "id": "identity_318750", "image": "A-OKVQA/images/186461.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the person holding their arm out?\nChoices:\n(A) hold\n(B) wave\n(C) balance\n(D) break fall" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.21875, "loss": 4.34014368057251 }, { "id": "identity_292343", "image": "A-OKVQA/images/99568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which genre of magazine is this individual particularly interested in? \nChoices:\n(A) entertainment\n(B) horror comic\n(C) cars\n(D) fashion" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.0625, "loss": 4.095135688781738 }, { "id": "identity_19662", "image": "VQA2.0/images/22536.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the man's hair short?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 23.578125, "loss": 4.374300956726074 }, { "id": "identity_288634", "image": "VizWiz/images/VizWiz_train_00010756.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which brand does this mouse belong to?" }, { "from": "gpt", "value": "The answer is dell" } ], "clip_score": 32.53125, "loss": 4.002484321594238 }, { "id": "identity_263107", "image": "DVQA/images/bar_train_00045868.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group of bars is labeled as the first one from the left?" }, { "from": "gpt", "value": "The answer is corner" } ], "clip_score": 20.140625, "loss": 4.208573341369629 }, { "id": "identity_335889", "image": "A-OKVQA/images/464812.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which league would they prefer to join?\nChoices:\n(A) nba\n(B) mlb\n(C) nfl\n(D) nhl" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.578125, "loss": 4.352624893188477 }, { "id": "identity_291862", "image": "PMC-VQA/images/PMC3407708_F5_147299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which types of images were captured of the cervical spine? \nOptions:\n(A) Sagittal and Coronal\n(B) Axial and Oblique\n(C) Transverse and MIP\n(D) Lateral and Computed Tomography" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 33.28125, "loss": 4.393697738647461 }, { "id": "identity_282850", "image": "A-OKVQA/images/45829.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which type of store are the men most likely to be standing? \nChoices:\n(A) educational\n(B) video games\n(C) kids toys\n(D) home appliances" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.0546875, "loss": 4.405369281768799 }, { "id": "identity_324565", "image": "AI2D/images/4343.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What represents a clementine in this?\nChoices:\n(A) N\n(B) none of the above\n(C) B\n(D) M" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.375, "loss": 4.3140153884887695 }, { "id": "identity_321881", "image": "A-OKVQA/images/525409.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this?\nChoices:\n(A) rodeo\n(B) amusement park\n(C) airport\n(D) daycare" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.8125, "loss": 4.4038825035095215 }, { "id": "identity_331198", "image": "A-OKVQA/images/570601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the origin of baseball?\nChoices:\n(A) sweden\n(B) england\n(C) america\n(D) france" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.625, "loss": 4.2433977127075195 }, { "id": "identity_271563", "image": "FigureQA/images/99251.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Turquoise the color with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.171875, "loss": 4.002751350402832 }, { "id": "identity_298866", "image": "PMC-VQA/images/PMC6957570_Fig8_99259.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which samples are depicted in figure g? Is it:\n(A) Contralateral to injection site\n(B) Ipsilateral to injection site\n(C) Box f\n(D) Box d" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.578125, "loss": 4.100458145141602 }, { "id": "identity_12736", "image": "A-OKVQA/images/439837.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on top of the counter?\nChoices:\n(A) banana\n(B) sink\n(C) cat\n(D) television" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.40625, "loss": 4.092276573181152 }, { "id": "identity_314707", "image": "FigureQA/images/98313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Khaki?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.0625, "loss": 4.1434102058410645 }, { "id": "identity_125523", "image": "VQA-RAD/images/synpic27277.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the heart size within normal limits?" }, { "from": "gpt", "value": "The answer is Heart size appears normal" } ], "clip_score": 20.515625, "loss": 4.09526252746582 }, { "id": "identity_293608", "image": "DVQA/images/bar_train_00024570.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the bars run horizontally?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 19.4375, "loss": 4.228413105010986 }, { "id": "identity_332505", "image": "IconQA/images/29003.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, how likely is it that you will get a black one?\nChoices:\n(A) certain\n(B) unlikely\n(C) impossible\n(D) probable" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.109375, "loss": 4.074501991271973 }, { "id": "identity_52069", "image": "DVQA/images/bar_train_00144288.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest value for \"tissue\"?" }, { "from": "gpt", "value": "The answer is regime" } ], "clip_score": 24.828125, "loss": 4.602173805236816 }, { "id": "identity_313559", "image": "A-OKVQA/images/401071.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity might those under these umbrellas take part in at some point during the day?\nChoices:\n(A) drag racing\n(B) binge eating\n(C) betting\n(D) swimming" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.96875, "loss": 4.094521522521973 }, { "id": "identity_290585", "image": "PMC-VQA/images/PMC5569957_rrw123F3_206555.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the purple, green, yellow, and orange lines in the image? Is it:\n(A) Radiation dose in different regions\n(B) Different types of tissues\n(C) Structures identified through different methods\n(D) Different planes of the body." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.34375, "loss": 4.53130578994751 }, { "id": "identity_26000", "image": "A-OKVQA/images/494637.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the black headgear on the horses made from?\nChoices:\n(A) leather\n(B) cotton\n(C) wood\n(D) plastic" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.984375, "loss": 4.365573883056641 }, { "id": "identity_43333", "image": "DVQA/images/bar_train_00058756.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the highest accuracy value?" }, { "from": "gpt", "value": "The answer is female" } ], "clip_score": 30.046875, "loss": 4.011367321014404 }, { "id": "identity_263393", "image": "TQA/images/abc_question_images/human_system_eye_16140.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the labeled element that represents the pupil:\n(A) H\n(B) M\n(C) V\n(D) U" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.0, "loss": 4.180830001831055 }, { "id": "identity_185967", "image": "VizWiz/images/VizWiz_train_00004608.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color on the CD case under the glass?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 26.546875, "loss": 4.015700340270996 }, { "id": "identity_308930", "image": "FigureQA/images/14504.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.34375, "loss": 4.033029556274414 }, { "id": "identity_281161", "image": "PMC-VQA/images/PMC6423896_F1_453476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was observed at the surgical site in terms of enhancement?\nChoices:\n(A) Enhancement of the cerebellum \n(B) Enhancement of the occipital lobe \n(C) Enhancement of the frontotemporal lobe \n(D) Enhancement of the parietal lobe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.21875, "loss": 4.678799152374268 }, { "id": "identity_259489", "image": "TabMWP/images/5311.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of Nobel Prize winners for all three countries combined in the 1970s and 1980s, and what is the difference between the total number of winners in the 1980s and the 1970s?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 42.4375, "loss": 4.063995361328125 }, { "id": "identity_317368", "image": "PMC-VQA/images/PMC7604333_F7_289139.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this histogram show?\nChoices:\n(A) Length of projection fibers\n(B) Width of projection fibers\n(C) Number of projection fibers\n(D) Type of projection fibers" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.90625, "loss": 4.927767753601074 }, { "id": "identity_253490", "image": "ScienceQA/images/11061.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the state highlighted in the image and then name its capital. Additionally, list one neighboring state and its capital" }, { "from": "gpt", "value": "The answer is Pierre, Nebraska - Lincoln" } ], "clip_score": 32.21875, "loss": 4.214460849761963 }, { "id": "identity_292275", "image": "A-OKVQA/images/117098.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location can these bikes be found? \nChoices:\n(A) bike show\n(B) garage\n(C) parking lot\n(D) road" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.875, "loss": 4.21443510055542 }, { "id": "identity_280385", "image": "TextVQA/images/42a7f9fb024854b0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What word comes after \"splash\"?" }, { "from": "gpt", "value": "The answer is landing" } ], "clip_score": 17.90625, "loss": 4.361812591552734 }, { "id": "identity_21755", "image": "PMC-VQA/images/PMC6617098_children-06-00075-f003_6078.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What abnormalities are present in the optic nerves? \nChoices:\n(A) thickening and abnormal T1 signal \n(B) thickening and abnormal T2 signal \n(C) thin and abnormal T1 signal \n(D) thin and abnormal T2 signal " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.828125, "loss": 4.164400100708008 }, { "id": "identity_107200", "image": "VQA2.0/images/384146.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many horses are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.015625, "loss": 4.20679235458374 }, { "id": "identity_68217", "image": "AI2D/images/301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different food sources does the hawk have in this ecosystem?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.265625, "loss": 4.508431434631348 }, { "id": "identity_1854", "image": "TQA/images/abc_question_images/seasons_12588.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label represents the winter solstice?\nChoices:\n(A) S\n(B) H\n(C) R\n(D) T" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.046875, "loss": 4.096383094787598 }, { "id": "identity_120398", "image": "TextVQA/images/049aa723a278c2aa.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many headlights does the van have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.328125, "loss": 4.098485469818115 }, { "id": "identity_268937", "image": "DocVQA/images/fsmg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which account should be charged as indicated on the provided page?" }, { "from": "gpt", "value": "The answer is PA73WBC44" } ], "clip_score": 18.328125, "loss": 4.5685529708862305 }, { "id": "identity_2809", "image": "DVQA/images/bar_train_00112250.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many groups of bars are there?" }, { "from": "gpt", "value": "The answer is seven" } ], "clip_score": 16.96875, "loss": 4.11405086517334 }, { "id": "identity_282127", "image": "PMC-VQA/images/PMC8121193_FIG2_440377.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where does the left thoracic mass originate from in the mediastinum? \nChoices:\n(A) Anterior Mediastinum\n(B) Posterior Mediastinum\n(C) Middle Mediastinum\n(D) Superior Mediastinum" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.71875, "loss": 4.110034465789795 }, { "id": "identity_282229", "image": "PMC-VQA/images/PMC8816609_fig4_184593.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique is being utilized in the image provided? \nOptions:\n(A) X-ray imaging \n(B) Transmission electron microscopy \n(C) Scanning electron microscopy \n(D) Optical microscopy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.328125, "loss": 4.047547817230225 }, { "id": "identity_53108", "image": "DVQA/images/bar_train_00172196.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the least number of people preferring 'union'?" }, { "from": "gpt", "value": "The answer is weapon" } ], "clip_score": 27.46875, "loss": 4.098062992095947 }, { "id": "identity_261955", "image": "FigureQA/images/68342.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Purple the color with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.75, "loss": 4.167403221130371 }, { "id": "identity_190807", "image": "PMC-VQA/images/PMC4651706_fig1_445703.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the imaged leg?" }, { "from": "gpt", "value": "The answer is Anterior" } ], "clip_score": 28.34375, "loss": 4.110612869262695 }, { "id": "identity_309324", "image": "PMC-VQA/images/PMC7961081_f2_392000.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique used in panel (E)?\nChoices:\n(A) MRI\n(B) Ultrasound\n(C) X-ray\n(D) CT scan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.984375, "loss": 4.503653526306152 }, { "id": "identity_11623", "image": "TQA/images/question_images/seasons_2518.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: This determines the Earth's changing seasons.\nChoices:\n(A) Tilt on axis\n(B) Light\n(C) Gravity\n(D) Size of Earth" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.671875, "loss": 4.427755832672119 }, { "id": "identity_155731", "image": "AI2D/images/422.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of the snail in this food web?" }, { "from": "gpt", "value": "The answer is Primary consumer" } ], "clip_score": 32.71875, "loss": 4.624538421630859 }, { "id": "identity_275460", "image": "TextVQA/images/717d24f51898c4ad.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the current time displayed on the clock?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 22.765625, "loss": 4.39078426361084 }, { "id": "identity_205137", "image": "TextVQA/images/1d12a6441ea7408e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bracelets is the person wearing on their left wrist?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 14.390625, "loss": 5.173157691955566 }, { "id": "identity_332081", "image": "A-OKVQA/images/354721.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the skater's source of movement?\nChoices:\n(A) dog\n(B) gasoline\n(C) petrol\n(D) wheels" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.75, "loss": 4.191281318664551 }, { "id": "identity_3246", "image": "A-OKVQA/images/55868.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the large bird?\nChoices:\n(A) flamingo\n(B) seahawk\n(C) stork\n(D) pelican" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.7421875, "loss": 4.203166484832764 }, { "id": "identity_4871", "image": "DVQA/images/bar_train_00127117.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second bar from the left in each group?" }, { "from": "gpt", "value": "The answer is person" } ], "clip_score": 19.84375, "loss": 4.14962911605835 }, { "id": "identity_30348", "image": "TextVQA/images/b2e74817b47adeae.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what does the second board say?" }, { "from": "gpt", "value": "The answer is sugar" } ], "clip_score": 14.625, "loss": 5.111178398132324 }, { "id": "identity_284925", "image": "TextVQA/images/4bf43a7b2a898044.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of center is indicated by the banner?" }, { "from": "gpt", "value": "The answer is life care" } ], "clip_score": 9.28125, "loss": 5.960277080535889 }, { "id": "identity_34573", "image": "DVQA/images/bar_train_00183530.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second group of bars from the left?" }, { "from": "gpt", "value": "The answer is date" } ], "clip_score": 19.59375, "loss": 4.292961120605469 }, { "id": "identity_302540", "image": "PMC-VQA/images/PMC7402021_mrm28303-fig-0004_226287.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the orientation of the magnetic field with respect to this? \nChoices:\n(A) Parallel to this \n(B) Perpendicular to this \n(C) Tangent to this \n(D) Antiparallel to this" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.1796875, "loss": 4.368393421173096 }, { "id": "identity_296980", "image": "TQA/images/question_images/human_system_digestive_3746.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the function of the Esophagus among the following options? \n(A) Produces Insuline\n(B) Carries food, liquids, and saliva from the mouth to the stomach.\n(C) Pumps food and liquids into the intestines\n(D) Nothing" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.140625, "loss": 4.085552215576172 }, { "id": "identity_33743", "image": "PMC-VQA/images/PMC6855902_FIG4_68470.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the red arrow indicate in the CT scan? \nChoices:\n(A) Failure of bone union \n(B) Presence of bone union \n(C) Inflammation \n(D) Infection " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.21875, "loss": 5.542496681213379 }, { "id": "identity_316831", "image": "VQA2.0/images/351734.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the animal?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 17.984375, "loss": 4.327176094055176 }, { "id": "identity_149590", "image": "DocVQA/images/xngv0228_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the additional location suggestion for the meeting?" }, { "from": "gpt", "value": "The answer is Europe" } ], "clip_score": 22.265625, "loss": 4.7350358963012695 }, { "id": "identity_120971", "image": "VQA-AS/images/2867.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the majority of the blocks on the floor?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 16.359375, "loss": 4.222454071044922 }, { "id": "identity_216970", "image": "TextVQA/images/0440cabbf960c367.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles can be seen in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 23.734375, "loss": 4.535342216491699 }, { "id": "identity_250847", "image": "TQA/images/abc_question_images/human_system_eye_16121.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the structure responsible for the highest visual acuity and explain its relationship with the optic nerve and the lens" }, { "from": "gpt", "value": "The answer is E" } ], "clip_score": 31.34375, "loss": 5.175523281097412 }, { "id": "identity_307277", "image": "A-OKVQA/images/196899.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the smaller glasses intended for?\n\nChoices:\n(A) milk\n(B) wine\n(C) tea\n(D) coffee" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.796875, "loss": 4.3395490646362305 }, { "id": "identity_182505", "image": "A-OKVQA/images/439614.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flags are visible on the boat?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.171875, "loss": 4.085397720336914 }, { "id": "identity_23674", "image": "PMC-VQA/images/PMC4337252_fig03_360995.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color represents the plaque development in the image? \nChoices:\n(A) Blue \n(B) Green \n(C) Yellow \n(D) Red " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.8359375, "loss": 4.604006767272949 }, { "id": "identity_195236", "image": "PMC-VQA/images/PMC7213657_FIG3_166476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical region is primarily shown in the image?" }, { "from": "gpt", "value": "The answer is Liver region" } ], "clip_score": 26.046875, "loss": 4.551459312438965 }, { "id": "identity_291138", "image": "VizWiz/images/VizWiz_train_00005219.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the type of wood this is?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 27.8125, "loss": 4.2064385414123535 }, { "id": "identity_338237", "image": "A-OKVQA/images/124128.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the symbol was marked out?\nChoices:\n(A) stop\n(B) go forward\n(C) turn left\n(D) turn right" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.328125, "loss": 4.355601787567139 }, { "id": "identity_274356", "image": "PMC-VQA/images/PMC8142494_Fig2_447919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the CBCT view displayed on the top left side of the figure, which quadrant is the focus located in?\nChoices:\n(A) lower right quadrant\n(B) upper right quadrant\n(C) lower left quadrant\n(D) upper left quadrant" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.375, "loss": 4.317181587219238 }, { "id": "identity_206670", "image": "VQA2.0/images/563713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many handles does the vase have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.234375, "loss": 4.109622955322266 }, { "id": "identity_267215", "image": "A-OKVQA/images/101013.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following countries is it impossible for this to be?\n(A) United States\n(B) Jordan\n(C) Yemen\n(D) Saudi Arabia" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.3359375, "loss": 4.105844974517822 }, { "id": "identity_27405", "image": "PMC-VQA/images/PMC5106057_F2_94700.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which chamber showed dilation? \nChoices:\n(A) right atria \n(B) left atria \n(C) right ventricle \n(D) left ventricle " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.640625, "loss": 4.00740909576416 }, { "id": "identity_73937", "image": "ScienceQA/images/16956.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the distinctive feature of the animal in the image that contributes to its name?" }, { "from": "gpt", "value": "The answer is Loud howling calls" } ], "clip_score": 23.25, "loss": 4.026750564575195 }, { "id": "identity_222561", "image": "A-OKVQA/images/38934.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary mode of transportation depicted in the image?" }, { "from": "gpt", "value": "The answer is Train" } ], "clip_score": 22.90625, "loss": 4.469880104064941 }, { "id": "identity_308490", "image": "PMC-VQA/images/PMC8660657_F2_136124.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of scan was performed on the patient?\nChoices:\n(A) CT scan \n(B) SWI MRI scan \n(C) PET scan \n(D) Ultrasound scan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.890625, "loss": 4.17446231842041 }, { "id": "identity_320609", "image": "A-OKVQA/images/362604.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the train parked?\nChoices:\n(A) street\n(B) mall\n(C) garage\n(D) station" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.6875, "loss": 4.416615962982178 }, { "id": "identity_296013", "image": "A-OKVQA/images/123907.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which emotion is the woman most likely experiencing? (A) hate, (B) anger, (C) fear, or (D) anticipation?" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 7.9296875, "loss": 4.307747840881348 }, { "id": "identity_15644", "image": "PMC-VQA/images/PMC5627796_F3_224139.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which lobe is the hemangioma located?\nChoices:\n(A) Left anterior lobe \n(B) Left posterior lobe \n(C) Right anterior lobe \n(D) Right posterior lobe " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.078125, "loss": 4.20211124420166 }, { "id": "identity_331081", "image": "PMC-VQA/images/PMC6824786_F3_59798.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the signal found in the body?\nChoices:\n(A) left thigh muscle area\n(B) right thigh muscle area\n(C) left calf muscle area\n(D) right calf muscle area" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.71875, "loss": 4.078524112701416 }, { "id": "identity_194268", "image": "PMC-VQA/images/PMC5400329_F2_155452.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the arrow pointing to in both images?" }, { "from": "gpt", "value": "The answer is A lung abnormality" } ], "clip_score": 25.09375, "loss": 4.669636249542236 }, { "id": "identity_25060", "image": "Super-CLEVR/images/superCLEVR_new_000616.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there an equal number of small metallic utility bikes that are to the left of the yellow shiny chopper and small cyan metallic things that are to the right of the gray metal car?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 34.6875, "loss": 4.864068984985352 }, { "id": "identity_33570", "image": "VQA2.0/images/39080.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tablecloth?" }, { "from": "gpt", "value": "The answer is white" } ], "clip_score": 19.25, "loss": 4.54580020904541 }, { "id": "identity_311503", "image": "DocVQA/images/khwg0023_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the text written in the top right corner?" }, { "from": "gpt", "value": "The answer is EXAMPLE:Chicago Region" } ], "clip_score": 19.625, "loss": 4.536405563354492 }, { "id": "identity_272886", "image": "PMC-VQA/images/PMC6022407_Fig8_334272.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following terms describes the upper layer of the microphotograph: Granular, Polymorphic, Unstained, or Untreated?\nChoices:\n(A) Granular \n(B) Polymorphic \n(C) Unstained \n(D) Untreated" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.671875, "loss": 4.665552616119385 }, { "id": "identity_278532", "image": "AI2D/images/3368.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term for the brightly colored parts of a flower that surround the reproductive organs, which is a division of the corolla?\nChoices:\n(A) Anthers\n(B) Pistil\n(C) Petal\n(D) Sepal" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.71875, "loss": 4.466139793395996 }, { "id": "identity_227749", "image": "TextVQA/images/25d8358103eb68ad.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of water on the table, and how many people are visible in the background?" }, { "from": "gpt", "value": "The answer is Dasani, three" } ], "clip_score": 22.5, "loss": 4.660270690917969 }, { "id": "identity_261752", "image": "AI2D/images/3528.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, which region corresponds to hemicellulose?\nChoices:\n(A) c\n(B) b\n(C) e\n(D) d" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.6328125, "loss": 4.129334926605225 }, { "id": "identity_260104", "image": "A-OKVQA/images/147577.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary theme depicted in the mural, and how does the artist use specific elements to convey this theme?" }, { "from": "gpt", "value": "The answer is Historical train travel and social interactions" } ], "clip_score": 35.96875, "loss": 5.194483280181885 }, { "id": "identity_208687", "image": "TextVQA/images/35c02a86ce470b5a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lifebuoys are visible on the boat?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.171875, "loss": 4.5806074142456055 }, { "id": "identity_201423", "image": "A-OKVQA/images/362812.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many handrails are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 17.421875, "loss": 4.743686676025391 }, { "id": "identity_292933", "image": "A-OKVQA/images/579862.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of vehicle are the individuals wearing helmets riding? Is it a:\n(A) unicycle\n(B) moped\n(C) bike\n(D) scooter" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.9375, "loss": 4.2953877449035645 }, { "id": "identity_309445", "image": "ScienceQA/images/18727.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Olympia\n(B) Sacramento\n(C) Salem\n(D) Helena" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.359375, "loss": 4.324985980987549 }, { "id": "identity_268094", "image": "ScienceQA/images/4985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic is shared by both of these objects?\nChoices:\n(A) breakable\n(B) soft" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.15625, "loss": 4.142482757568359 }, { "id": "identity_29607", "image": "ScienceQA/images/12004.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Kentucky?\nChoices:\n(A) Louisville\n(B) Columbia\n(C) Frankfort\n(D) Richmond" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.5625, "loss": 4.082881927490234 }, { "id": "identity_235042", "image": "VQA-AS/images/6216.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of objects are present in the room, and what are they?" }, { "from": "gpt", "value": "The answer is eleven distinct types of objects" } ], "clip_score": 23.3125, "loss": 4.49812650680542 }, { "id": "identity_321269", "image": "ScienceQA/images/1101.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the direction of this movement?\nChoices:\n(A) toward the giraffe\n(B) away from the giraffe" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.75, "loss": 4.296333312988281 }, { "id": "identity_289121", "image": "A-OKVQA/images/44483.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activities are typically done by people under the umbrellas here?\nChoices:\n(A) knit\n(B) cheer\n(C) dine\n(D) sleep" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.421875, "loss": 4.596468925476074 }, { "id": "identity_333291", "image": "TQA/images/question_images/glaciers_6937.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is found below the Headwall?\nChoices:\n(A) Terminal Moraine\n(B) Zone of Abrasion\n(C) Zone of Plucking\n(D) Glacial Ice" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 37.0625, "loss": 4.328001976013184 }, { "id": "identity_302551", "image": "PMC-VQA/images/PMC6383305_rcr2402-fig-0001_438489.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being examined in this histology?\nChoices:\n(A) Liver\n(B) Heart\n(C) Lungs\n(D) Kidneys" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.078125, "loss": 4.063608646392822 }, { "id": "identity_319446", "image": "AI2D/images/2688.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During which season does the south pole receive no sunlight?\nChoices:\n(A) fall\n(B) winter\n(C) summer\n(D) spring" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.296875, "loss": 4.930913925170898 }, { "id": "identity_310510", "image": "FigureQA/images/83076.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this have the minimum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.2421875, "loss": 4.198046684265137 }, { "id": "identity_277087", "image": "FigureQA/images/29892.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Khaki the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.546875, "loss": 4.616062164306641 }, { "id": "identity_302646", "image": "Super-CLEVR/images/superCLEVR_new_001409.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bus left of a metallic bicycle in front of a tiny bicycle?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 27.109375, "loss": 5.223671913146973 }, { "id": "identity_25738", "image": "TabMWP/images/27943.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Some travelers in an airport compared how far they had flown that day. Who flew the shortest distance?\nChoices:\n(A) Nora\n(B) Riley\n(C) Sandeep\n(D) Brittany" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.1875, "loss": 4.067595481872559 }, { "id": "identity_56685", "image": "DocVQA/images/tjxg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last word of the letter?" }, { "from": "gpt", "value": "The answer is view" } ], "clip_score": 15.71875, "loss": 5.525946617126465 }, { "id": "identity_195206", "image": "PMC-VQA/images/PMC5794130_F1_270365.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the red arrow pointing to a specific area?" }, { "from": "gpt", "value": "The answer is Possible lesion or area of pathology" } ], "clip_score": 29.828125, "loss": 4.252603054046631 }, { "id": "identity_78438", "image": "ScienceQA/images/17348.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many fallen logs can be counted in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.703125, "loss": 4.344666957855225 }, { "id": "identity_933", "image": "AI2D/images/2648.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The stages in the solar cycle are labeled with letters. Which stage is shown at A?\nChoices:\n(A) Winter Solstice\n(B) Vernal Equinox\n(C) Autumnal Equinox\n(D) Summer Solstice" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.46875, "loss": 4.00730037689209 }, { "id": "identity_192377", "image": "Super-CLEVR/images/superCLEVR_new_001722.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the vehicle in the foreground?" }, { "from": "gpt", "value": "The answer is Black and green" } ], "clip_score": 27.453125, "loss": 4.03212308883667 }, { "id": "identity_295626", "image": "PMC-VQA/images/PMC4436458_fig2_387646.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What specific part of the heart is depicted in this M-mode tracing?\nOptions:\n(A) Aorta\n(B) Superior vena cava\n(C) Interventricular septum\n(D) Left atrium" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.96875, "loss": 5.073027610778809 }, { "id": "identity_330046", "image": "A-OKVQA/images/503832.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is closest to one of the giraffes?\nChoices:\n(A) baby eel\n(B) hat\n(C) zebra\n(D) baby" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.609375, "loss": 4.2534708976745605 }, { "id": "identity_294194", "image": "FigureQA/images/87157.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Forest Green the option with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.1875, "loss": 4.120257377624512 }, { "id": "identity_217451", "image": "TextVQA/images/3a9b98a51f4976c5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What direction is the Times Sq. Shuttle to Southbound 7 Ave?" }, { "from": "gpt", "value": "The answer is Turn here" } ], "clip_score": 26.875, "loss": 5.357877731323242 }, { "id": "identity_259242", "image": "PMC-VQA/images/PMC4285820_fig05_348926.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the structure shown magnified in Box (A) and describe its function in the organism" }, { "from": "gpt", "value": "The answer is Sensillum, detects environmental stimuli" } ], "clip_score": 28.625, "loss": 4.210352897644043 }, { "id": "identity_329432", "image": "IconQA/images/92299.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there a circular face on this shape?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.1875, "loss": 4.212329387664795 }, { "id": "identity_270074", "image": "IconQA/images/28008.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner more likely to land on? \nOptions:\n(A) green\n(B) white" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.625, "loss": 4.0230536460876465 }, { "id": "identity_21023", "image": "A-OKVQA/images/496617.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How is the occupancy of this room?\nChoices:\n(A) partial\n(B) one person\n(C) full\n(D) empty" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.5078125, "loss": 4.371161937713623 }, { "id": "identity_19421", "image": "AI2D/images/3525.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Biology A small, typically round granular body composed of protein and RNA in the nucleus of a cell is called\nChoices:\n(A) golgi\n(B) mitochondrion\n(C) chromoplast\n(D) nucleolus" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.296875, "loss": 4.062962055206299 }, { "id": "identity_279513", "image": "VizWiz/images/VizWiz_train_00004572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the species of this caterpillar?" }, { "from": "gpt", "value": "The answer is orange black" } ], "clip_score": 28.0625, "loss": 5.947526931762695 }, { "id": "identity_6775", "image": "FigureQA/images/83579.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Light Gold have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.96875, "loss": 4.172221660614014 }, { "id": "identity_310581", "image": "TQA/images/question_images/human_system_reproductory_7045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is located directly beneath labium minora?\nChoices:\n(A) urethra\n(B) urinary bladder\n(C) labium majora\n(D) clitoris" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 36.3125, "loss": 4.53008508682251 }, { "id": "identity_206734", "image": "A-OKVQA/images/165376.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the primary seating furniture in the living room?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 23.359375, "loss": 4.117546081542969 }, { "id": "identity_27486", "image": "A-OKVQA/images/119443.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why si the board hanging from the box?\nChoices:\n(A) is stuck\n(B) bounced there\n(C) showing off\n(D) fell there" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.4609375, "loss": 4.059754848480225 }, { "id": "identity_12818", "image": "A-OKVQA/images/337277.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on the smaller laptop screen?\nChoices:\n(A) cat\n(B) dog\n(C) baby picture\n(D) screen saver" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.328125, "loss": 4.086090564727783 }, { "id": "identity_189126", "image": "PMC-VQA/images/PMC6691837_f3-cln_74p1_22906.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is located directly above the arrow in the image?" }, { "from": "gpt", "value": "The answer is Liver" } ], "clip_score": 25.75, "loss": 4.721651554107666 }, { "id": "identity_118748", "image": "TextVQA/images/a58d1f724f714080.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the signboard's background?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 14.40625, "loss": 4.6036176681518555 }, { "id": "identity_266122", "image": "TQA/images/question_images/sun_layers_6308.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is the cap that covers the core of the chromosphere?\n(A) sunspot\n(B) corona\n(C) prominence\n(D) radiative zone" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.265625, "loss": 4.102025032043457 }, { "id": "identity_12242", "image": "Super-CLEVR/images/superCLEVR_new_001559.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the shiny thing that is both left of the purple shiny motorbike and behind the small shiny bicycle?" }, { "from": "gpt", "value": "The answer is small" } ], "clip_score": 23.875, "loss": 4.004228115081787 }, { "id": "identity_77156", "image": "AI2D/images/3089.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dendrites are visible in this image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 29.375, "loss": 4.3158721923828125 }, { "id": "identity_201367", "image": "A-OKVQA/images/75299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible traffic signs are there in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.171875, "loss": 4.644647121429443 }, { "id": "identity_215131", "image": "VQA2.0/images/391219.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many food trucks are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 26.84375, "loss": 4.331367492675781 }, { "id": "identity_295182", "image": "VizWiz/images/VizWiz_train_00009568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Could someone please inform me about what this is or its age?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 12.7578125, "loss": 4.635617256164551 }, { "id": "identity_284932", "image": "ScienceQA/images/5204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic describes this object?\nChoices:\n(A) sweet\n(B) yellow" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.4375, "loss": 4.076491355895996 }, { "id": "identity_6597", "image": "A-OKVQA/images/474268.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on the floor?\nChoices:\n(A) antelope\n(B) surfboard\n(C) pumpkin\n(D) egg sandwich" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.7578125, "loss": 4.179475784301758 }, { "id": "identity_317505", "image": "TabMWP/images/32250.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of the numbers of computers counted in the classrooms?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 35.125, "loss": 4.036533355712891 }, { "id": "identity_262794", "image": "AI2D/images/2937.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the outermost part from the following options:\n(A) Membrane\n(B) Outer arm\n(C) Radial link\n(D) Nexin Bridge" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.125, "loss": 4.415766716003418 }, { "id": "identity_23239", "image": "PMC-VQA/images/PMC6702894_F3_26533.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the lesion visible? \nChoices:\n(A) On the corresponding image \n(B) On the distal interphalangeal joint image \n(C) On the fat-suppressed image \n(D) On the proximal interphalangeal joint image " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.953125, "loss": 4.143744468688965 }, { "id": "identity_222990", "image": "PMC-VQA/images/PMC8124336_ijms-22-04818-f003_441960.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arrows are pointing to features in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 25.15625, "loss": 4.270151615142822 }, { "id": "identity_18357", "image": "A-OKVQA/images/281696.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where do you go in this street if you want to buy candy?\nChoices:\n(A) restaurant\n(B) convenience store\n(C) shoes store\n(D) bank" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.8203125, "loss": 4.004494667053223 }, { "id": "identity_21149", "image": "A-OKVQA/images/213592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these men on horseback holding in their hands?\nChoices:\n(A) brooms\n(B) mallets\n(C) clubs\n(D) bats" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.921875, "loss": 4.190258026123047 }, { "id": "identity_69650", "image": "ScienceQA/images/531.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trophic levels are depicted in this food web?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 28.203125, "loss": 4.007054328918457 }, { "id": "identity_137115", "image": "DocVQA/images/nkfw0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the contact person at the agency?" }, { "from": "gpt", "value": "The answer is Paula Carter" } ], "clip_score": 21.046875, "loss": 4.328273773193359 }, { "id": "identity_293830", "image": "IconQA/images/18385.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is indicated? Respond with a time word, not a numerical value. It is (_) after three." }, { "from": "gpt", "value": "The answer is quarter" } ], "clip_score": 30.046875, "loss": 4.253805637359619 }, { "id": "identity_115735", "image": "A-OKVQA/images/207486.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many animals are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.828125, "loss": 4.229513645172119 }, { "id": "identity_311392", "image": "PMC-VQA/images/PMC7220632_F1_168946.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is causing regurgitation in this artwork?\nChoices:\n(A) Dilation of the left ventricle \n(B) Dilation of the right ventricle \n(C) Pulmonary artery hypertension \n(D) Aortic valve stenosis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.34375, "loss": 4.377624034881592 }, { "id": "identity_321995", "image": "A-OKVQA/images/502307.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the large white object behind the seat of this vehicle used for?\nChoices:\n(A) sitting\n(B) storage\n(C) tricks\n(D) towing" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.03125, "loss": 4.932082653045654 }, { "id": "identity_211281", "image": "A-OKVQA/images/147653.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles are visible in the background?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 22.515625, "loss": 4.732170104980469 }, { "id": "identity_281002", "image": "TQA/images/question_images/photosynthesis_4107.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of air is absorbed by the leaves in the image: Oxygen, Minerals, Carbon Dioxide, or Glucose?\nChoices:\n(A) Oxygen\n(B) Minerals\n(C) Carbon Dioxide\n(D) Glucose" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.734375, "loss": 4.077007293701172 }, { "id": "identity_268938", "image": "A-OKVQA/images/559315.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location does this scene occur? \nOptions:\n(A) Morocco\n(B) Israel\n(C) America\n(D) Greece" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.046875, "loss": 4.757085800170898 }, { "id": "identity_321608", "image": "FigureQA/images/87393.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Mint the one with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.140625, "loss": 4.179266929626465 }, { "id": "identity_294507", "image": "PMC-VQA/images/PMC7348933_brainsci-10-00375-f002_209563.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which specific location is the lesion situated?\nChoices:\n(A) Right side of the Corpus amygdaloideum\n(B) Left side of the Corpus amygdaloideum\n(C) Parietal lobe\n(D) Occipital lobe" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.0625, "loss": 4.62869930267334 }, { "id": "identity_327498", "image": "DocVQA/images/ggjh0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the manager of a public service company?" }, { "from": "gpt", "value": "The answer is William Dutch" } ], "clip_score": 18.625, "loss": 4.257424831390381 }, { "id": "identity_336823", "image": "FigureQA/images/95371.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the area under the curve for Hot Pink the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.6875, "loss": 4.008628845214844 }, { "id": "identity_131100", "image": "Super-CLEVR/images/superCLEVR_new_000694.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly in front of the purple airplane?" }, { "from": "gpt", "value": "The answer is Motorcycle" } ], "clip_score": 29.546875, "loss": 4.20964241027832 }, { "id": "identity_5150", "image": "FigureQA/images/17087.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Turquoise the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.6875, "loss": 4.276106834411621 }, { "id": "identity_321316", "image": "DVQA/images/bar_train_00149797.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are these bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 18.84375, "loss": 4.227684497833252 }, { "id": "identity_284347", "image": "IconQA/images/49531.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do all the snowmen have enough carrot noses?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 36.875, "loss": 4.2908124923706055 }, { "id": "identity_278682", "image": "FigureQA/images/15273.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Yellow considered the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.015625, "loss": 4.483284950256348 }, { "id": "identity_122582", "image": "PMC-VQA/images/PMC3605106_F2_193660.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the overall condition of the retina based on the image?" }, { "from": "gpt", "value": "The answer is Normal" } ], "clip_score": 31.828125, "loss": 5.976535797119141 }, { "id": "identity_7107", "image": "PMC-VQA/images/PMC3691725_F5_213673.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the image depicting? \nChoices:\n(A) Electron microscopy of a virus particle \n(B) Genetic sequencing data \n(C) Protein structure predictions \n(D) None of the above " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.765625, "loss": 4.014352321624756 }, { "id": "identity_17681", "image": "DVQA/images/bar_train_00188483.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the third bar from the left in each group?" }, { "from": "gpt", "value": "The answer is press" } ], "clip_score": 17.015625, "loss": 4.15179967880249 }, { "id": "identity_142", "image": "VQA2.0/images/252779.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where are the red and white stripes?" }, { "from": "gpt", "value": "The answer is on platforms" } ], "clip_score": 19.65625, "loss": 4.869757175445557 }, { "id": "identity_221386", "image": "VQA2.0/images/267205.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 16.453125, "loss": 4.574753761291504 }, { "id": "identity_297432", "image": "Super-CLEVR/images/superCLEVR_new_000273.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape does the tiny metallic object, which matches the color of the rubber object, have?" }, { "from": "gpt", "value": "The answer is chopper" } ], "clip_score": 27.984375, "loss": 4.016961574554443 }, { "id": "identity_21128", "image": "PMC-VQA/images/PMC8684578_ccr35238-fig-0001_142173.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which hemisphere appears enlarged in the image? \nChoices:\n(A) Left hemisphere \n(B) Right hemisphere \n(C) Both hemispheres \n(D) Neither hemisphere. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.65625, "loss": 4.321221351623535 }, { "id": "identity_34819", "image": "A-OKVQA/images/169226.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What photographic technique was used to capture the movement of traffic on the street?\nChoices:\n(A) hdr\n(B) time-lapse\n(C) panorama\n(D) bokeh" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.3125, "loss": 4.144841194152832 }, { "id": "identity_267091", "image": "FigureQA/images/58962.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Olive Drab or Web Purple?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.390625, "loss": 4.637019157409668 }, { "id": "identity_307453", "image": "PMC-VQA/images/PMC4826883_F3_20100.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the subject of these images taken with an electron microscope?\nChoices:\n(A) Bacteria\n(B) Viruses\n(C) Human cells\n(D) Fungi" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.796875, "loss": 4.85794734954834 }, { "id": "identity_313789", "image": "ScienceQA/images/2034.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is this object made of?\nChoices:\n(A) wool\n(B) rock" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.875, "loss": 4.24788761138916 }, { "id": "identity_279602", "image": "PMC-VQA/images/PMC7503285_ijms-21-06163-f007_257397.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which side was the control used?\nChoices:\n(A) Right side\n(B) Left side\n(C) Both sides\n(D) Neither side" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.9140625, "loss": 4.260742664337158 }, { "id": "identity_286455", "image": "A-OKVQA/images/527098.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location can you observe a scene similar to what is occurring behind the boats?\nChoices:\n(A) parking garage\n(B) king kullen\n(C) six flags\n(D) costco" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.1875, "loss": 4.224836349487305 }, { "id": "identity_292540", "image": "A-OKVQA/images/83060.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of bag is suspended from the cupboard?\nOptions:\n(A) grocery bag\n(B) backpack\n(C) purse\n(D) satchel" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.21875, "loss": 4.368167877197266 }, { "id": "identity_217702", "image": "A-OKVQA/images/356015.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are wearing helmets?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 21.375, "loss": 5.046076774597168 }, { "id": "identity_286565", "image": "A-OKVQA/images/430155.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose are the overhead wires used?\nChoices:\n(A) powering homes\n(B) telephone lines\n(C) streetcars\n(D) powering businesses" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.0625, "loss": 4.448386192321777 }, { "id": "identity_110441", "image": "TextVQA/images/316948ded3f3f4ba.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of establishment is advertised on the sign to the left of the image?" }, { "from": "gpt", "value": "The answer is Offices to let" } ], "clip_score": 28.71875, "loss": 4.380418300628662 }, { "id": "identity_291157", "image": "PMC-VQA/images/PMC7267220_F5_182993.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the image attempting to estimate?\nChoices:\n(A) Brain activity \n(B) Brain weight \n(C) Subcortical structural volume \n(D) None of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.734375, "loss": 4.118393898010254 }, { "id": "identity_209277", "image": "VQA2.0/images/504021.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the car that is turning left?" }, { "from": "gpt", "value": "The answer is Silver" } ], "clip_score": 18.71875, "loss": 4.101693630218506 }, { "id": "identity_265667", "image": "Super-CLEVR/images/superCLEVR_new_001259.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the gray matte scooter and the metallic tandem bike of equal size?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 29.015625, "loss": 5.430858612060547 }, { "id": "identity_280402", "image": "FigureQA/images/28800.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Slate the highest possible option?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.953125, "loss": 4.27895975112915 }, { "id": "identity_326520", "image": "A-OKVQA/images/122839.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this holding?\nChoices:\n(A) torch\n(B) chicken\n(C) cane\n(D) pistol" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 5.24609375, "loss": 4.209906101226807 }, { "id": "identity_65185", "image": "AI2D/images/914.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What role do plants play in this food web?" }, { "from": "gpt", "value": "The answer is Primary producer" } ], "clip_score": 35.90625, "loss": 5.149131774902344 }, { "id": "identity_311755", "image": "PMC-VQA/images/PMC7249772_FIG3_178137.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of fracture is shown in the X-ray?\nChoices:\n(A) Longitudinal fracture\n(B) Comminuted fracture\n(C) Oblique fracture\n(D) Transverse fracture" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.921875, "loss": 4.0459723472595215 }, { "id": "identity_155406", "image": "TQA/images/question_images/optics_lense_types_9162.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many light rays are shown entering the lens?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 29.4375, "loss": 4.0446696281433105 }, { "id": "identity_323422", "image": "DocVQA/images/ptwn0020_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the department name?" }, { "from": "gpt", "value": "The answer is field sales" } ], "clip_score": 17.375, "loss": 5.010428428649902 }, { "id": "identity_289157", "image": "A-OKVQA/images/140068.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the man wearing a glove?\nChoices:\n(A) fashion\n(B) warmth\n(C) to catch\n(D) health" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.796875, "loss": 4.574937343597412 }, { "id": "identity_35994", "image": "PMC-VQA/images/PMC6014307_F0005_332052.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What plane is used to visualize the collection in peripelvic region? \nChoices:\n(A) Sagittal plane \n(B) Transverse plane \n(C) Coronal plane \n(D) Axial plane " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.46875, "loss": 4.497145175933838 }, { "id": "identity_323137", "image": "PMC-VQA/images/PMC9188905_Fig1_309252.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the notable feature on this scan of the chest? \nChoices:\n(A) Bilateral diffuse patchy miliary nodules \n(B) Normal CT scan of the chest \n(C) Increased lung markings with patchy infiltration \n(D) Unilateral diffuse patchy miliary nodules." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.359375, "loss": 4.254347801208496 }, { "id": "identity_277085", "image": "VQA2.0/images/192994.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the collar?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 18.046875, "loss": 4.178854465484619 }, { "id": "identity_109425", "image": "VQA2.0/images/374135.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the truck cab?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 13.0390625, "loss": 4.238310813903809 }, { "id": "identity_213261", "image": "A-OKVQA/images/88477.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the cow predominantly?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 27.265625, "loss": 4.194851398468018 }, { "id": "identity_293848", "image": "PMC-VQA/images/PMC9200539_fig6_311851.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the observation in the peritoneum of the patient among the following choices: (A) cystic shadows without enhancement, (B) multiple nodular shadows, (C) uniform enhancement, or (D) extensive fluid density shadows?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.296875, "loss": 4.055902481079102 }, { "id": "identity_329905", "image": "PMC-VQA/images/PMC7770780_Fig2_331562.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kinds of materials are depicted in the images?\nChoices:\n(A) Polymers\n(B) Metals\n(C) Ceramics\n(D) Organic compounds" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.71875, "loss": 4.576389312744141 }, { "id": "identity_190672", "image": "TextVQA/images/9aa0e3eefab619da.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible on the field?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 17.375, "loss": 4.645031929016113 }, { "id": "identity_263682", "image": "ScienceQA/images/5216.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of North Carolina?\nChoices:\n(A) Cheyenne\n(B) Raleigh\n(C) Fargo\n(D) Bismarck" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.484375, "loss": 4.403063774108887 }, { "id": "identity_13845", "image": "Super-CLEVR/images/superCLEVR_new_000843.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a tiny minivan of the same color as the shiny road bike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 24.578125, "loss": 5.399080753326416 }, { "id": "identity_116827", "image": "PMC-VQA/images/PMC6722553_nutrients-11-01890-f001_32822.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape are the markers in the image?" }, { "from": "gpt", "value": "The answer is Round or circular" } ], "clip_score": 13.4375, "loss": 4.204084396362305 }, { "id": "identity_30875", "image": "VizWiz/images/VizWiz_train_00013151.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me what kind of flower this is?" }, { "from": "gpt", "value": "The answer is wild flowers" } ], "clip_score": 25.15625, "loss": 4.24259090423584 }, { "id": "identity_202063", "image": "PMC-VQA/images/PMC4645184_f6_443921.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the structures that appear to be surrounding the red-stained areas?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 15.3828125, "loss": 4.286165714263916 }, { "id": "identity_295648", "image": "FigureQA/images/5779.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Green or Deep Pink?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.515625, "loss": 4.665497303009033 }, { "id": "identity_282175", "image": "IconQA/images/92395.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the time shown by filling in the blank: It is (_) after four." }, { "from": "gpt", "value": "The answer is quarter" } ], "clip_score": 29.015625, "loss": 4.781096458435059 }, { "id": "identity_187453", "image": "A-OKVQA/images/508896.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are standing on the right side of the half pipe?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.390625, "loss": 4.716104030609131 }, { "id": "identity_323111", "image": "PMC-VQA/images/PMC9374607_Fig1_371743.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the brain is affected by the occlusion?\nChoices:\n(A) Right hemisphere\n(B) Left hemisphere\n(C) Cerebellum\n(D) Brainstem" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.6875, "loss": 4.279363632202148 }, { "id": "identity_34715", "image": "PMC-VQA/images/PMC7406183_FIG3_227716.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the sulcus located on the coronal view? \nChoices:\n(A) Epsilon sulcus \n(B) Phi sulcus \n(C) Omega sulcus \n(D) Delta sulcus " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.90625, "loss": 4.061239242553711 }, { "id": "identity_220244", "image": "VQA2.0/images/269561.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cups can be seen on the table?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 14.4375, "loss": 4.510826587677002 }, { "id": "identity_34364", "image": "VQA2.0/images/495043.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of work are the men doing in the photo?" }, { "from": "gpt", "value": "The answer is road" } ], "clip_score": 24.890625, "loss": 5.604288578033447 }, { "id": "identity_330589", "image": "A-OKVQA/images/250001.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who is the individual sitting high above the tennis net?\nChoices:\n(A) referee\n(B) trainer\n(C) coach\n(D) announcer" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.1875, "loss": 4.436888694763184 }, { "id": "identity_221431", "image": "VQA-AS/images/16185.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is to the right of the girl?" }, { "from": "gpt", "value": "The answer is A campfire" } ], "clip_score": 30.796875, "loss": 4.3880486488342285 }, { "id": "identity_271944", "image": "PMC-VQA/images/PMC6970096_FIG2_102919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which view provides a clearer depiction of knee effusion?\nChoices:\n(A) Sagittal view\n(B) Transverse view\n(C) Both views show it equally clearly\n(D) No view shows it clearly" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.09375, "loss": 4.250553607940674 }, { "id": "identity_185863", "image": "PMC-VQA/images/PMC7245847_Fig1_176868.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many axial slices are presented in this image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 23.140625, "loss": 4.453266620635986 }, { "id": "identity_113617", "image": "VQA-AS/images/12623.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are seated on the picnic blanket?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 31.453125, "loss": 4.382739067077637 }, { "id": "identity_1874", "image": "PMC-VQA/images/PMC7560370_materials-13-04063-f001_274236.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the texture of the outer layer of the seed? \nChoices:\n(A) Soft \n(B) Hard \n(C) Wet \n(D) Fuzzy " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.46875, "loss": 4.451849937438965 }, { "id": "identity_143767", "image": "DocVQA/images/mzkb0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the photographer of the photo according to the image?" }, { "from": "gpt", "value": "The answer is Mike Gordon" } ], "clip_score": 17.875, "loss": 4.296268939971924 }, { "id": "identity_103970", "image": "TextVQA/images/ff4471f53a834d7b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What pattern is present on the central object of this image?" }, { "from": "gpt", "value": "The answer is A radial line pattern" } ], "clip_score": 16.0, "loss": 4.0904951095581055 }, { "id": "identity_18434", "image": "A-OKVQA/images/538809.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this store sell?\nChoices:\n(A) running clothes\n(B) bikes\n(C) doughnuts\n(D) running shoes" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.203125, "loss": 4.137902736663818 }, { "id": "identity_318163", "image": "ScienceQA/images/5486.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared characteristic of these items?\nChoices:\n(A) sticky\n(B) stretchy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.09375, "loss": 4.58060359954834 }, { "id": "identity_30430", "image": "PlotQA/images/62059.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the 3rd group of bars from the top ?" }, { "from": "gpt", "value": "The answer is Malaysia" } ], "clip_score": 17.03125, "loss": 4.078938007354736 }, { "id": "identity_66278", "image": "AI2D/images/2382.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stages are there in the life cycle depicted in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.078125, "loss": 4.177874565124512 }, { "id": "identity_336886", "image": "PMC-VQA/images/PMC7505619_FIG3_258226.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of imaging technique was utilized in this study?\nChoices:\n(A) CT scan\n(B) MRI scan\n(C) X-ray scan\n(D) PET scan." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.65625, "loss": 4.143697738647461 }, { "id": "identity_20204", "image": "PMC-VQA/images/PMC5821967_fig1_278985.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of image is shown in the sagittal plane? \nChoices:\n(A) Axial plane T2-weighted image \n(B) Gadolinium-enhanced T1-weighted image \n(C) Coronal plane T1-weighted image \n(D) Sagittal plane T2-weighted image " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.34375, "loss": 4.518134117126465 }, { "id": "identity_38084", "image": "IconQA/images/74873.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner less likely to land?\nChoices:\n(A) white\n(B) blue" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.1875, "loss": 4.712811470031738 }, { "id": "identity_2463", "image": "AI2D/images/2450.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What process occurs at stage E?\nChoices:\n(A) Hatching of eggs\n(B) Pupating\n(C) Emergence of adult\n(D) Larval cycle" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.796875, "loss": 4.608763694763184 }, { "id": "identity_338651", "image": "VQA2.0/images/217339.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the baby have around their neck?" }, { "from": "gpt", "value": "The answer is bib" } ], "clip_score": 26.671875, "loss": 4.496833801269531 }, { "id": "identity_320225", "image": "ScienceQA/images/12735.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During this period, heat was transferred from () to ().\nChoices:\n(A) each salmon . . . the surroundings\n(B) the surroundings . . . each salmon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.234375, "loss": 4.042536735534668 }, { "id": "identity_216456", "image": "VQA2.0/images/465005.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.328125, "loss": 4.234353065490723 }, { "id": "identity_213349", "image": "A-OKVQA/images/61498.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible in the image?" }, { "from": "gpt", "value": "The answer is Three players" } ], "clip_score": 21.25, "loss": 4.710143089294434 }, { "id": "identity_264464", "image": "IconQA/images/88484.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine if the number of stars is even or odd.\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.984375, "loss": 4.054062843322754 }, { "id": "identity_223338", "image": "VizWiz/images/VizWiz_train_00011989.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many crackers are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.328125, "loss": 4.133526802062988 }, { "id": "identity_320094", "image": "TextVQA/images/368c7d8d7c3cc65c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the rest of the cafe name on the bottle with the orange lid?" }, { "from": "gpt", "value": "The answer is deli" } ], "clip_score": 23.21875, "loss": 5.204895973205566 }, { "id": "identity_32947", "image": "A-OKVQA/images/293081.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of water body is most likely in the service of this dock?\nChoices:\n(A) river\n(B) ocean\n(C) lake\n(D) sea" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.75, "loss": 4.352058410644531 }, { "id": "identity_108912", "image": "VQA2.0/images/26981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different food groups are represented on this plate?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.515625, "loss": 4.316236972808838 }, { "id": "identity_121438", "image": "VQA2.0/images/152815.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many framed items are mounted on the walls?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.015625, "loss": 4.713223934173584 }, { "id": "identity_29304", "image": "A-OKVQA/images/239005.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What shape is the small plate?\nChoices:\n(A) circle\n(B) octagon\n(C) square\n(D) hexagon" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 8.21875, "loss": 4.140993118286133 }, { "id": "identity_195031", "image": "A-OKVQA/images/298540.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are lying on the ground?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 29.4375, "loss": 4.32599401473999 }, { "id": "identity_214206", "image": "PMC-VQA/images/PMC9088844_F1_276123.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which side of the patient's body is marked with a letter in this image?" }, { "from": "gpt", "value": "The answer is Left side" } ], "clip_score": 30.09375, "loss": 4.536774158477783 }, { "id": "identity_269200", "image": "AI2D/images/2975.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, what does B represent?\nChoices:\n(A) lower lip\n(B) gums\n(C) tongue\n(D) teeth" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.453125, "loss": 4.016852378845215 }, { "id": "identity_239033", "image": "A-OKVQA/images/410105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the possible reasons for the clothes being on hangers in a living room, and which one is most likely based on the context of the room?" }, { "from": "gpt", "value": "The answer is Airing out" } ], "clip_score": 25.53125, "loss": 5.0901384353637695 }, { "id": "identity_267347", "image": "FigureQA/images/60583.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Salmon the color with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.265625, "loss": 4.1161651611328125 }, { "id": "identity_4068", "image": "Super-CLEVR/images/superCLEVR_new_001932.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any small shiny things that have the same color as the small rubber bicycle?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 30.5625, "loss": 5.19455623626709 }, { "id": "identity_324747", "image": "ScienceQA/images/8698.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is shown?\nChoices:\n(A) Africa\n(B) Europe\n(C) Asia\n(D) Antarctica" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.640625, "loss": 4.444947242736816 }, { "id": "identity_305575", "image": "PMC-VQA/images/PMC7644528_Fig1_292069.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the black arrows in this signify? \nChoices:\n(A) Disruption of basement membrane \n(B) Stroma \n(C) Lumina \n(D) Basement membrane integrity" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.296875, "loss": 5.119719982147217 }, { "id": "identity_220356", "image": "A-OKVQA/images/77745.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of natural barrier can be seen beyond the harbor?" }, { "from": "gpt", "value": "The answer is A beach" } ], "clip_score": 24.90625, "loss": 4.099503040313721 }, { "id": "identity_272044", "image": "A-OKVQA/images/453485.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what way will the water be heated?\nChoices:\n(A) sink tap\n(B) pot\n(C) oven\n(D) kettle" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.6875, "loss": 4.018702030181885 }, { "id": "identity_326192", "image": "A-OKVQA/images/315621.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is in front of the building with yellow trim?\nChoices:\n(A) monkey\n(B) cow\n(C) motor bike\n(D) baby" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.875, "loss": 4.1923418045043945 }, { "id": "identity_326943", "image": "PMC-VQA/images/PMC3755418_fig4_227731.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the finding in this area?\nChoices:\n(A) lymphadenopathy\n(B) metastasis\n(C) benign tissue\n(D) abscess" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.90625, "loss": 4.627901077270508 }, { "id": "identity_15420", "image": "VQA2.0/images/168558.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the plane?" }, { "from": "gpt", "value": "The answer is sky" } ], "clip_score": 24.234375, "loss": 4.3602399826049805 }, { "id": "identity_210357", "image": "A-OKVQA/images/202004.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bags are visible on the cart?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.765625, "loss": 4.306410312652588 }, { "id": "identity_152626", "image": "ScienceQA/images/5688.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wings does the bird in the image have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.609375, "loss": 4.194516181945801 }, { "id": "identity_332173", "image": "PMC-VQA/images/PMC5817300_fig6_277598.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the highlighted area in the image indicate?\nChoices:\n(A) The presence of disease \n(B) The response to treatment \n(C) The need for additional treatment \n(D) The worsening of the condition" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.734375, "loss": 4.838664531707764 }, { "id": "identity_118419", "image": "VizWiz/images/VizWiz_train_00014616.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible food remnants on the plate?" }, { "from": "gpt", "value": "The answer is Yes, small specks visible" } ], "clip_score": 23.734375, "loss": 4.095656394958496 }, { "id": "identity_118933", "image": "Super-CLEVR/images/superCLEVR_new_000719.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows are visible on the side of the bus facing the camera?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.25, "loss": 4.266334533691406 }, { "id": "identity_293112", "image": "TextVQA/images/1ce8e44536df5739.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this a large computer or a television?" }, { "from": "gpt", "value": "The answer is samsung" } ], "clip_score": 14.7265625, "loss": 4.713409900665283 }, { "id": "identity_6268", "image": "FigureQA/images/49327.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gold the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.0625, "loss": 4.01001501083374 }, { "id": "identity_210031", "image": "TextVQA/images/caaf42e872ce47fb.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible on the court?" }, { "from": "gpt", "value": "The answer is Ten" } ], "clip_score": 21.546875, "loss": 4.144343852996826 }, { "id": "identity_210196", "image": "A-OKVQA/images/33505.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many gaming controllers are visible on the floor?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.625, "loss": 4.970395088195801 }, { "id": "identity_247690", "image": "A-OKVQA/images/262609.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the company whose logo is on the left building, and what type of products is this company most known for?" }, { "from": "gpt", "value": "The answer is Apple Inc., electronic devices" } ], "clip_score": 18.421875, "loss": 4.211068630218506 }, { "id": "identity_9503", "image": "FigureQA/images/78434.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Green the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.484375, "loss": 4.082823276519775 }, { "id": "identity_284492", "image": "IconQA/images/28246.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this shape have an open or closed structure?\nChoices:\n(A) closed\n(B) open" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.6875, "loss": 4.330180644989014 }, { "id": "identity_297231", "image": "TQA/images/question_images/erosion_6850.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options collapses and leads to a fall? \n(A) plunge pool\n(B) overhang\n(C) pebbles and stones\n(D) soft rock" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.859375, "loss": 4.548736095428467 }, { "id": "identity_271490", "image": "PMC-VQA/images/PMC4551529_Fig2_418030.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the image labeled \"iii ghost tangle\" depict?\nChoices:\n(A) Intense tau staining of PHFs \n(B) Absence of PHFs \n(C) Ghost tangles with lightly staining PHFs \n(D) Ghost tangles with robust staining of fibrous PHFs." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.296875, "loss": 4.5697736740112305 }, { "id": "identity_268832", "image": "PMC-VQA/images/PMC6032807_F14_338297.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique showed the sclerosis of the manubrium sterni with medial clavicles? Is it:\n(A) MRI\n(B) X-ray\n(C) CT\n(D) PET." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.046875, "loss": 4.064097881317139 }, { "id": "identity_315592", "image": "PMC-VQA/images/PMC8127231_Fig1_443553.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the foreign material indicated in the image?\nChoices:\n(A) Fecal material\n(B) Contrast dye\n(C) Air bubbles\n(D) Unspecified material" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.53125, "loss": 4.730301856994629 }, { "id": "identity_299985", "image": "PMC-VQA/images/PMC4677348_f3_453251.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which method of reconstruction was utilized in the study?\nChoices:\n(A) MRI\n(B) CT scan\n(C) PET scan\n(D) δ/β imaging" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.28125, "loss": 4.5441179275512695 }, { "id": "identity_303478", "image": "A-OKVQA/images/3823.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is in the middle of the area?\nChoices:\n(A) cat\n(B) elephant\n(C) dog\n(D) laptop" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 11.84375, "loss": 4.187867641448975 }, { "id": "identity_272145", "image": "A-OKVQA/images/428754.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On what surface is the person standing? \n(A) snow \n(B) water \n(C) sticks \n(D) concrete" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.421875, "loss": 4.4901299476623535 }, { "id": "identity_35204", "image": "PMC-VQA/images/PMC3744490_pone-0072265-g004_224985.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the yellow color represent in the coded images? \nChoices:\n(A) Charcoal volume with the largest volume \n(B) Charcoal volume with the smallest volume \n(C) Charcoal volume with an average volume \n(D) Charcoal volume with unknown volume. " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.703125, "loss": 4.2999091148376465 }, { "id": "identity_316744", "image": "FigureQA/images/42432.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this color less than Brown?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.390625, "loss": 4.017153739929199 }, { "id": "identity_324809", "image": "PMC-VQA/images/PMC4009138_Fig3_285691.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where did the FB move in image e?\nChoices:\n(A) Left paraesophageal adipose space \n(B) Esophageal lumen \n(C) Right paraesophageal adipose space \n(D) Mediastinum." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.921875, "loss": 4.249419212341309 }, { "id": "identity_119685", "image": "VQA2.0/images/528971.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many French fries can be seen on the plate?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 19.140625, "loss": 4.552700519561768 }, { "id": "identity_313758", "image": "PMC-VQA/images/PMC7028369_fig2s2_120390.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the cells is depicted in the images?\nChoices:\n(A) Dendrites\n(B) Axons\n(C) Soma\n(D) Nucleus" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.390625, "loss": 4.998996257781982 }, { "id": "identity_254495", "image": "A-OKVQA/images/536145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many letters are there in the word between \"see\" and \"now\" on the person's back, and what is the word?" }, { "from": "gpt", "value": "The answer is 2, ME" } ], "clip_score": 12.3203125, "loss": 4.221651554107666 }, { "id": "identity_331911", "image": "A-OKVQA/images/432527.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is graphite used in making?\nChoices:\n(A) ball\n(B) shoe\n(C) net\n(D) racket" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.390625, "loss": 4.15216064453125 }, { "id": "identity_285085", "image": "PMC-VQA/images/PMC6053234_pone.0201064.g001_343863.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the patient, which brain region exhibited collapsed lobes?\nChoices:\n(A) Parietal lobes\n(B) Temporal lobes\n(C) Frontal lobes\n(D) Occipital lobes" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.03125, "loss": 4.41007661819458 }, { "id": "identity_275879", "image": "PMC-VQA/images/PMC5348553_Fig6_139017.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the object being indicated by the arrowhead in the image?\nChoices:\n(A) A normal cell \n(B) A cancer cell \n(C) An infected cell with mitochondrial staining \n(D) An infected cell with caspase-6 staining." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.171875, "loss": 4.207075119018555 }, { "id": "identity_520", "image": "A-OKVQA/images/137377.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is she taking a photo of?\nChoices:\n(A) yarn\n(B) potato\n(C) doll\n(D) ball" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.796875, "loss": 4.235100269317627 }, { "id": "identity_46298", "image": "DocVQA/images/kggn0226_36.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of film copies are indicated in the document?" }, { "from": "gpt", "value": "The answer is Originals" } ], "clip_score": 23.734375, "loss": 5.096119403839111 }, { "id": "identity_26824", "image": "PMC-VQA/images/PMC6085685_Fig2_352459.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What abnormality is present in the mandibular canals? \nChoices:\n(A) they are narrowed \n(B) they are absent \n(C) they are widened \n(D) they are twisted " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.078125, "loss": 5.172725677490234 }, { "id": "identity_308697", "image": "PMC-VQA/images/PMC7319815_ytaa086-F1_200240.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of prosthesis is depicted in the image?\nChoices:\n(A) Biological Prosthesis\n(B) Mechanical Prosthesis\n(C) Tissue-engineered Prosthesis\n(D) Autograft Prosthesis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.328125, "loss": 4.447083473205566 }, { "id": "identity_322306", "image": "A-OKVQA/images/144618.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of hat is the person in the air wearing?\nChoices:\n(A) beanie\n(B) fedora\n(C) baseball cap\n(D) derby" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.125, "loss": 4.168928146362305 }, { "id": "identity_283844", "image": "A-OKVQA/images/190509.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options aids in the communication between transponders and the cab and train control systems? \n(A) network\n(B) wire\n(C) signal\n(D) cab" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.203125, "loss": 4.216329097747803 }, { "id": "identity_2193", "image": "A-OKVQA/images/288229.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of area is this?\nChoices:\n(A) commercial\n(B) rural\n(C) country\n(D) residential" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 7.8203125, "loss": 4.348605155944824 }, { "id": "identity_332896", "image": "A-OKVQA/images/12731.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of trains are depicted here?\nChoices:\n(A) miniature\n(B) electric\n(C) tram\n(D) diesel" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.609375, "loss": 4.166093349456787 }, { "id": "identity_288201", "image": "A-OKVQA/images/390213.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the meaning of the green sign?\nChoices:\n(A) plug in\n(B) give all\n(C) move mentally\n(D) say prayer" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.921875, "loss": 4.162114143371582 }, { "id": "identity_116953", "image": "PMC-VQA/images/PMC4551529_Fig2_418030.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many nuclei can be counted in the image?" }, { "from": "gpt", "value": "The answer is Seven nuclei" } ], "clip_score": 19.25, "loss": 4.215989589691162 }, { "id": "identity_8064", "image": "FigureQA/images/40568.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid greater than Peru?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 36.59375, "loss": 4.192408084869385 }, { "id": "identity_317169", "image": "PMC-VQA/images/PMC4369826_Fig2_370506.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of tumor is observed in this ultrasound image?\nChoices:\n(A) Predominantly cystic\n(B) Predominantly solid\n(C) Half cystic and half solid\n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.03125, "loss": 4.174384117126465 }, { "id": "identity_189961", "image": "PMC-VQA/images/PMC7138870_Fig10_146243.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the red arrow pointing to?" }, { "from": "gpt", "value": "The answer is Stenosis" } ], "clip_score": 17.453125, "loss": 4.186694145202637 }, { "id": "identity_17213", "image": "DVQA/images/bar_train_00121067.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second bar from the left?" }, { "from": "gpt", "value": "The answer is notion" } ], "clip_score": 22.96875, "loss": 6.034277439117432 }, { "id": "identity_28577", "image": "PMC-VQA/images/PMC7886037_acn351277-fig-0004_368929.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the anomaly shown in the image? \nChoices:\n(A) Arachnoid cysts \n(B) Hydrocephalus \n(C) Dural venous anomaly \n(D) Cerebral edema " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.234375, "loss": 4.455031394958496 }, { "id": "identity_330010", "image": "A-OKVQA/images/172924.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which continent is this train located?\nChoices:\n(A) north america\n(B) europe\n(C) africa\n(D) asia" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.671875, "loss": 4.04715633392334 }, { "id": "identity_101271", "image": "IconQA/images/9458.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this shape have any vertices?" }, { "from": "gpt", "value": "The answer is No vertices" } ], "clip_score": 21.890625, "loss": 4.335778713226318 }, { "id": "identity_10871", "image": "AI2D/images/313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the diagram, grasshoppers are food for:\nChoices:\n(A) Hawks\n(B) Snakes\n(C) Frogs\n(D) Other grasshoppers" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 37.21875, "loss": 4.153359889984131 }, { "id": "identity_273649", "image": "VizWiz/images/VizWiz_train_00014937.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What can be seen in this picture?" }, { "from": "gpt", "value": "The answer is facebook messenger dog rose in mouth" } ], "clip_score": 21.421875, "loss": 5.5258097648620605 }, { "id": "identity_286539", "image": "VizWiz/images/VizWiz_train_00004425.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you describe what is shown in the image?" }, { "from": "gpt", "value": "The answer is remote control" } ], "clip_score": 24.265625, "loss": 5.155412197113037 }, { "id": "identity_318797", "image": "ScienceQA/images/12736.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared characteristic of these three items?\nChoices:\n(A) fragile\n(B) blue\n(C) bouncy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.3125, "loss": 4.288363456726074 }, { "id": "identity_14180", "image": "ScienceQA/images/6356.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Virginia?\nChoices:\n(A) Oklahoma City\n(B) Richmond\n(C) Arlington\n(D) Portland" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.0625, "loss": 4.234625339508057 }, { "id": "identity_291165", "image": "TextVQA/images/03e77cd31f9d6306.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the man in the front wearing green?" }, { "from": "gpt", "value": "The answer is benedita" } ], "clip_score": 14.6640625, "loss": 5.767880916595459 }, { "id": "identity_287532", "image": "Super-CLEVR/images/superCLEVR_new_000861.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does a red object exist with the same shape as the large yellow object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 13.5078125, "loss": 4.978430271148682 }, { "id": "identity_222394", "image": "Super-CLEVR/images/superCLEVR_new_001938.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Gold" } ], "clip_score": 19.671875, "loss": 4.095223903656006 }, { "id": "identity_263593", "image": "AI2D/images/3002.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which region on the diagram corresponds to the leaflet?\nChoices:\n(A) b\n(B) a\n(C) d\n(D) c" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.40625, "loss": 4.014433860778809 }, { "id": "identity_268958", "image": "TQA/images/abc_question_images/human_system_eye_16135.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is the optic nerve located?\nChoices:\n(A) N\n(B) H\n(C) A\n(D) U" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.828125, "loss": 4.374095916748047 }, { "id": "identity_27311", "image": "A-OKVQA/images/461805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the event shown in the picture?\nChoices:\n(A) food festival\n(B) carnival\n(C) car parking\n(D) car show" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.5859375, "loss": 4.080157279968262 }, { "id": "identity_1948", "image": "VQA2.0/images/143783.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the dog sitting?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.625, "loss": 4.270089626312256 }, { "id": "identity_305958", "image": "AI2D/images/4899.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the volcano is option A?\nChoices:\n(A) Magma\n(B) Main Vent\n(C) Crater\n(D) Magma Chamber" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.5625, "loss": 4.395153045654297 }, { "id": "identity_278724", "image": "VizWiz/images/VizWiz_train_00010302.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is visible in this picture?" }, { "from": "gpt", "value": "The answer is slider for audio word entsperren" } ], "clip_score": 20.765625, "loss": 4.532048225402832 }, { "id": "identity_64472", "image": "AI2D/images/4390.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many leaves have a netlike vein pattern?" }, { "from": "gpt", "value": "The answer is Two leaves" } ], "clip_score": 25.375, "loss": 4.952735900878906 }, { "id": "identity_320969", "image": "PMC-VQA/images/PMC8816609_fig4_184593.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique used in this image?\nChoices:\n(A) X-ray imaging \n(B) Transmission electron microscopy \n(C) Scanning electron microscopy \n(D) Optical microscopy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.765625, "loss": 4.2748589515686035 }, { "id": "identity_111546", "image": "Super-CLEVR/images/superCLEVR_new_000400.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly to the left of the large gray metallic motorbike?" }, { "from": "gpt", "value": "The answer is A car" } ], "clip_score": 23.359375, "loss": 4.694749355316162 }, { "id": "identity_24106", "image": "A-OKVQA/images/221328.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the type of garment that the woman in blue is wearing?\nChoices:\n(A) raincoat\n(B) chut thai\n(C) kimono\n(D) sari" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.03125, "loss": 4.332418441772461 }, { "id": "identity_240550", "image": "VQA2.0/images/381282.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different species of animals can be identified in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.28125, "loss": 4.3740129470825195 }, { "id": "identity_315339", "image": "TQA/images/question_images/earth_eclipses_1656.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which body lies in the umbra of this event?\nChoices:\n(A) Sun\n(B) Moon\n(C) Mars\n(D) Earth" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.46875, "loss": 4.057356357574463 }, { "id": "identity_269806", "image": "FigureQA/images/18066.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Light Seafoam or Gray?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.78125, "loss": 4.554551601409912 }, { "id": "identity_123799", "image": "VQA2.0/images/5115.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the cat visible in the image?" }, { "from": "gpt", "value": "The answer is within ten words" } ], "clip_score": 16.453125, "loss": 4.976799964904785 }, { "id": "identity_276350", "image": "Super-CLEVR/images/superCLEVR_new_001912.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the color of the shiny wagon in front of the large metal wagon the same as the color of the metal wagon behind the tiny wagon?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 19.953125, "loss": 5.24221134185791 }, { "id": "identity_336505", "image": "A-OKVQA/images/451793.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who is in the most danger?\nChoices:\n(A) back person\n(B) individual\n(C) driver\n(D) right person" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.0625, "loss": 4.222434043884277 }, { "id": "identity_318462", "image": "PMC-VQA/images/PMC7880116_f0003_366918.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this portable chest X-ray show?\nChoices:\n(A) Enlarged liver \n(B) Heart disease \n(C) Lung cancer \n(D) Brain tumor" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.109375, "loss": 4.776623249053955 }, { "id": "identity_295475", "image": "PMC-VQA/images/PMC7911842_diagnostics-11-00169-f001_375905.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of modality is displayed in the plot on the far left? \nOptions:\n(A) contrast-enhanced T1\n(B) T2\n(C) T1\n(D) Flair" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.7421875, "loss": 4.072007179260254 }, { "id": "identity_292746", "image": "PMC-VQA/images/PMC6536547_Fig8_485626.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be observed in the magnified view of the rectangular dotted area at the center of the image?\nChoices:\n(A) Bacteria \n(B) Fungi \n(C) Plant cells \n(D) None of the above." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.84375, "loss": 4.3491082191467285 }, { "id": "identity_268375", "image": "VizWiz/images/VizWiz_train_00014687.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide me with more information about the picture?" }, { "from": "gpt", "value": "The answer is black rectangle small white rectangles" } ], "clip_score": 23.703125, "loss": 4.401396751403809 }, { "id": "identity_32357", "image": "PMC-VQA/images/PMC6947777_f1_96231.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the white line represent in the figure? \nChoices:\n(A) The border of the image \n(B) The area affected by a particular disease \n(C) The trajectory of the scanner \n(D) The location of a medical facility " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.640625, "loss": 4.639113426208496 }, { "id": "identity_15926", "image": "ScienceQA/images/10253.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these four objects have in common?\nChoices:\n(A) slippery\n(B) hard\n(C) transparent" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.296875, "loss": 4.168553352355957 }, { "id": "identity_194757", "image": "PMC-VQA/images/PMC4623697_F3_438097.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color in the central region of the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 20.53125, "loss": 4.243101596832275 }, { "id": "identity_21873", "image": "IconQA/images/60315.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can you trace a circle with this shape?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.140625, "loss": 4.3464837074279785 }, { "id": "identity_267495", "image": "A-OKVQA/images/428440.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following choices serves as a figurehead in this scenario? \n(A) snake \n(B) dragon \n(C) goat \n(D) drum" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.4140625, "loss": 4.485690116882324 }, { "id": "identity_334503", "image": "PMC-VQA/images/PMC8319602_F1_22092.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which vein was preserved during the hepatic resection shown in the image? \nChoices:\n(A) Left hepatic vein (LHV) \n(B) Right hepatic vein (RHV) \n(C) Middle hepatic vein (MHV) \n(D) Portal vein (PV)" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.984375, "loss": 4.1728949546813965 }, { "id": "identity_207047", "image": "VQA-AS/images/7584.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many clouds are in the sky?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.421875, "loss": 4.419070243835449 }, { "id": "identity_1836", "image": "DocVQA/images/gjbl0226_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where's the DESTINATION?" }, { "from": "gpt", "value": "The answer is China/Orient" } ], "clip_score": 15.3203125, "loss": 4.968562126159668 }, { "id": "identity_263932", "image": "TQA/images/question_images/glaciers_6928.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which section of the diagram is the most probable location for sediment deposition?\nChoices:\n(A) C\n(B) E\n(C) A\n(D) B" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.796875, "loss": 4.085725784301758 }, { "id": "identity_220605", "image": "A-OKVQA/images/172924.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many traffic lights are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 18.984375, "loss": 4.669492721557617 }, { "id": "identity_261517", "image": "FigureQA/images/83415.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Olive Drab the color with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.828125, "loss": 4.729300498962402 }, { "id": "identity_7800", "image": "Super-CLEVR/images/superCLEVR_new_001250.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the size of the brown motorbike the same as the brown rubber fighter?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 17.953125, "loss": 4.870211124420166 }, { "id": "identity_187753", "image": "VQA-AS/images/14669.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object on the ground to the right of the person?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 25.421875, "loss": 4.072441101074219 }, { "id": "identity_323953", "image": "A-OKVQA/images/117601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What meal is this most likely?\nChoices:\n(A) lunch\n(B) desert\n(C) breakfast\n(D) snack" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.609375, "loss": 4.185020446777344 }, { "id": "identity_106938", "image": "VQA2.0/images/448609.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 11.6171875, "loss": 4.602806568145752 }, { "id": "identity_312093", "image": "A-OKVQA/images/376590.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What maneuver is the person in red performing?\nChoices:\n(A) grind\n(B) front spin\n(C) hand plant\n(D) big air" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.953125, "loss": 4.337015628814697 }, { "id": "identity_21744", "image": "A-OKVQA/images/245598.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the boy in red wearing a glove?\nChoices:\n(A) fashion\n(B) health\n(C) warmth\n(D) to catch" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 12.765625, "loss": 4.379464149475098 }, { "id": "identity_110798", "image": "A-OKVQA/images/42953.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing helmets in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 18.875, "loss": 4.981448173522949 }, { "id": "identity_104373", "image": "Super-CLEVR/images/superCLEVR_new_000486.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the largest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 23.734375, "loss": 4.142851829528809 }, { "id": "identity_16490", "image": "DVQA/images/bar_train_00143914.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 20.25, "loss": 4.168064594268799 }, { "id": "identity_140303", "image": "DocVQA/images/lmwn0226_43.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many solutions are provided for \"Soft Tissue Laxity\"?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.796875, "loss": 4.364402770996094 }, { "id": "identity_299221", "image": "AI2D/images/629.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which term corresponds to label C in the given options: seed, root, adult plant, or young plant?\nChoices:\n(A) seed\n(B) root\n(C) adult plant\n(D) young plant" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 38.15625, "loss": 4.658143043518066 }, { "id": "identity_285031", "image": "A-OKVQA/images/368148.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which object in the picture is the tallest among the options provided: the boy, buildings, bus, or clock tower?\nChoices:\n(A) boy\n(B) buildings\n(C) bus\n(D) clock tower" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.171875, "loss": 5.171987056732178 }, { "id": "identity_2005", "image": "A-OKVQA/images/133964.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of bird is on the red white and green barrel?\nChoices:\n(A) eagle\n(B) swan\n(C) pelican\n(D) rooster" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.2421875, "loss": 4.118136405944824 }, { "id": "identity_37941", "image": "A-OKVQA/images/372804.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The woman wearing the conical hat is a denizen of which nation?\nChoices:\n(A) vietnam\n(B) thailand\n(C) japan\n(D) china" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.296875, "loss": 4.192030906677246 }, { "id": "identity_10432", "image": "ScienceQA/images/9924.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Maryland\n(B) North Carolina\n(C) Georgia\n(D) South Carolina" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.625, "loss": 4.036019325256348 }, { "id": "identity_307300", "image": "VizWiz/images/VizWiz_train_00019857.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this?" }, { "from": "gpt", "value": "The answer is blink gel tears" } ], "clip_score": 36.46875, "loss": 4.793115615844727 }, { "id": "identity_301043", "image": "FigureQA/images/69909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.90625, "loss": 4.0260419845581055 }, { "id": "identity_309652", "image": "Super-CLEVR/images/superCLEVR_new_001113.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the large thing that is both right of the metal tandem bike and behind the large truck made of?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 24.609375, "loss": 4.5788044929504395 }, { "id": "identity_325903", "image": "PMC-VQA/images/PMC7382807_Fig2_220368.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the imaging tests indicate?\nChoices:\n(A) They do not show any fractures\n(B) They do not show any abnormalities\n(C) They show a talocalcaneal coalition\n(D) They show a bony protuberance fracture" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.8125, "loss": 4.343120098114014 }, { "id": "identity_206554", "image": "PMC-VQA/images/PMC4719561_Fig2_463508.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the imaging shown in the picture taken from the front or the side of the body?" }, { "from": "gpt", "value": "The answer is Front" } ], "clip_score": 29.5, "loss": 4.384265899658203 }, { "id": "identity_320197", "image": "PMC-VQA/images/PMC5821967_fig1_278985.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of image is depicted here?\nChoices:\n(A) Axial plane T2-weighted image \n(B) Gadolinium-enhanced T1-weighted image \n(C) Coronal plane T1-weighted image \n(D) Sagittal plane T2-weighted image" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.328125, "loss": 4.324617385864258 }, { "id": "identity_277351", "image": "DVQA/images/bar_train_00000775.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group of bars from the bottom is labeled as the fourth?" }, { "from": "gpt", "value": "The answer is yield" } ], "clip_score": 23.34375, "loss": 4.380892276763916 }, { "id": "identity_202565", "image": "PMC-VQA/images/PMC6408898_RSPB20182733F2_447708.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many brain slices are shown in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 24.40625, "loss": 4.029445648193359 }, { "id": "identity_29449", "image": "AI2D/images/3882.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the middle of the leaf called?\nChoices:\n(A) scar\n(B) midrib\n(C) none of the above\n(D) lenticel" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.96875, "loss": 4.162662506103516 }, { "id": "identity_332800", "image": "PMC-VQA/images/PMC3599802_F2_192545.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is marked in blue in the image?\nChoices:\n(A) Planning target volume\n(B) Clinical target volume\n(C) Gross tumor volume\n(D) Target volume for treatment" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.375, "loss": 4.000402927398682 }, { "id": "identity_205131", "image": "TextVQA/images/0204574848b0bf05.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many signs can be seen thrown in the trashcan?" }, { "from": "gpt", "value": "The answer is Three signs" } ], "clip_score": 26.09375, "loss": 4.497940540313721 }, { "id": "identity_278338", "image": "AI2D/images/1634.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which region on the diagram represents the penumbra?\nChoices:\n(A) a\n(B) b\n(C) d\n(D) c" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.515625, "loss": 4.091610908508301 }, { "id": "identity_151844", "image": "ScienceQA/images/1273.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the bird's plumage?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 27.78125, "loss": 4.189545631408691 }, { "id": "identity_132758", "image": "VizWiz/images/VizWiz_train_00013464.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the object in the background with circular elements?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 12.515625, "loss": 4.107338905334473 }, { "id": "identity_112231", "image": "VQA2.0/images/526098.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of animal is the large stuffed toy hanging above the store entrance?" }, { "from": "gpt", "value": "The answer is Dog" } ], "clip_score": 23.640625, "loss": 4.146139621734619 }, { "id": "identity_250540", "image": "PMC-VQA/images/PMC5583184_Fig5_211156.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the displayed structure in the reference images, and how does it relate to motor control?" }, { "from": "gpt", "value": "The answer is It transmits motor commands for voluntary muscle control" } ], "clip_score": 27.421875, "loss": 4.1469407081604 }, { "id": "identity_218424", "image": "PMC-VQA/images/PMC6951026_Fig5_97110.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the likely reason for the medical procedure that resulted in this image being taken?" }, { "from": "gpt", "value": "The answer is To investigate blood vessel abnormalities" } ], "clip_score": 28.5625, "loss": 4.094363689422607 }, { "id": "identity_262978", "image": "A-OKVQA/images/578705.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What object is directed towards the man on the motorcycle?\nChoices:\n(A) camera\n(B) antique blunderbuss\n(C) paintball\n(D) ruler" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.421875, "loss": 4.167685031890869 }, { "id": "identity_10455", "image": "VQA2.0/images/109654.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shadows are on the ground?" }, { "from": "gpt", "value": "The answer is lots" } ], "clip_score": 12.6796875, "loss": 4.568436622619629 }, { "id": "identity_334225", "image": "Super-CLEVR/images/superCLEVR_new_000785.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there the same number of metal bicycles behind the brown car and the blue objects?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 28.484375, "loss": 5.234293460845947 }, { "id": "identity_336397", "image": "ScienceQA/images/5334.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this flower pot made of?\nChoices:\n(A) cotton\n(B) ceramic" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.1875, "loss": 4.21346378326416 }, { "id": "identity_305672", "image": "TQA/images/abc_question_images/human_system_circulatory_11381.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label refers to this organ?\nChoices:\n(A) L\n(B) R\n(C) C\n(D) W" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.84375, "loss": 4.154933929443359 }, { "id": "identity_278354", "image": "AI2D/images/2065.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the food web diagram, which organisms in the following choices are considered primary consumers?\n(A) wolves and owls\n(B) Deer and rodents\n(C) grass and fungi\n(D) owls and foxes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.140625, "loss": 4.472919940948486 }, { "id": "identity_265663", "image": "TQA/images/question_images/photosynthesis_1307.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organ is responsible for photosynthesis among the following choices: Flower, Leaf, Root, or Fruit?\nChoices:\n(A) Flower\n(B) Leaf\n(C) Root\n(D) Fruit" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.265625, "loss": 4.0884904861450195 }, { "id": "identity_197665", "image": "A-OKVQA/images/479948.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the umpire's shirt?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 22.6875, "loss": 4.168058395385742 }, { "id": "identity_336398", "image": "VQA2.0/images/404015.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the picture monochrome?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 21.296875, "loss": 4.004289150238037 }, { "id": "identity_332245", "image": "PMC-VQA/images/PMC8105406_Fig1_435784.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this radiograph (C) show at 5 years of age?\nChoices:\n(A) Normal hip development\n(B) Hip dysplasia with a flat and irregular acetabular roof\n(C) Fractured hip\n(D) Dislocated hip joint." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.859375, "loss": 4.096002101898193 }, { "id": "identity_330539", "image": "ScienceQA/images/18112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Observe the image. Which word best describes the taste of this substance?\nChoices:\n(A) sweet\n(B) bitter\n(C) minty" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.0, "loss": 4.118786811828613 }, { "id": "identity_328222", "image": "PMC-VQA/images/PMC5841142_awx262-F4_284852.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the Kapp maps show?\nChoices:\n(A) Tissue type \n(B) Progression of lesion \n(C) Contrast enhancement \n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.125, "loss": 4.3081583976745605 }, { "id": "identity_283313", "image": "PMC-VQA/images/PMC4149355_pone-0099655-g001_316615.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the white line in the figure? \nChoices:\n(A) Start of the observation \n(B) End of the observation \n(C) Middle of the observation \n(D) None of the above." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.84375, "loss": 4.387818336486816 }, { "id": "identity_329889", "image": "TabMWP/images/13290.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Sam checked his calendar to see how much time he spent babysitting each month. In which month did Sam babysit the least amount of time?\nChoices:\n(A) May\n(B) June\n(C) July\n(D) August" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.5, "loss": 4.0530500411987305 }, { "id": "identity_29193", "image": "TextVQA/images/4a58ce5454cef92d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what team is on the left?" }, { "from": "gpt", "value": "The answer is national" } ], "clip_score": 20.734375, "loss": 4.484675407409668 }, { "id": "identity_200861", "image": "A-OKVQA/images/435096.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many riders are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.59375, "loss": 4.306394100189209 }, { "id": "identity_287722", "image": "A-OKVQA/images/430688.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the current direction of movement of the bear shown in the image? \n(A) down\n(B) up\n(C) forward\n(D) none" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 11.625, "loss": 4.723111629486084 }, { "id": "identity_26052", "image": "PMC-VQA/images/PMC5677349_FIG1_237126.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most likely diagnosis for the condition? \nChoices:\n(A) Acute cholecystitis \n(B) Aortic aneurysm \n(C) Perforated appendicitis \n(D) Esophageal cancer " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.015625, "loss": 4.238837242126465 }, { "id": "identity_271498", "image": "PMC-VQA/images/PMC4028266_pone-0098088-g008_290149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the image provided? \nOptions:\n(A) Cross-section of an ovary\n(B) Apical portion of an ovariole\n(C) Apical portion of the oviduct\n(D) Section of the uterus." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.03125, "loss": 4.026858806610107 }, { "id": "identity_320177", "image": "DocVQA/images/ssvx0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is trying to influence the selection of the Democratic party presidential candidate?" }, { "from": "gpt", "value": "The answer is union leadership" } ], "clip_score": 19.421875, "loss": 4.057814121246338 }, { "id": "identity_299419", "image": "ScienceQA/images/9661.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Ohio?\nChoices:\n(A) Charleston\n(B) Columbus\n(C) Green Bay\n(D) Madison" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.75, "loss": 4.224897861480713 }, { "id": "identity_132165", "image": "VizWiz/images/VizWiz_train_00012361.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the object?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 18.59375, "loss": 4.177075386047363 }, { "id": "identity_17500", "image": "PMC-VQA/images/PMC7249623_Fig3_178029.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the small arrow indicate in the ultrasonographic image? \nChoices:\n(A) Tendon \n(B) Blood vessel \n(C) Bone \n(D) Hypertrophied muscle. " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.078125, "loss": 4.7261152267456055 }, { "id": "identity_37650", "image": "A-OKVQA/images/33572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would be the best use for this type of bike?\nChoices:\n(A) racing\n(B) cruising\n(C) jumps\n(D) tricks" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.1875, "loss": 4.992146968841553 }, { "id": "identity_297251", "image": "A-OKVQA/images/389021.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the intended use of the large sign?\nChoices:\n(A) advertisement\n(B) warning\n(C) identification\n(D) direction" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.5078125, "loss": 4.56098747253418 }, { "id": "identity_292553", "image": "Super-CLEVR/images/superCLEVR_new_001187.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the color of the rubber fighter and the tiny matte car identical?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 26.71875, "loss": 5.077506065368652 }, { "id": "identity_293673", "image": "PMC-VQA/images/PMC4357181_Fig2_366923.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, what do the letters st represent?\nChoices:\n(A) tentacles\n(B) mouth\n(C) anus\n(D) stomach" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.265625, "loss": 4.533596038818359 }, { "id": "identity_14872", "image": "PMC-VQA/images/PMC5570225_fig2_206568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown as the orange box in the inset image? \nChoices:\n(A) convoluted PM \n(B) anterior midgut region \n(C) posterior midgut region \n(D) complex membrane folding. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.0, "loss": 4.663393020629883 }, { "id": "identity_264694", "image": "AI2D/images/2884.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the dentin?\nOptions:\n(A) E\n(B) G\n(C) C\n(D) F" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.9375, "loss": 4.4410505294799805 }, { "id": "identity_303505", "image": "Super-CLEVR/images/superCLEVR_new_001400.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any gray bus to the left of the red rubber biplane with the dirtbike on its left side?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 25.796875, "loss": 5.599850177764893 }, { "id": "identity_14443", "image": "A-OKVQA/images/416337.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which fruits are the least sweet?\nChoices:\n(A) biggest\n(B) green\n(C) most freckled\n(D) smallest" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.96875, "loss": 4.1285481452941895 }, { "id": "identity_188667", "image": "VQA2.0/images/520199.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible laptops have a visible Apple logo?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.640625, "loss": 5.072446823120117 }, { "id": "identity_283714", "image": "A-OKVQA/images/20935.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term for following someone you are attracted to?\nChoices:\n(A) stalking\n(B) knowledge\n(C) comradery\n(D) friendship" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.3515625, "loss": 4.364602565765381 }, { "id": "identity_273189", "image": "AI2D/images/3521.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What structure is found at the center of the nucleus?\nChoices:\n(A) Cell membrane\n(B) Cilia\n(C) Nucleolus\n(D) Nuclear envelope" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.671875, "loss": 4.043886184692383 }, { "id": "identity_290856", "image": "A-OKVQA/images/44065.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the material used to create the stick on the woman's shoulder? Is it:\n(A) metal\n(B) bamboo\n(C) wood\n(D) plastic" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.578125, "loss": 4.289211273193359 }, { "id": "identity_209270", "image": "PMC-VQA/images/PMC6855517_F1_68279.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label at the bottom of the image?" }, { "from": "gpt", "value": "The answer is It is a sequence or series number \"B3" } ], "clip_score": 13.921875, "loss": 4.2463812828063965 }, { "id": "identity_297465", "image": "PMC-VQA/images/PMC4450475_Fig5_391774.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being depicted in the image?\nOptions:\n(A) A post-operative high-resolution temporal bone CT scan.\n(B) A surgical procedure for the temporal bone.\n(C) Bony overhang around the round window.\n(D) Abnormalities in the inner ear." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.984375, "loss": 4.527964115142822 }, { "id": "identity_135244", "image": "DVQA/images/bar_train_00190801.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item has the highest number of units sold in the \"safety\" store?" }, { "from": "gpt", "value": "The answer is bond" } ], "clip_score": 30.0625, "loss": 4.949897289276123 }, { "id": "identity_295892", "image": "TabMWP/images/28618.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which day did the bus driver notice the lowest number of passengers on her bus?\nChoices:\n(A) Thursday\n(B) Friday\n(C) Saturday\n(D) Sunday" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 38.0, "loss": 4.267573833465576 }, { "id": "identity_331273", "image": "IconQA/images/53675.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this symbol?\nChoices:\n(A) slide\n(B) turn\n(C) flip" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.515625, "loss": 4.7301530838012695 }, { "id": "identity_248890", "image": "TextVQA/images/00672399e1e9e074.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the series number of the Detecto clock, and what is the sum of the digits in this series number?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 26.71875, "loss": 4.159331798553467 }, { "id": "identity_189896", "image": "PMC-VQA/images/PMC7414880_Fig2_231383.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the asterisks at the bottom of the image?" }, { "from": "gpt", "value": "The answer is They indicate statistical significance" } ], "clip_score": 19.21875, "loss": 4.496504306793213 }, { "id": "identity_298262", "image": "A-OKVQA/images/383185.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is renowned for exporting the fruit currently on the counter? \n(A) Ecuador\n(B) Japan\n(C) China\n(D) Kazakhstan" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.78125, "loss": 4.194928169250488 }, { "id": "identity_125144", "image": "VQA2.0/images/434894.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of the wine bottle?" }, { "from": "gpt", "value": "The answer is Tasca" } ], "clip_score": 9.53125, "loss": 4.890564441680908 }, { "id": "identity_211640", "image": "TextVQA/images/038dc16c44629f3c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant material of the wheel?" }, { "from": "gpt", "value": "The answer is Cast iron or similar metal" } ], "clip_score": 29.421875, "loss": 4.430430889129639 }, { "id": "identity_279464", "image": "AI2D/images/1971.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which group of consumers would likely starve first if there were no more secondary consumers present? \n(A) Tertiary consumers\n(B) Primary consumers\n(C) Primary producers\n(D) Quaternary consumers" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.296875, "loss": 4.4558820724487305 }, { "id": "identity_291780", "image": "PMC-VQA/images/PMC5854565_Fig1_289552.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the image?\nOptions:\n(A) A CT scan\n(B) An ultrasound\n(C) An x-ray\n(D) An MRI scan." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.578125, "loss": 4.282229423522949 }, { "id": "identity_298063", "image": "PMC-VQA/images/PMC5487236_f1_183519.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is the hypervascular nodule situated?\nChoices:\n(A) Posterosuperior wall\n(B) Lateral wall\n(C) Anteroinferior wall\n(D) Posteroinferior wall" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.265625, "loss": 4.197094917297363 }, { "id": "identity_77286", "image": "ScienceQA/images/15797.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many peaks can be identified in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 16.9375, "loss": 4.443565845489502 }, { "id": "identity_278773", "image": "PMC-VQA/images/PMC8841721_F2_193712.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be observed in the light microscopy image? \nOptions:\n(A) Podocyte hypertrophy (yellow arrow) \n(B) Segmental sclerosis \n(C) Both of the above \n(D) None of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.890625, "loss": 4.635697841644287 }, { "id": "identity_9147", "image": "PMC-VQA/images/PMC8870907_diagnostics-12-00384-f006_203201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which sacroiliac joint shows anterior joint widening in the image? \nChoices:\n(A) Left \n(B) Right \n(C) Both Left and Right \n(D) None of them " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.28125, "loss": 4.235663414001465 }, { "id": "identity_303246", "image": "TQA/images/question_images/human_system_digestive_6087.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organ is directly connected to this?\n\nChoices:\n(A) Gallbladder\n(B) Pancreas\n(C) Liver \n(D) Stomach" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.625, "loss": 4.215277671813965 }, { "id": "identity_334489", "image": "VQA2.0/images/390345.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the gender of the individual in the image?" }, { "from": "gpt", "value": "The answer is man" } ], "clip_score": 16.4375, "loss": 5.547192573547363 }, { "id": "identity_187698", "image": "A-OKVQA/images/477700.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of structure is the airplane suspended from?" }, { "from": "gpt", "value": "The answer is A metal framework" } ], "clip_score": 25.890625, "loss": 4.249890327453613 }, { "id": "identity_37180", "image": "PMC-VQA/images/PMC6425693_F6_454105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the snapshots of 3D reconstructions? \nChoices:\n(A) ASC specks within RGCs \n(B) ASC specks within microglia \n(C) ASC specks colocalizing with astroglia \n(D) ASC specks outside of the retina " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.5625, "loss": 4.773142337799072 }, { "id": "identity_306017", "image": "ScienceQA/images/8866.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this food web, which organism contains matter that eventually moves to this?\nChoices:\n(A) Arctic fox\n(B) collared lemming\n(C) barren-ground caribou\n(D) bear sedge" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.671875, "loss": 4.472141265869141 }, { "id": "identity_1740", "image": "A-OKVQA/images/462037.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the girls steering wheels control?\nChoices:\n(A) car\n(B) barbie house\n(C) video game\n(D) toy truck" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.796875, "loss": 4.070314407348633 }, { "id": "identity_212696", "image": "VQA2.0/images/440903.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the umbrella closest to the water body on the right side of the image?" }, { "from": "gpt", "value": "The answer is Blue and yellow" } ], "clip_score": 22.34375, "loss": 4.3941874504089355 }, { "id": "identity_271336", "image": "ScienceQA/images/15236.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Florida?\nChoices:\n(A) Tallahassee\n(B) Orlando\n(C) Baton Rouge\n(D) Tampa" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.828125, "loss": 4.058667182922363 }, { "id": "identity_20950", "image": "PMC-VQA/images/PMC5559574_Fig2_203939.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the radiograph view mentioned in the caption? \nChoices:\n(A) True profile view \n(B) False profile view \n(C) Lateral view \n(D) Posterioanterior view " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.953125, "loss": 4.3811163902282715 }, { "id": "identity_208005", "image": "PMC-VQA/images/PMC7003532_Fig5_112043.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What side of the body is marked in the image?" }, { "from": "gpt", "value": "The answer is Right" } ], "clip_score": 21.75, "loss": 4.350138187408447 }, { "id": "identity_28125", "image": "Super-CLEVR/images/superCLEVR_new_000507.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a tiny green bicycle that is in front of the tiny bicycle that is right of the regular bus in front of the matte road bike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 20.265625, "loss": 5.437932014465332 }, { "id": "identity_8706", "image": "TQA/images/question_images/cycle_water_4978.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which process is responsible for the rain?\nChoices:\n(A) Transpiration\n(B) Precipitation\n(C) Evaporation\n(D) Collection" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.6875, "loss": 4.503045558929443 }, { "id": "identity_36901", "image": "A-OKVQA/images/73310.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on the bottom left?\nChoices:\n(A) teacup\n(B) giraffe\n(C) egg\n(D) baby" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.7734375, "loss": 4.195552349090576 }, { "id": "identity_314422", "image": "FigureQA/images/49381.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Pale Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.84375, "loss": 4.017688274383545 }, { "id": "identity_28884", "image": "PMC-VQA/images/PMC7906016_F3_374088.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the red arrow in the image indicate? \nChoices:\n(A) Normal tissue, \n(B) Ischemic infarction in the right temporal lobe, \n(C) Ischemic infarction in the left occipital lobe, \n(D) Ischemic infarction in the cerebellum. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.890625, "loss": 4.304564952850342 }, { "id": "identity_78745", "image": "TQA/images/abc_question_images/skin_cross_section_17586.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label corresponds to the hair follicle?" }, { "from": "gpt", "value": "The answer is Label J" } ], "clip_score": 31.109375, "loss": 4.26657247543335 }, { "id": "identity_262429", "image": "FigureQA/images/72794.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the most rough?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.6875, "loss": 4.221351623535156 }, { "id": "identity_23732", "image": "FigureQA/images/28210.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Periwinkle the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.546875, "loss": 4.30600118637085 }, { "id": "identity_281192", "image": "A-OKVQA/images/272463.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the van located at the end of the left row? \n(A) red\n(B) yellow\n(C) brown\n(D) green" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.078125, "loss": 4.020228385925293 }, { "id": "identity_283113", "image": "PMC-VQA/images/PMC6032711_F2_338194.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the type of image depicted in the caption?\nChoices:\n(A) x-ray\n(B) MRI\n(C) CT scan\n(D) ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.03125, "loss": 4.883444786071777 }, { "id": "identity_36534", "image": "PMC-VQA/images/PMC6947566_genes-10-00953-f003_96077.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the OCT show for the left eye? \nChoices:\n(A) Subretinal deposits with intraretinal fluid \n(B) Intraretinal fluid with no subretinal deposit \n(C) Subretinal mass with no intraretinal fluid \n(D) Normal OCT scan. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.09375, "loss": 4.737827301025391 }, { "id": "identity_277452", "image": "VizWiz/images/VizWiz_train_00023803.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the object a box?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 15.8515625, "loss": 4.844309329986572 }, { "id": "identity_305401", "image": "ScienceQA/images/18831.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which term corresponds to this image?\nChoices:\n(A) echinoderm\n(B) cnidarian" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.375, "loss": 4.177464962005615 }, { "id": "identity_329775", "image": "A-OKVQA/images/355092.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are these balloons in the boat?\nChoices:\n(A) fell there\n(B) celebration\n(C) for sale\n(D) stolen" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.75, "loss": 4.63630485534668 }, { "id": "identity_289723", "image": "TabMWP/images/37704.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.203125, "loss": 4.620763301849365 }, { "id": "identity_266185", "image": "ScienceQA/images/11308.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What observable trait did Asterotrygon possess based on the fossil evidence provided?\nChoices:\n(A) a wide tail fin\n(B) spots on its skin\n(C) a long, thin tail\n(D) four legs" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.71875, "loss": 4.047779083251953 }, { "id": "identity_293426", "image": "AI2D/images/4260.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the regolith located in relation to the bedrock?\nChoices:\n(A) Yes, the regolith is underneath the bedrock.\n(B) No, the regolith is above the bedrock.\n(C) No, the regolith is only on the Moon.\n(D) No, the regolith is is not part of the soil." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.15625, "loss": 4.721413612365723 }, { "id": "identity_2060", "image": "Super-CLEVR/images/superCLEVR_new_001596.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a small green bus left of the green rubber thing that is behind the green object that is left of the small shiny regular bus?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 26.515625, "loss": 5.229818344116211 }, { "id": "identity_24930", "image": "ScienceQA/images/15123.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these four objects have in common?\nChoices:\n(A) sticky\n(B) salty\n(C) stretchy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.640625, "loss": 4.086112022399902 }, { "id": "identity_205188", "image": "A-OKVQA/images/204273.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main action taking place in the image?" }, { "from": "gpt", "value": "The answer is Baseball batting" } ], "clip_score": 27.109375, "loss": 4.619912624359131 }, { "id": "identity_271667", "image": "TQA/images/question_images/parts_neuron_7225.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term used to describe the hair-like protrusions that extend from the cell body of a neuron?\nChoices:\n(A) dendrite\n(B) axon\n(C) axon hillock\n(D) myelin" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.8125, "loss": 4.242252349853516 }, { "id": "identity_332961", "image": "A-OKVQA/images/69944.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who is the queen of this place?\nChoices:\n(A) milena trump\n(B) elizabeth ii\n(C) lukashenko\n(D) ivanka trump" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.703125, "loss": 4.660862922668457 }, { "id": "identity_21857", "image": "PMC-VQA/images/PMC6032711_F2_338194.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of image is being shown in the caption? \nChoices:\n(A) x-ray \n(B) MRI \n(C) CT scan \n(D) ultrasound " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.875, "loss": 4.340733051300049 }, { "id": "identity_16207", "image": "ScienceQA/images/20768.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Kentucky?\nChoices:\n(A) Atlanta\n(B) Frankfort\n(C) Carson City\n(D) Nashville" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.53125, "loss": 4.276023864746094 }, { "id": "identity_74869", "image": "AI2D/images/1962.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trophic levels are depicted in the food web?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 23.046875, "loss": 4.1930952072143555 }, { "id": "identity_117127", "image": "TextVQA/images/1eae1ad21055c615.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which author's name is visible on the spine of the book located third from the right on the bottom shelf?" }, { "from": "gpt", "value": "The answer is Alan Campbell" } ], "clip_score": 27.390625, "loss": 4.2748894691467285 }, { "id": "identity_310016", "image": "A-OKVQA/images/76310.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the birds doing near the animal?\nChoices:\n(A) eating\n(B) playing\n(C) dancing\n(D) attacking" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.875, "loss": 4.098958969116211 }, { "id": "identity_26373", "image": "ScienceQA/images/3262.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these three objects have in common?\nChoices:\n(A) flexible\n(B) hard\n(C) fragile" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 8.359375, "loss": 4.260396480560303 }, { "id": "identity_4875", "image": "TQA/images/abc_question_images/photosynthesis_11262.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the gas R.\nChoices:\n(A) OXYGEN\n(B) NITROGEN\n(C) CARBON DIOXIDE\n(D) HYDROGEN" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.0, "loss": 4.90838623046875 }, { "id": "identity_3781", "image": "PMC-VQA/images/PMC5688175_Fig7_240165.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the red dashed line indicating in image e)? \nChoices:\n(A) The proximal side of the airway \n(B) The distal side of the airway \n(C) The middle of the airway \n(D) The outside of the airway. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.5, "loss": 4.194802761077881 }, { "id": "identity_154136", "image": "ScienceQA/images/13107.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of jewelry is depicted in the image?" }, { "from": "gpt", "value": "The answer is Ring" } ], "clip_score": 28.6875, "loss": 4.2176055908203125 }, { "id": "identity_263118", "image": "ScienceQA/images/6727.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Arizona?\nChoices:\n(A) Phoenix\n(B) Tucson\n(C) Honolulu\n(D) Juneau" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.140625, "loss": 4.295317649841309 }, { "id": "identity_295156", "image": "A-OKVQA/images/1700.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which century was the item the person is standing on originally from?\nChoices:\n(A) 18th\n(B) 20th\n(C) 12th\n(D) 17th" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.78125, "loss": 4.065648555755615 }, { "id": "identity_199482", "image": "VQA2.0/images/523617.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels are visible on the plane?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.328125, "loss": 4.1731061935424805 }, { "id": "identity_7709", "image": "ScienceQA/images/435.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which i in row A?\nChoices:\n(A) the theater\n(B) the police department\n(C) the fast-food restaurant\n(D) the fire department" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.9375, "loss": 4.4748125076293945 }, { "id": "identity_38002", "image": "PMC-VQA/images/PMC4472651_Fig1_398035.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the tumor located? \nChoices:\n(A) Right breast \n(B) Left breast \n(C) Both breasts \n(D) Unknown " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.671875, "loss": 4.000554084777832 }, { "id": "identity_296464", "image": "Super-CLEVR/images/superCLEVR_new_000612.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the object located behind the green utility bike and in front of the shiny chopper?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 30.234375, "loss": 4.115716457366943 }, { "id": "identity_320044", "image": "A-OKVQA/images/223095.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of ship is in the water?\nChoices:\n(A) container\n(B) naval\n(C) passenger\n(D) tanker" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.2421875, "loss": 4.4520416259765625 }, { "id": "identity_291682", "image": "PMC-VQA/images/PMC8811295_F1_183380.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the cerebral microbleeds located in the SWI images?\nChoices:\n(A) Right basal ganglia \n(B) Left basal ganglia \n(C) Cerebellum \n(D) Frontal lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.046875, "loss": 4.182740211486816 }, { "id": "identity_3094", "image": "FigureQA/images/70905.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Khaki have the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.140625, "loss": 4.570293426513672 }, { "id": "identity_195731", "image": "PMC-VQA/images/PMC3874542_F5_254590.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct areas of change can be identified in the second column from the left?" }, { "from": "gpt", "value": "The answer is Four areas" } ], "clip_score": 20.390625, "loss": 5.165075778961182 }, { "id": "identity_30299", "image": "AI2D/images/2969.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is represented by B?\nChoices:\n(A) axon terminals\n(B) node of ranvier\n(C) none of the above\n(D) myelin sheath" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.03125, "loss": 4.057436943054199 }, { "id": "identity_107303", "image": "VizWiz/images/VizWiz_train_00017172.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many layers of petals does the central flower have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.828125, "loss": 4.707794189453125 }, { "id": "identity_327398", "image": "PMC-VQA/images/PMC6224014_Fig4_393846.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the dotted lines in these images indicate?\nChoices:\n(A) The location of retinal layers,\n(B) The location of PED,\n(C) The edge of sclera,\n(D) The location of optic nerve head." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.40625, "loss": 4.217495441436768 }, { "id": "identity_257909", "image": "VQA2.0/images/305916.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the visible indicators that suggest the water is calm, and how do they compare to typical indicators of rough water?" }, { "from": "gpt", "value": "The answer is Small waves, smooth surface, steady objects indicate calm water" } ], "clip_score": 17.40625, "loss": 4.958832740783691 }, { "id": "identity_92655", "image": "IconQA/images/101040.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which direction is the point of the shape on the frame facing?" }, { "from": "gpt", "value": "The answer is Upwards" } ], "clip_score": 24.640625, "loss": 4.634419918060303 }, { "id": "identity_286697", "image": "AI2D/images/987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter on the diagram indicates the follicle?\nChoices:\n(A) R\n(B) H\n(C) D\n(D) E" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.203125, "loss": 4.408966541290283 }, { "id": "identity_35420", "image": "A-OKVQA/images/472143.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the couple who sit facing camera doing?\nChoices:\n(A) gaming\n(B) texting\n(C) singing\n(D) eating" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.671875, "loss": 4.26755428314209 }, { "id": "identity_265787", "image": "DocVQA/images/nywf0055_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the identification number for the connection?" }, { "from": "gpt", "value": "The answer is HUA MEI" } ], "clip_score": 23.8125, "loss": 4.257565498352051 }, { "id": "identity_35585", "image": "PMC-VQA/images/PMC9028518_insects-13-00317-f006_260904.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the field of view in the figures? \nChoices:\n(A) flower \n(B) stem \n(C) leaf \n(D) root " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.625, "loss": 4.090874671936035 }, { "id": "identity_21314", "image": "DocVQA/images/qnjw0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the collection name given in the separation record?" }, { "from": "gpt", "value": "The answer is Robert Shank Collection" } ], "clip_score": 26.375, "loss": 4.667405605316162 }, { "id": "identity_331746", "image": "PMC-VQA/images/PMC7215337_ijms-21-02817-f005_166971.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be seen in the inset at a higher magnification?\nChoices:\n(A) Rudimentary alveolar wall \n(B) Chlamydial inclusions \n(C) Interlobular septum \n(D) Plasma cells" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.6875, "loss": 4.826258659362793 }, { "id": "identity_57577", "image": "DVQA/images/bar_train_00166070.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset has the lowest accuracy for the 'habit' algorithm?" }, { "from": "gpt", "value": "The answer is fear" } ], "clip_score": 31.09375, "loss": 4.773800849914551 }, { "id": "identity_183868", "image": "VQA-AS/images/19416.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many logs are lying on the ground to the left of the image?" }, { "from": "gpt", "value": "The answer is One log" } ], "clip_score": 20.53125, "loss": 4.286868095397949 }, { "id": "identity_285708", "image": "IconQA/images/23812.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there a square as a face of this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.203125, "loss": 4.114302635192871 }, { "id": "identity_325675", "image": "PMC-VQA/images/PMC4398354_pone.0123440.g003_378311.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be observed in the colored bar image here?\nChoices:\n(A) Beam paths \n(B) Different frequencies \n(C) Eye movements \n(D) Brain activity" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.90625, "loss": 4.611187934875488 }, { "id": "identity_15813", "image": "VizWiz/images/VizWiz_train_00013290.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of plant is this? " }, { "from": "gpt", "value": "The answer is green flowers" } ], "clip_score": 23.984375, "loss": 4.033559322357178 }, { "id": "identity_106832", "image": "VQA2.0/images/579329.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wine bottles are visible on the table?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.546875, "loss": 4.46574592590332 }, { "id": "identity_14166", "image": "Super-CLEVR/images/superCLEVR_new_001586.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other big objects of the same shape as the big red thing?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 15.8359375, "loss": 5.311081409454346 }, { "id": "identity_304647", "image": "FigureQA/images/76928.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.796875, "loss": 4.1144514083862305 }, { "id": "identity_191740", "image": "PMC-VQA/images/PMC7865008_Fig10_361447.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many yellow objects are present in the 3D model?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 22.921875, "loss": 4.416399955749512 }, { "id": "identity_270130", "image": "TQA/images/abc_question_images/optics_lense_types_19157.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label that represents the plano-concave part in the diverging lens.\nChoices:\n(A) N\n(B) M\n(C) S\n(D) A" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.40625, "loss": 4.147269248962402 }, { "id": "identity_149060", "image": "DocVQA/images/jmwn0226_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the subject of the last comment?" }, { "from": "gpt", "value": "The answer is Line" } ], "clip_score": 18.234375, "loss": 4.919771671295166 }, { "id": "identity_152517", "image": "ScienceQA/images/19025.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the geographical feature directly north of the marked state?" }, { "from": "gpt", "value": "The answer is Oregon" } ], "clip_score": 27.375, "loss": 4.219334125518799 }, { "id": "identity_31853", "image": "TQA/images/question_images/human_system_digestive_3686.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the organ beside the stomach?\nChoices:\n(A) Small Intestine\n(B) Liver\n(C) Rectum\n(D) Large Intestine" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.90625, "loss": 4.235092639923096 }, { "id": "identity_299282", "image": "A-OKVQA/images/139183.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the white and brown horse currently engaged in?\nChoices:\n(A) jumping\n(B) falling\n(C) sitting down\n(D) standing up" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.9375, "loss": 4.412480354309082 }, { "id": "identity_290492", "image": "TQA/images/question_images/velocity_time_graphs_8222.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On deceleration, does the blue line show an increase, decrease, or remain the same?\nChoices:\n(A) decrease\n(B) increase\n(C) stay the same\n(D) curves" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.8125, "loss": 4.206427097320557 }, { "id": "identity_297318", "image": "A-OKVQA/images/554040.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most probable content of the green substance in the bottle among the following options: soap, chalk, jam, or jelly?\nChoices:\n(A) soap\n(B) chalk\n(C) jam\n(D) jelly" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.75, "loss": 4.720606327056885 }, { "id": "identity_264891", "image": "Super-CLEVR/images/superCLEVR_new_000153.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the color of the large matte thing behind the cyan rubber thing the same as the shiny scooter to the right of the gray metallic scooter?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 22.953125, "loss": 5.1340413093566895 }, { "id": "identity_273734", "image": "AI2D/images/1767.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organism in the terrestrial food web receives energy from both producers and consumers? Options include Snail, Shrew, Wood mouse, and Beetle.\nChoices:\n(A) Snail\n(B) Shrew\n(C) Wood mouse\n(D) Beetle" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.59375, "loss": 4.02053689956665 }, { "id": "identity_115706", "image": "PMC-VQA/images/PMC5034412_Fig6_76656.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label \"H\" indicate in this image?" }, { "from": "gpt", "value": "The answer is A specific structure within the specimen" } ], "clip_score": 27.40625, "loss": 4.029897212982178 }, { "id": "identity_247987", "image": "A-OKVQA/images/254177.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key elements of his attire that indicate the dress code of the event, and how do they collectively suggest the formality level?" }, { "from": "gpt", "value": "The answer is formal" } ], "clip_score": 20.953125, "loss": 5.952803611755371 }, { "id": "identity_464", "image": "Super-CLEVR/images/superCLEVR_new_001871.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What size is the object that is both behind the brown matte jet and in front of the large blue fighter?" }, { "from": "gpt", "value": "The answer is small" } ], "clip_score": 31.59375, "loss": 4.108033657073975 }, { "id": "identity_285807", "image": "A-OKVQA/images/524470.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What mode of transportation is the person on the street using to move around?\nChoices:\n(A) scooter\n(B) skateboard\n(C) car\n(D) bike" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.5390625, "loss": 4.050975322723389 }, { "id": "identity_307020", "image": "A-OKVQA/images/197959.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the round object on the floor and connected to the wall clean?\nChoices:\n(A) shoes\n(B) floors\n(C) dishes\n(D) walls" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.515625, "loss": 4.575130939483643 }, { "id": "identity_195800", "image": "A-OKVQA/images/569716.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many electrical appliances can be seen on the kitchen counter?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.28125, "loss": 4.734652996063232 }, { "id": "identity_30400", "image": "PMC-VQA/images/PMC7519687_Fig3_261740.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the arrow described in the caption? \nChoices:\n(A) Green \n(B) Blue \n(C) Red \n(D) Yellow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.203125, "loss": 4.484743118286133 }, { "id": "identity_323859", "image": "PMC-VQA/images/PMC7762008_diagnostics-10-01049-f005_327389.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of images were used in this acquisition?\nChoices:\n(A) CT scans \n(B) T1-weighted images without gadolinium contrast \n(C) FLAIR images \n(D) T2-weighted images" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.09375, "loss": 4.067249774932861 }, { "id": "identity_114272", "image": "Super-CLEVR/images/superCLEVR_new_001543.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object located directly behind the silver SUV?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 22.078125, "loss": 4.539857864379883 }, { "id": "identity_327783", "image": "PMC-VQA/images/PMC4965093_pone.0160250.g002_56971.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the white crosshair on this image indicate?\nChoices:\n(A) Tumor location\n(B) Cell count\n(C) Biopsy location\n(D) Scan border" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.03125, "loss": 4.296724796295166 }, { "id": "identity_266825", "image": "DocVQA/images/zqld0217_21.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the title?" }, { "from": "gpt", "value": "The answer is aed interactions dosage card" } ], "clip_score": 21.609375, "loss": 5.401360511779785 }, { "id": "identity_13858", "image": "TQA/images/question_images/human_system_digestive_3716.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the liver in relation to the appendix?\nChoices:\n(A) It is to the right of the appendix\n(B) It is above the appendix\n(C) It is below the appendix\n(D) It is to the left of the appendix" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.484375, "loss": 4.532726287841797 }, { "id": "identity_324942", "image": "PMC-VQA/images/PMC6097448_Fig1_355565.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the observed enhancement pattern in the CT images of the neck?\nChoices:\n(A) peripheral enhancement pattern \n(B) central enhancement pattern \n(C) rim enhancement pattern \n(D) non-enhanced pattern" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.09375, "loss": 4.250441074371338 }, { "id": "identity_276272", "image": "TQA/images/abc_question_images/human_system_reproductory_17005.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, which letter represents the ovary?\nChoices:\n(A) N\n(B) H\n(C) Y\n(D) D" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.46875, "loss": 4.390622138977051 }, { "id": "identity_8930", "image": "VQA-AS/images/3509.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the toy?" }, { "from": "gpt", "value": "The answer is doll" } ], "clip_score": 21.578125, "loss": 4.17417573928833 }, { "id": "identity_110096", "image": "VQA-AS/images/17987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many red cups are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.671875, "loss": 4.203135013580322 }, { "id": "identity_30566", "image": "PMC-VQA/images/PMC8008917_f2_406371.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the black asterisks denote in the image? \nChoices:\n(A) Normal anatomy \n(B) Blood vessels \n(C) Lesions \n(D) Signal artifacts " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.484375, "loss": 4.454771995544434 }, { "id": "identity_222891", "image": "Super-CLEVR/images/superCLEVR_new_001393.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more shiny vehicles or matte vehicles in the image?" }, { "from": "gpt", "value": "The answer is Equal number" } ], "clip_score": 22.296875, "loss": 4.213186740875244 }, { "id": "identity_24642", "image": "TQA/images/abc_question_images/earth_moon_phases_11247.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following shows the waxing gibbous?\nChoices:\n(A) W\n(B) Y\n(C) A\n(D) X" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.4375, "loss": 4.4130072593688965 }, { "id": "identity_3070", "image": "A-OKVQA/images/478099.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What continent is this scene located in?\nChoices:\n(A) north america\n(B) australia\n(C) asia\n(D) europe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.9765625, "loss": 4.406964302062988 }, { "id": "identity_298018", "image": "AI2D/images/2225.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the life cycle diagram of a butterfly, which term best describes the stage represented by letter C? Is it:\n(A) Caterpillar\n(B) Butterfly\n(C) Egg\n(D) Chrysalis" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.875, "loss": 4.311344146728516 }, { "id": "identity_217963", "image": "VQA2.0/images/477887.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many benches can be seen around the circular structure?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.21875, "loss": 4.12399959564209 }, { "id": "identity_239910", "image": "FigureQA/images/61415.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the highest peak value, and what is that value?" }, { "from": "gpt", "value": "The answer is Light Gold and Red, 65" } ], "clip_score": 25.6875, "loss": 4.347391605377197 }, { "id": "identity_27452", "image": "IconQA/images/76169.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of rectangles even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.15625, "loss": 4.117900371551514 }, { "id": "identity_321093", "image": "A-OKVQA/images/71603.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are the two people wearing helmets?\nChoices:\n(A) identification\n(B) protect heads\n(C) fashion\n(D) incognito" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.359375, "loss": 4.253279685974121 }, { "id": "identity_32740", "image": "FigureQA/images/78534.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Chocolate the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.234375, "loss": 4.25338077545166 }, { "id": "identity_333765", "image": "PMC-VQA/images/PMC7592484_f2_284001.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which lung zones are affected in a patient with this CT pattern? \nChoices:\n(A) upper zones \n(B) middle zones \n(C) lower zones \n(D) all zones" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.9375, "loss": 4.0971269607543945 }, { "id": "identity_331664", "image": "IconQA/images/105400.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there a circular face on this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.359375, "loss": 4.933250427246094 }, { "id": "identity_331075", "image": "PMC-VQA/images/PMC4311646_F2_353925.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique used to identify the distribution pattern of a substance in the kidney?\nChoices:\n(A) CT scan\n(B) MRI\n(C) X-ray\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.125, "loss": 4.284182548522949 }, { "id": "identity_239751", "image": "ScienceQA/images/18590.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the state highlighted in the image and then name its capital" }, { "from": "gpt", "value": "The answer is Columbia" } ], "clip_score": 28.875, "loss": 4.720315933227539 }, { "id": "identity_317263", "image": "Super-CLEVR/images/superCLEVR_new_001852.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the shiny object that is behind a brown object and in front of a tiny yellow metal thing?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 17.875, "loss": 4.206254005432129 }, { "id": "identity_293656", "image": "VizWiz/images/VizWiz_train_00009981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you describe what is shown in this image?" }, { "from": "gpt", "value": "The answer is cel phone home screen" } ], "clip_score": 21.40625, "loss": 4.285951614379883 }, { "id": "identity_27163", "image": "PMC-VQA/images/PMC6337062_F4_424369.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the area of stroke located in the image? \nChoices:\n(A) Denoted by a white arrow \n(B) Denoted by an asterisk (∗) \n(C) At the site of injection \n(D) In the bottom panel " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.375, "loss": 5.108024597167969 }, { "id": "identity_10979", "image": "IconQA/images/51079.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you select a marble without looking, how likely is it that you will pick a black one?\nChoices:\n(A) impossible\n(B) certain\n(C) probable\n(D) unlikely" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.203125, "loss": 4.142325401306152 }, { "id": "identity_119841", "image": "TextVQA/images/d9e27054a3b1badf.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the text \"DIGITAL RECORDING\" on the cover?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 13.203125, "loss": 4.222714900970459 }, { "id": "identity_137918", "image": "DocVQA/images/pqmw0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What item is the sender apologizing for not sending earlier?" }, { "from": "gpt", "value": "The answer is payment for the Canada Goose" } ], "clip_score": 19.609375, "loss": 4.873173713684082 }, { "id": "identity_317286", "image": "VQA-AS/images/16427.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where are these plants?" }, { "from": "gpt", "value": "The answer is floor" } ], "clip_score": 21.03125, "loss": 4.102281093597412 }, { "id": "identity_311920", "image": "PMC-VQA/images/PMC7693825_jcm-09-03516-f002_306865.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the grey color indicate in this image?\nChoices:\n(A) Post-resection CT-scan data \n(B) Native mandible \n(C) Mandibular displacement \n(D) Plate rupture" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.78125, "loss": 5.1628007888793945 }, { "id": "identity_13042", "image": "FigureQA/images/36771.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Khaki the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.953125, "loss": 4.087365627288818 }, { "id": "identity_317426", "image": "AI2D/images/1297.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which stage does Letter A refer to?\nChoices:\n(A) Stage B\n(B) Stage D\n(C) Stage C\n(D) Stage A" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.765625, "loss": 4.046492576599121 }, { "id": "identity_223163", "image": "Super-CLEVR/images/superCLEVR_new_001478.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is located behind the green metallic motorbike and to the left of the blue and yellow airplane?" }, { "from": "gpt", "value": "The answer is Bus" } ], "clip_score": 33.28125, "loss": 4.4038166999816895 }, { "id": "identity_38792", "image": "DVQA/images/bar_train_00179889.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the highest accuracy for the 'grip' dataset?" }, { "from": "gpt", "value": "The answer is horror" } ], "clip_score": 38.28125, "loss": 4.023274898529053 }, { "id": "identity_130885", "image": "A-OKVQA/images/480717.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flags are visible in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 15.4609375, "loss": 4.060394287109375 }, { "id": "identity_310967", "image": "A-OKVQA/images/328935.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is this crowd here?\nChoices:\n(A) free food\n(B) traveling\n(C) concert\n(D) shopping" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.0625, "loss": 4.276376724243164 }, { "id": "identity_237126", "image": "TQA/images/abc_question_images/human_system_respiratory_13584.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the structure that is directly superior to the tongue and directly inferior to the nasal cavity" }, { "from": "gpt", "value": "The answer is K" } ], "clip_score": 31.5, "loss": 4.142530918121338 }, { "id": "identity_266697", "image": "VQA2.0/images/117428.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the sofa?" }, { "from": "gpt", "value": "The answer is green" } ], "clip_score": 20.3125, "loss": 4.033211708068848 }, { "id": "identity_325785", "image": "DocVQA/images/mflv0001_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the company?" }, { "from": "gpt", "value": "The answer is RJR corporate affairs" } ], "clip_score": 18.5625, "loss": 4.001955032348633 }, { "id": "identity_321768", "image": "ScienceQA/images/16809.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which place is highlighted?\nChoices:\n(A) Saint Kitts and Nevis\n(B) Haiti\n(C) Jamaica\n(D) Cuba" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.28125, "loss": 4.556087493896484 }, { "id": "identity_279599", "image": "Super-CLEVR/images/superCLEVR_new_001452.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the location of the small road bike in relation to the tiny bus, the large cyan object, and the tiny gray matte bus?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 24.953125, "loss": 4.101035118103027 }, { "id": "identity_46125", "image": "DVQA/images/bar_train_00014852.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of accuracies displayed on the graph?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 29.890625, "loss": 4.012741565704346 }, { "id": "identity_293334", "image": "ScienceQA/images/17236.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Arizona?\nChoices:\n(A) Salt Lake City\n(B) Tucson\n(C) Phoenix\n(D) Providence" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.078125, "loss": 4.177336692810059 }, { "id": "identity_335480", "image": "PMC-VQA/images/PMC5303600_fig1_126319.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is attached to the radius in this image?\nChoices:\n(A) Ligaments\n(B) Muscles\n(C) Tendons\n(D) Bones" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.328125, "loss": 4.094882011413574 }, { "id": "identity_121074", "image": "VizWiz/images/VizWiz_train_00012686.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of support structure is used for the plants?" }, { "from": "gpt", "value": "The answer is A stick or rod" } ], "clip_score": 19.578125, "loss": 4.462497711181641 }, { "id": "identity_271363", "image": "FigureQA/images/19511.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following statements correctly compares Bubblegum and Brown, stating whether Bubblegum is less than Brown? \n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.75, "loss": 4.034841060638428 }, { "id": "identity_111485", "image": "TextVQA/images/8001fd6035bed471.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the user who owns this aircraft?" }, { "from": "gpt", "value": "The answer is twbmwot" } ], "clip_score": 26.4375, "loss": 5.60974645614624 }, { "id": "identity_274022", "image": "TQA/images/question_images/tides_2612.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What force is responsible for causing tides? Is it:\n(A) Gravitational\n(B) Human\n(C) Magnetic\n(D) Electronic" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.15625, "loss": 4.468043804168701 }, { "id": "identity_107943", "image": "VizWiz/images/VizWiz_train_00008667.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many paws of the dog are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 25.5, "loss": 4.315793991088867 }, { "id": "identity_292986", "image": "A-OKVQA/images/374520.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which continent is the headquarters of this airline company located?\nChoices:\n(A) Asia\n(B) Europe\n(C) North America\n(D) Africa" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.875, "loss": 4.312098026275635 }, { "id": "identity_19270", "image": "Super-CLEVR/images/superCLEVR_new_001531.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a small metallic object of the same shape as the large blue thing?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 18.90625, "loss": 4.979164123535156 }, { "id": "identity_269686", "image": "AI2D/images/3123.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From where does the leaf blade originate?\nChoices:\n(A) leaf sheath\n(B) Stem\n(C) Branch\n(D) fused" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.0625, "loss": 4.321537971496582 }, { "id": "identity_271300", "image": "A-OKVQA/images/76310.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity are the birds engaged in close to the lamb?\nChoices:\n(A) eating\n(B) playing\n(C) dancing\n(D) attacking" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.921875, "loss": 4.100089073181152 }, { "id": "identity_321425", "image": "FigureQA/images/74728.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Deep Sky Blue associated with the largest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.859375, "loss": 4.374440670013428 }, { "id": "identity_288170", "image": "PMC-VQA/images/PMC8579540_Fig1_107443.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How is the response to treatment visualized? \nChoices:\n(A) X-Ray\n(B) Computed tomography\n(C) Magnetic resonance imaging\n(D) Biopsy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.453125, "loss": 5.175934791564941 }, { "id": "identity_299744", "image": "ScienceQA/images/2100.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which question is most suitable for Darnell's experiment to answer?\nChoices:\n(A) Can pennies hold more drops of water mixed with dish soap or water mixed with hand soap?\n(B) Can pennies hold more drops of pure water or water mixed with hand soap?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.3125, "loss": 4.293097019195557 }, { "id": "identity_265183", "image": "A-OKVQA/images/209145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose is the white tube on top of the cabinet used? Is it for heating, ventilation, air conditioning, or water?\nChoices:\n(A) heating\n(B) ventilation\n(C) air conditioning\n(D) water" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.3125, "loss": 4.301735877990723 }, { "id": "identity_214825", "image": "TextVQA/images/2be1beb5b96dbe54.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many floral patterns are present in the clock's design?" }, { "from": "gpt", "value": "The answer is Four large flowers" } ], "clip_score": 28.640625, "loss": 4.601817607879639 }, { "id": "identity_20776", "image": "IconQA/images/214.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a dog. Which picture is fourth?\nChoices:\n(A) paw\n(B) dog\n(C) house" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.34375, "loss": 4.08721923828125 }, { "id": "identity_34667", "image": "A-OKVQA/images/356384.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person on the board trying to maintain?\nChoices:\n(A) dryness\n(B) height\n(C) speed\n(D) balance" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.0546875, "loss": 4.219555854797363 }, { "id": "identity_271782", "image": "AI2D/images/3727.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the region in the diagram that corresponds to the appendix.\nChoices:\n(A) h\n(B) e\n(C) g\n(D) k" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.453125, "loss": 4.052925109863281 }, { "id": "identity_331639", "image": "Super-CLEVR/images/superCLEVR_new_000972.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more large metal tandem bikes than yellow matte regular vehicles?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 27.9375, "loss": 5.028400897979736 }, { "id": "identity_313134", "image": "A-OKVQA/images/575465.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this activity taking place?\nChoices:\n(A) sand\n(B) gym\n(C) stadium\n(D) park" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.53125, "loss": 4.6427483558654785 }, { "id": "identity_282285", "image": "PMC-VQA/images/PMC8572927_F1_105287.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During coronary angiography, which artery was discovered to have a 70% stenosis?\nChoices:\n(A) left pulmonary artery\n(B) right atrial branch\n(C) left anterior descending coronary artery\n(D) right ventricular artery" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.203125, "loss": 4.397507190704346 }, { "id": "identity_190021", "image": "VQA2.0/images/121562.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the final destination of the bus?" }, { "from": "gpt", "value": "The answer is Larkin Square" } ], "clip_score": 25.546875, "loss": 4.200536251068115 }, { "id": "identity_273651", "image": "AI2D/images/3761.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the figure is located at the bottom?\nChoices:\n(A) diaphragm\n(B) trachea\n(C) none of the above\n(D) left lung" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.4375, "loss": 4.228553771972656 }, { "id": "identity_65866", "image": "AI2D/images/1908.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tertiary consumers are present in this food chain?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 27.25, "loss": 4.68234395980835 }, { "id": "identity_301956", "image": "FigureQA/images/47589.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Web Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.4375, "loss": 4.038724422454834 }, { "id": "identity_322290", "image": "VQA2.0/images/336181.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of buildings are at the end of the street?" }, { "from": "gpt", "value": "The answer is stores" } ], "clip_score": 21.046875, "loss": 4.646264553070068 }, { "id": "identity_6194", "image": "TextVQA/images/3fa6c2b883eee9da.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the title of the book on the man's desk?" }, { "from": "gpt", "value": "The answer is semantic web" } ], "clip_score": 22.59375, "loss": 4.441373348236084 }, { "id": "identity_275790", "image": "TextVQA/images/001d028a021c9c00.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: With what do these correspond?" }, { "from": "gpt", "value": "The answer is your throat " } ], "clip_score": 21.5, "loss": 4.242545127868652 }, { "id": "identity_338180", "image": "PMC-VQA/images/PMC6019330_FIG3_333505.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the yellow arrow point to in the image?\nChoices:\n(A) Radiolucent zone \n(B) Screw \n(C) Fracture \n(D) None of the above." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.78125, "loss": 4.574609279632568 }, { "id": "identity_44533", "image": "DocVQA/images/ffjv0228_10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two herbicides mentioned under the Environmental Options?" }, { "from": "gpt", "value": "The answer is Dowpon and Pyramin" } ], "clip_score": 26.125, "loss": 4.9933881759643555 }, { "id": "identity_110667", "image": "Super-CLEVR/images/superCLEVR_new_001351.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have two wheels?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.984375, "loss": 4.796936988830566 }, { "id": "identity_322730", "image": "IconQA/images/35161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this shape?\nChoices:\n(A) slide\n(B) turn\n(C) flip" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.625, "loss": 4.153720378875732 }, { "id": "identity_291425", "image": "A-OKVQA/images/575406.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country's flag does the color scheme of this vehicle represent? \nChoices:\n(A) Kazakhstan\n(B) Djibouti\n(C) France\n(D) Mexico" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.90625, "loss": 4.059420585632324 }, { "id": "identity_111856", "image": "VQA2.0/images/356834.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first word on the top left of the image?" }, { "from": "gpt", "value": "The answer is Detail" } ], "clip_score": 17.515625, "loss": 4.0073089599609375 }, { "id": "identity_329213", "image": "PMC-VQA/images/PMC6215355_Fig2_390952.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was used to reveal low signals in certain brain sulci?\nChoices:\n(A) CT \n(B) MRI \n(C) PET \n(D) X-ray" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.15625, "loss": 4.912408828735352 }, { "id": "identity_187818", "image": "A-OKVQA/images/185721.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrian crossing lines are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 28.140625, "loss": 4.326991081237793 }, { "id": "identity_15815", "image": "PMC-VQA/images/PMC5084403_Fig10_88889.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by red fluorescence? \nChoices:\n(A) Endoplasmic reticulum \n(B) Mitochondria \n(C) Nucleus \n(D) Golgi apparatus " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.703125, "loss": 4.210017204284668 }, { "id": "identity_274488", "image": "Super-CLEVR/images/superCLEVR_new_001640.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do any small objects share the same shape as the large metal object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 16.703125, "loss": 4.880829334259033 }, { "id": "identity_11565", "image": "A-OKVQA/images/574692.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are they waiting in line for?\nChoices:\n(A) buy juice\n(B) pick oranges\n(C) exit\n(D) sell juice" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.8125, "loss": 4.049929141998291 }, { "id": "identity_193526", "image": "VQA2.0/images/409897.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the water?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 15.8203125, "loss": 4.37414026260376 }, { "id": "identity_315428", "image": "Super-CLEVR/images/superCLEVR_new_001839.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this motorbike the same size as the matte aeroplane?\nIs the gray metallic motorbike the same size as this aeroplane?\nIs this motorbike the same size as this aeroplane?\nAre they the same size?\nIs the motorbike the same size as the aeroplane?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 23.1875, "loss": 4.930785179138184 }, { "id": "identity_299761", "image": "AI2D/images/621.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the function of part B of the tree among the following options?\n(A) Produce flowers and fruits\n(B) Support branches and leaves\n(C) Absorb nutrients from the soil\n(D) Photosynthesis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.953125, "loss": 4.4411163330078125 }, { "id": "identity_212649", "image": "A-OKVQA/images/67116.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.40625, "loss": 4.334668159484863 }, { "id": "identity_289550", "image": "PMC-VQA/images/PMC5033523_pone.0163637.g003_76298.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the white dots in the photomicrograph?\nChoices:\n(A) Bleeding \n(B) Absorbable mesh \n(C) Non-absorbable mesh \n(D) Infections" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.546875, "loss": 4.858387470245361 }, { "id": "identity_211901", "image": "A-OKVQA/images/136542.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the elephants' skin?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 25.578125, "loss": 4.172416687011719 }, { "id": "identity_271706", "image": "IconQA/images/63322.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on? \nOptions:\n(A) white\n(B) green" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.359375, "loss": 4.024903297424316 }, { "id": "identity_219464", "image": "VQA2.0/images/147829.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background of the image?" }, { "from": "gpt", "value": "The answer is Three people" } ], "clip_score": 18.359375, "loss": 4.757380485534668 }, { "id": "identity_31249", "image": "DocVQA/images/jnyb0018_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who are the RJR Attendees for the IGA Retailer of the year Breakfast held on Monday, May 8??" }, { "from": "gpt", "value": "The answer is jim helm" } ], "clip_score": 25.34375, "loss": 4.288977146148682 }, { "id": "identity_23457", "image": "Super-CLEVR/images/superCLEVR_new_001594.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The car that is the same material as the tiny wagon is what size?" }, { "from": "gpt", "value": "The answer is small" } ], "clip_score": 27.59375, "loss": 4.256011009216309 }, { "id": "identity_293770", "image": "A-OKVQA/images/536145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What word is most likely to be found between \"see\" and \"now\" on the person's back?\nChoices:\n(A) mexico\n(B) me\n(C) money\n(D) mom" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.3125, "loss": 4.012876510620117 }, { "id": "identity_199656", "image": "PMC-VQA/images/PMC7447704_Fig15_240131.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many asterisks are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 18.84375, "loss": 4.5285115242004395 }, { "id": "identity_284174", "image": "PMC-VQA/images/PMC8586710_F11_110541.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following structures is visible on the leaf surface? \n(A) Stomata \n(B) Root hairs \n(C) Salt glands \n(D) Cuticle layers" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.5, "loss": 4.744834899902344 }, { "id": "identity_272326", "image": "A-OKVQA/images/260901.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which word is displayed on the blue truck among the following options: (A) reuse, (B) reduce, (C) quickly, or (D) orange?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.5703125, "loss": 4.782842636108398 }, { "id": "identity_283906", "image": "ChartQA/images/23940851006052.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color is used to represent China on the bar graph?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 17.71875, "loss": 4.971093654632568 }, { "id": "identity_264780", "image": "AI2D/images/3634.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What structure is located directly above the ulnar artery?\nChoices:\n(A) radial artery\n(B) lliac artery\n(C) femoral vein\n(D) none of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.359375, "loss": 4.751640796661377 }, { "id": "identity_78218", "image": "ScienceQA/images/8534.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What role does the lichen play in this food web?" }, { "from": "gpt", "value": "The answer is Producer" } ], "clip_score": 28.859375, "loss": 4.0262370109558105 }, { "id": "identity_68506", "image": "ScienceQA/images/9677.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the beans in the image have any visible markings?" }, { "from": "gpt", "value": "The answer is Yes, white markings" } ], "clip_score": 30.578125, "loss": 4.024826526641846 }, { "id": "identity_306331", "image": "AI2D/images/4636.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of fly is shown in position B?\nChoices:\n(A) Stable fly\n(B) House fly\n(C) Face fly\n(D) Horn fly" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 37.46875, "loss": 4.0478515625 }, { "id": "identity_56496", "image": "MapQA/images/map_10228.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a percentage appearance greater than 75.9%?" }, { "from": "gpt", "value": "The answer is an integer representing the total count" } ], "clip_score": 30.8125, "loss": 4.088203430175781 }, { "id": "identity_7916", "image": "PlotQA/images/112850.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where does the legend appear in the graph ?" }, { "from": "gpt", "value": "The answer is center right" } ], "clip_score": 19.109375, "loss": 4.7350850105285645 }, { "id": "identity_326586", "image": "PMC-VQA/images/PMC8745551_ijms-23-00147-f002_160008.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the condition of the left inguinal lymph node?\nChoices:\n(A) Benign\n(B) Malignant\n(C) Inflammation\n(D) Scar" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.09375, "loss": 4.393271446228027 }, { "id": "identity_283423", "image": "PMC-VQA/images/PMC5451375_Fig5_171420.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the correct diagnosis for the patient shown in the image? \nChoices:\n(A) Glioblastoma\n(B) Oligodendroglioma\n(C) Diffuse astrocytoma\n(D) Medulloblastoma" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.578125, "loss": 4.039198398590088 }, { "id": "identity_108684", "image": "TextVQA/images/427908f7605df621.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What language is the title of the book written in?" }, { "from": "gpt", "value": "The answer is Portuguese" } ], "clip_score": 23.078125, "loss": 4.323892116546631 }, { "id": "identity_64623", "image": "AI2D/images/324.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many levels of consumers are depicted in this food web?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 30.15625, "loss": 4.207503318786621 }, { "id": "identity_285590", "image": "A-OKVQA/images/56116.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which location is the white cup located in front of the second chair from the left?\nChoices:\n(A) wal mart\n(B) dunkin\n(C) wawa\n(D) starbucks" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.125, "loss": 4.4213080406188965 }, { "id": "identity_311623", "image": "A-OKVQA/images/200328.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the birds likely doing in this image?\nChoices:\n(A) attacking\n(B) flyover\n(C) migrating\n(D) landing" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.6875, "loss": 4.361245632171631 }, { "id": "identity_329738", "image": "DVQA/images/bar_train_00118962.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset is represented by the steelblue color?" }, { "from": "gpt", "value": "The answer is hay" } ], "clip_score": 23.5, "loss": 4.056302547454834 }, { "id": "identity_322259", "image": "A-OKVQA/images/216406.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is someone who might easily overheat safest in this place?\nChoices:\n(A) on chair\n(B) under umbrella\n(C) in sand\n(D) water's edge" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.65625, "loss": 4.3852009773254395 }, { "id": "identity_263169", "image": "A-OKVQA/images/448176.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the presence of vehicles on the boat?\nChoices:\n(A) cross water\n(B) for sale\n(C) cleaning them\n(D) free ride" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.6875, "loss": 4.5194220542907715 }, { "id": "identity_287117", "image": "VizWiz/images/VizWiz_train_00006101.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you explain what is being depicted in this photo?" }, { "from": "gpt", "value": "The answer is this photo describing news stories" } ], "clip_score": 17.046875, "loss": 5.263316631317139 }, { "id": "identity_106950", "image": "TextVQA/images/91890335c83cbd76.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the sticky note attached to the bottom edge of the paper grid?" }, { "from": "gpt", "value": "The answer is Monsters move first" } ], "clip_score": 23.578125, "loss": 5.058791160583496 }, { "id": "identity_291670", "image": "ScienceQA/images/12879.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which ocean is being emphasized in the following options?\n(A) the Southern Ocean\n(B) the Pacific Ocean\n(C) the Arctic Ocean\n(D) the Atlantic Ocean" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.25, "loss": 4.136906623840332 }, { "id": "identity_307341", "image": "PMC-VQA/images/PMC6226003_F2_394343.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the letter D indicate in the subcellular structures in lamellipodia? \nChoices:\n(A) Focal Adhesion, \n(B) Cell Spreading, \n(C) F-Actin Fiber, \n(D) Nanolines." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.1875, "loss": 4.072484016418457 }, { "id": "identity_323860", "image": "A-OKVQA/images/573973.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the oval object connected to these keys?\nChoices:\n(A) carabiner\n(B) ubolt\n(C) key chain\n(D) knife" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 8.9609375, "loss": 4.4694414138793945 }, { "id": "identity_14428", "image": "Super-CLEVR/images/superCLEVR_new_001340.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the shiny aeroplane in front of the big purple thing and the metallic aeroplane behind the big aeroplane have the same size?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 29.90625, "loss": 5.338090419769287 }, { "id": "identity_23102", "image": "Super-CLEVR/images/superCLEVR_new_001232.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the color of the small chopper the same as the large road bike?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 22.4375, "loss": 5.150150775909424 }, { "id": "identity_183967", "image": "VQA-AS/images/8621.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the woman doing?" }, { "from": "gpt", "value": "The answer is Reading" } ], "clip_score": 22.203125, "loss": 4.428213119506836 }, { "id": "identity_324215", "image": "A-OKVQA/images/450833.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do these cones mark?\nChoices:\n(A) holes\n(B) finish\n(C) lanes\n(D) danger" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.53125, "loss": 4.419224739074707 }, { "id": "identity_331441", "image": "PMC-VQA/images/PMC7738328_F2_321392.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being addressed in this image?\nChoices:\n(A) A broken bone\n(B) A heart disease\n(C) A respiratory disorder\n(D) A neurological disorder" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.0, "loss": 4.611900806427002 }, { "id": "identity_298664", "image": "PMC-VQA/images/PMC8215858_FIG1_472110.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the inset image? \nOptions:\n(A) Ki-67\n(B) CD99\n(C) H&E\n(D) None of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.359375, "loss": 4.303139686584473 }, { "id": "identity_136981", "image": "DocVQA/images/rlvh0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of transmission used?" }, { "from": "gpt", "value": "The answer is STD" } ], "clip_score": 21.25, "loss": 4.238792896270752 }, { "id": "identity_316678", "image": "A-OKVQA/images/573784.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the primary material used to construct this fireplace?\nChoices:\n(A) plastic\n(B) wood\n(C) stone\n(D) steel" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.25, "loss": 4.578040599822998 }, { "id": "identity_2302", "image": "Super-CLEVR/images/superCLEVR_new_000322.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a rubber bus; is it the same size as the matte object that is behind the tiny blue matte motorbike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 29.234375, "loss": 4.625365734100342 }, { "id": "identity_325707", "image": "PMC-VQA/images/PMC4867686_F1_30691.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the fourth column of the images?\nChoices:\n(A) GFP fluorescence \n(B) RFP fluorescence \n(C) Chlorophyll auto fluorescence \n(D) Differential interference contrast image" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.25, "loss": 4.267682075500488 }, { "id": "identity_308722", "image": "A-OKVQA/images/173693.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of event is this?\nChoices:\n(A) play\n(B) race\n(C) concert\n(D) parade" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.46875, "loss": 4.3997721672058105 }, { "id": "identity_228941", "image": "A-OKVQA/images/141008.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the front of the vehicle on the left, and how does it compare to the primary color of the front of the vehicle on the right?" }, { "from": "gpt", "value": "The answer is Both are mustard" } ], "clip_score": 18.921875, "loss": 5.286718368530273 }, { "id": "identity_264856", "image": "Super-CLEVR/images/superCLEVR_new_000103.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is located to the left of the red rubber truck and behind the big shiny dirtbike?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 25.96875, "loss": 4.636263847351074 }, { "id": "identity_205849", "image": "VQA2.0/images/396934.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing red pants in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 17.890625, "loss": 4.298645973205566 }, { "id": "identity_276280", "image": "PMC-VQA/images/PMC7797455_FIG3_339777.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On the CT images, which lobes show more extensive effects?\nChoices:\n(A) Upper lobes\n(B) Middle lobes\n(C) Lower lobes\n(D) All lobes are equally affected" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.46875, "loss": 4.413824558258057 }, { "id": "identity_289815", "image": "A-OKVQA/images/360877.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the tool used to measure the weight of produce before buying?\nChoices:\n(A) tape measure\n(B) hands\n(C) price tag\n(D) scale" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.4375, "loss": 4.072721481323242 }, { "id": "identity_264682", "image": "Super-CLEVR/images/superCLEVR_new_000987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the object located in front of the truck and to the left of the small purple shiny aeroplane?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 23.75, "loss": 4.045571327209473 }, { "id": "identity_3569", "image": "ScienceQA/images/3676.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) Cuba\n(B) The Bahamas\n(C) Barbados\n(D) Dominica" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.0625, "loss": 4.004169464111328 }, { "id": "identity_306897", "image": "PMC-VQA/images/PMC6737997_F2_36449.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of imaging technique was used to capture these images?\nChoices:\n(A) X-ray\n(B) MRI\n(C) CT scan\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.515625, "loss": 4.02184534072876 }, { "id": "identity_15665", "image": "PMC-VQA/images/PMC5295295_ijerph-14-00044-f002_123866.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the rat's body was examined in this study? \nChoices:\n(A) lungs \n(B) heart \n(C) thyroid and liver \n(D) brain and kidney " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.703125, "loss": 4.297690391540527 }, { "id": "identity_33468", "image": "A-OKVQA/images/190393.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is attached to the chair?\nChoices:\n(A) knives\n(B) apples\n(C) wheels\n(D) balloons" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.5390625, "loss": 4.365539073944092 }, { "id": "identity_22432", "image": "PMC-VQA/images/PMC6022295_Fig2_334194.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the specific finding on the abdominal CT? \nChoices:\n(A) multiple nodules \n(B) diverticulitis \n(C) ulcers \n(D) polypoid lesions " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.21875, "loss": 4.538163185119629 }, { "id": "identity_295385", "image": "PMC-VQA/images/PMC5510979_FIG1_190600.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What issue is being raised in relation to the MRI scan of the brain?\nChoices:\n(A) Suspicion of orbital cellulitis \n(B) Fracture in the temporal region \n(C) Signs of a tumor in the brain \n(D) No abnormalities detected in the brain scan." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.59375, "loss": 4.685975551605225 }, { "id": "identity_329286", "image": "A-OKVQA/images/314993.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do they hope you will do after you take a break?\nChoices:\n(A) leave\n(B) go jogging\n(C) buy soda\n(D) help them" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.015625, "loss": 4.217842102050781 }, { "id": "identity_4786", "image": "A-OKVQA/images/252549.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What section of the store is this area?\nChoices:\n(A) women's section\n(B) babies' section\n(C) kids' section\n(D) men's section" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.28125, "loss": 4.008105278015137 }, { "id": "identity_320793", "image": "AI2D/images/3602.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the role of D?\nChoices:\n(A) Provide structural support\n(B) Transmit air to the lungs\n(C) Transmit food to the stomach\n(D) Protect the esophagus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.0, "loss": 4.216085910797119 }, { "id": "identity_3104", "image": "DocVQA/images/nypm0020_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Brand?" }, { "from": "gpt", "value": "The answer is eclipse" } ], "clip_score": 18.671875, "loss": 4.361429214477539 }, { "id": "identity_297050", "image": "A-OKVQA/images/508962.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which character does the man, who is looking at his cell phone, portray in the following options: Mary Poppins, Jesus, Sweeney Todd, or Santa?\nChoices:\n(A) mary poppins\n(B) jesus\n(C) sweeny todd\n(D) santa" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.0, "loss": 4.045906066894531 }, { "id": "identity_12161", "image": "AI2D/images/1934.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the fox feed on?\nChoices:\n(A) bird\n(B) plant\n(C) caterpillar\n(D) none of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.0625, "loss": 4.147598743438721 }, { "id": "identity_266914", "image": "DVQA/images/bar_train_00144466.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the bottommost bar in each group?" }, { "from": "gpt", "value": "The answer is pay" } ], "clip_score": 24.0, "loss": 4.10984468460083 }, { "id": "identity_21941", "image": "A-OKVQA/images/545237.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the circular light on the tall building?\nChoices:\n(A) clock\n(B) floodlight\n(C) window\n(D) sign" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.890625, "loss": 4.361380100250244 }, { "id": "identity_14031", "image": "A-OKVQA/images/168096.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has caused traffic to stop?\nChoices:\n(A) accident\n(B) traffic light\n(C) construction\n(D) animal crossing" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.34375, "loss": 4.501263618469238 }, { "id": "identity_327494", "image": "ScienceQA/images/16953.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Des Moines\n(B) Tallahassee\n(C) Hilo\n(D) Pierre" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.921875, "loss": 4.663058757781982 }, { "id": "identity_75361", "image": "ScienceQA/images/15880.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the image suggest happens after the heart-shaped plant stage?" }, { "from": "gpt", "value": "The answer is Fern plant starts to grow" } ], "clip_score": 29.265625, "loss": 4.523909568786621 }, { "id": "identity_288382", "image": "PMC-VQA/images/PMC8626752_Fig1_125594.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which area is shaded in blue among the following options:\n(A) The entire lung\n(B) The consolidated area\n(C) The ground-glass opacity\n(D) The subtracted parenchyma." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.890625, "loss": 4.011656284332275 }, { "id": "identity_307393", "image": "PMC-VQA/images/PMC2844753_F0004_60335.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which plane of the body is depicted in the image?\nChoices:\n(A) Transverse plane\n(B) Frontal plane\n(C) Sagittal plane\n(D) Coronal plane" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.1875, "loss": 4.507867813110352 }, { "id": "identity_5427", "image": "ScienceQA/images/12971.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Massachusetts\n(B) North Carolina\n(C) Michigan\n(D) Georgia" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.28125, "loss": 4.150162220001221 }, { "id": "identity_314275", "image": "Super-CLEVR/images/superCLEVR_new_001412.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a gray object in front of the large cyan shiny thing?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 17.1875, "loss": 5.344351291656494 }, { "id": "identity_24231", "image": "TabMWP/images/16236.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the following schedule. Which class ends at 3.20 P.M.?\nChoices:\n(A) English class\n(B) History class\n(C) Math class\n(D) Chemistry class" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 36.40625, "loss": 4.172183036804199 }, { "id": "identity_279469", "image": "PMC-VQA/images/PMC6264615_Fig2_403227.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which medical procedure is being described in the caption? \nOptions:\n(A) CT scan \n(B) Echocardiogram \n(C) Thoracoscopy \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.421875, "loss": 4.281200885772705 }, { "id": "identity_327778", "image": "A-OKVQA/images/226594.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color jacket is the person on the far left wearing?\nChoices:\n(A) black\n(B) purple\n(C) blue\n(D) green" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.734375, "loss": 4.148942470550537 }, { "id": "identity_334081", "image": "ScienceQA/images/17869.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What property is associated with this item?\nChoices:\n(A) hard\n(B) soft" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.578125, "loss": 4.354837894439697 }, { "id": "identity_276838", "image": "A-OKVQA/images/153834.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: To which genus does the cow belong?\nChoices:\n(A) bovinae\n(B) bovidae\n(C) bos\n(D) cattle" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.125, "loss": 4.18515682220459 }, { "id": "identity_137418", "image": "MapQA/images/map_167.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states are represented by the second lightest shade on the map?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 30.265625, "loss": 4.103000164031982 }, { "id": "identity_298316", "image": "A-OKVQA/images/573349.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which event is being recorded in this footage? Is it a protest, cake walk, horse show, or parade?\nChoices:\n(A) protest\n(B) cake walk\n(C) horse show\n(D) parade" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.328125, "loss": 4.001438140869141 }, { "id": "identity_240692", "image": "VQA2.0/images/255329.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the man holding in his right hand, and what is the color of the object?" }, { "from": "gpt", "value": "The answer is A flower, yellow" } ], "clip_score": 18.390625, "loss": 4.359289646148682 }, { "id": "identity_286843", "image": "PMC-VQA/images/PMC3766138_F1_230254.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique is being utilized?\nChoices:\n(A) CT-scan\n(B) MRI\n(C) X-ray\n(D) PET-scan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.875, "loss": 4.111387729644775 }, { "id": "identity_63347", "image": "AI2D/images/797.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many levels of consumers are depicted in the food chain?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 35.15625, "loss": 4.467449188232422 }, { "id": "identity_116642", "image": "VizWiz/images/VizWiz_train_00009199.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the text on the cup?" }, { "from": "gpt", "value": "The answer is Gold" } ], "clip_score": 26.765625, "loss": 4.3039116859436035 }, { "id": "identity_6074", "image": "TextVQA/images/23bfdbe38957354f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: is this sero?" }, { "from": "gpt", "value": "The answer is ye" } ], "clip_score": 24.25, "loss": 4.643275260925293 }, { "id": "identity_13343", "image": "VizWiz/images/VizWiz_train_00016669.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this CD and the disc number?" }, { "from": "gpt", "value": "The answer is scarecrow army thieves disc 1 10" } ], "clip_score": 26.828125, "loss": 4.367453098297119 }, { "id": "identity_283091", "image": "A-OKVQA/images/79333.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the current situation of the woman in white? Is she:\n(A) getting soaked\n(B) getting hit\n(C) getting hot\n(D) getting sunburned" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.515625, "loss": 4.579985618591309 }, { "id": "identity_112774", "image": "VizWiz/images/VizWiz_train_00007241.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the floor?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 20.53125, "loss": 4.477489948272705 }, { "id": "identity_230849", "image": "VQA2.0/images/267734.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the visible reflections in the mirror, and how do they contribute to determining the weather conditions in the scene?" }, { "from": "gpt", "value": "The answer is Reflections indicate dry weather conditions" } ], "clip_score": 27.109375, "loss": 4.565242290496826 }, { "id": "identity_219452", "image": "VQA2.0/images/571644.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs can be seen in the room?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 18.390625, "loss": 4.553301811218262 }, { "id": "identity_31638", "image": "PMC-VQA/images/PMC8035775_Fig3_413415.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging was used to show the fluid-attenuated inversion? \nChoices:\n(A) T1 WI \n(B) T2 WI \n(C) DWI \n(D) Flashing signal " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.421875, "loss": 4.990255355834961 }, { "id": "identity_292853", "image": "PMC-VQA/images/PMC6971865_Fig3_103405.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to identify the subcutaneous mass out of the following options: X-ray, CT scan, MRI, or Ultrasound?\nChoices:\n(A) X-ray \n(B) CT scan \n(C) MRI \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.46875, "loss": 4.586705207824707 }, { "id": "identity_2536", "image": "A-OKVQA/images/216325.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of surf is the man on the far left holding?\nChoices:\n(A) funboard\n(B) fish\n(C) longboard\n(D) short board" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.34375, "loss": 4.254889011383057 }, { "id": "identity_226232", "image": "TextVQA/images/fb75fb2477d7112b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the colors of the business name and the storefront background?" }, { "from": "gpt", "value": "The answer is White and green" } ], "clip_score": 24.9375, "loss": 4.115828990936279 }, { "id": "identity_31603", "image": "PMC-VQA/images/PMC4928431_F4_46777.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which hemisphere is affected by the restricted diffusion? \nChoices:\n(A) Left hemisphere \n(B) Right hemisphere \n(C) Both hemispheres \n(D) Frontal hemisphere " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.578125, "loss": 4.386831760406494 }, { "id": "identity_21483", "image": "A-OKVQA/images/88538.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the umbrellas being used for?\nChoices:\n(A) lighting\n(B) blocking sun\n(C) stopping rain\n(D) decoration" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.5546875, "loss": 4.451238632202148 }, { "id": "identity_5472", "image": "AI2D/images/3856.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: which is the bottom part in this diagram?\nChoices:\n(A) hypoderms\n(B) ground tissue\n(C) epiderms\n(D) xylem" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.109375, "loss": 4.18400764465332 }, { "id": "identity_314967", "image": "DocVQA/images/fywn0226_61.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is plotted in the y-axis of this graph?" }, { "from": "gpt", "value": "The answer is bmi" } ], "clip_score": 21.828125, "loss": 4.0224738121032715 }, { "id": "identity_331695", "image": "TextVQA/images/e66a0720f9ac2cf4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is from creative individuals?" }, { "from": "gpt", "value": "The answer is games" } ], "clip_score": 17.875, "loss": 4.3693389892578125 }, { "id": "identity_320872", "image": "A-OKVQA/images/244713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the individual in the white hat power?\nChoices:\n(A) nothing\n(B) blender\n(C) forward movement\n(D) aerobic" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.1875, "loss": 4.359466075897217 }, { "id": "identity_112023", "image": "Super-CLEVR/images/superCLEVR_new_000183.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is to the left of the large green metallic thing and has the same shape as the large green metallic thing?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 22.859375, "loss": 4.565131187438965 }, { "id": "identity_196560", "image": "PMC-VQA/images/PMC5691609_Fig4_241589.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any annotation or labeling present on the image besides the magnification level?" }, { "from": "gpt", "value": "The answer is No additional annotation" } ], "clip_score": 22.09375, "loss": 5.291228771209717 }, { "id": "identity_314973", "image": "FigureQA/images/2430.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Web Maroon less than this color?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.75, "loss": 4.039955139160156 }, { "id": "identity_277968", "image": "A-OKVQA/images/573784.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which material is primarily used in constructing this fireplace?\nChoices:\n(A) plastic\n(B) wood\n(C) stone\n(D) steel" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.96875, "loss": 4.173134803771973 }, { "id": "identity_327831", "image": "FigureQA/images/31838.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.640625, "loss": 4.651276111602783 }, { "id": "identity_335141", "image": "A-OKVQA/images/191850.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are they carrying umbrellas?\nChoices:\n(A) blocking rain\n(B) blocking sun\n(C) to dance\n(D) to sell" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.390625, "loss": 4.209657192230225 }, { "id": "identity_294257", "image": "TQA/images/question_images/waves_9288.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term for half the full extent of a vibration, oscillation, or wave? \n(A) wavelength\n(B) chart\n(C) Amplitude\n(D) time" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.875, "loss": 4.34443998336792 }, { "id": "identity_334613", "image": "VQA2.0/images/507789.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a carpet here?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 17.46875, "loss": 4.171506404876709 }, { "id": "identity_288905", "image": "PMC-VQA/images/PMC5093274_fig6_90987.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is depicted in red in the photomicrographs? \n(A) Turnbull's Blue staining \n(B) hBMSC injection \n(C) MAB1281 \n(D) Fluorescence" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.390625, "loss": 4.855069160461426 }, { "id": "identity_205751", "image": "PMC-VQA/images/PMC7318739_Fig1_200077.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different colors are used to label different areas in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 20.15625, "loss": 4.212014198303223 }, { "id": "identity_123705", "image": "VQA-RAD/images/synpic20375.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the lesion?" }, { "from": "gpt", "value": "The answer is Irregular" } ], "clip_score": 25.265625, "loss": 4.150773048400879 }, { "id": "identity_36218", "image": "A-OKVQA/images/232011.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the item the person on the far right is holding protect against?\nChoices:\n(A) rain\n(B) vampires\n(C) bears\n(D) mosquitos" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.5, "loss": 4.622257709503174 }, { "id": "identity_200168", "image": "Super-CLEVR/images/superCLEVR_new_001658.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly in front of the red scooter?" }, { "from": "gpt", "value": "The answer is A cyan airplane" } ], "clip_score": 26.21875, "loss": 4.193947792053223 }, { "id": "identity_301682", "image": "ScienceQA/images/14899.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What property matches this?\nChoices:\n(A) sour\n(B) salty" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.65625, "loss": 4.581635475158691 }, { "id": "identity_49666", "image": "DVQA/images/bar_train_00080468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item has the highest number of units sold in the lap store?" }, { "from": "gpt", "value": "The answer is The item past" } ], "clip_score": 29.046875, "loss": 5.296176433563232 }, { "id": "identity_195915", "image": "A-OKVQA/images/399540.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of establishment is the bus parked in front of?" }, { "from": "gpt", "value": "The answer is Supermarket" } ], "clip_score": 23.234375, "loss": 4.0325212478637695 }, { "id": "identity_31105", "image": "PMC-VQA/images/PMC4645184_f10_443933.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the right panels of the representative images? \nChoices:\n(A) Antibody staining \n(B) H&E staining \n(C) GFP fluorescence \n(D) Phase-contrast microscopy " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.578125, "loss": 4.19339656829834 }, { "id": "identity_206692", "image": "PMC-VQA/images/PMC2995804_F2_80277.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What side of the body is predominantly affected by the abnormalities?" }, { "from": "gpt", "value": "The answer is Right side" } ], "clip_score": 23.484375, "loss": 4.135383129119873 }, { "id": "identity_130890", "image": "A-OKVQA/images/46339.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tie on the central mannequin?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 25.109375, "loss": 4.118896007537842 }, { "id": "identity_294324", "image": "PMC-VQA/images/PMC5356317_Fig6_141692.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the asterisk in the image represent?\nChoices:\n(A) The position of the eye opening.\n(B) The position of the nose opening.\n(C) The position of the mouth opening.\n(D) The position of the ear opening." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.59375, "loss": 4.858364105224609 }, { "id": "identity_21593", "image": "A-OKVQA/images/45829.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of store are the men most likely standing in?\nChoices:\n(A) educational\n(B) video games\n(C) kids toys\n(D) home appliances" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.84375, "loss": 4.201332092285156 }, { "id": "identity_34464", "image": "VizWiz/images/VizWiz_train_00011243.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does this say? What does this say?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 16.90625, "loss": 4.137505054473877 }, { "id": "identity_275533", "image": "TQA/images/question_images/cellular_respiration_9043.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the step that occurs between the Electron transport chain and Glycolysis in cellular respiration?\nChoices:\n(A) Oxygen\n(B) Citric acid cycle\n(C) Glucose\n(D) Electrons transferred by NADH" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.5625, "loss": 4.2880377769470215 }, { "id": "identity_7963", "image": "VizWiz/images/VizWiz_train_00006737.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this?" }, { "from": "gpt", "value": "The answer is can" } ], "clip_score": 16.9375, "loss": 4.566998481750488 }, { "id": "identity_272011", "image": "PMC-VQA/images/PMC6961606_fig1_100386.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which area of the chest radiography is the mass-like lesion located?\nChoices:\n(A) Right lower lung field\n(B) Left upper lung field\n(C) Middle of the lung field\n(D) Outside the lung field area" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.6875, "loss": 4.501912593841553 }, { "id": "identity_283706", "image": "PMC-VQA/images/PMC6243316_fig2_398165.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the images, which part of the brain displays lesions?\nChoices:\n(A) Cerebellum\n(B) Brainstem\n(C) Thalamus\n(D) Hippocampus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.5, "loss": 4.017123699188232 }, { "id": "identity_21520", "image": "DocVQA/images/nsnm0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the President of the foundation?" }, { "from": "gpt", "value": "The answer is Arthur I. Sternhell" } ], "clip_score": 19.75, "loss": 5.0013227462768555 }, { "id": "identity_196040", "image": "VQA2.0/images/380499.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cranes are visible in the skyline?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 19.8125, "loss": 4.5220112800598145 }, { "id": "identity_297062", "image": "A-OKVQA/images/298933.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What animal closely resembles the kite flown above the beach by the young child? Is it a bird, butterfly, dragonfly, or dragon?\nChoices:\n(A) bird\n(B) butterfly\n(C) dragonfly\n(D) dragon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.5625, "loss": 4.418717384338379 }, { "id": "identity_150869", "image": "ScienceQA/images/15848.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of rock is shown in the image?" }, { "from": "gpt", "value": "The answer is Andesite" } ], "clip_score": 27.296875, "loss": 4.008462905883789 }, { "id": "identity_149430", "image": "DVQA/images/bar_train_00151335.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the lowest value for \"fist\"?" }, { "from": "gpt", "value": "The answer is balm" } ], "clip_score": 21.96875, "loss": 4.86406135559082 }, { "id": "identity_122476", "image": "A-OKVQA/images/356256.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible inside the coffee shop?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 26.734375, "loss": 4.411283493041992 }, { "id": "identity_8491", "image": "PMC-VQA/images/PMC9315104_f2_350671.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique was used to identify the focal hyperdense lesion? \nChoices:\n(A) CT scan \n(B) Ultrasound \n(C) X-ray \n(D) MRI " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.75, "loss": 4.469596862792969 }, { "id": "identity_33895", "image": "IconQA/images/27960.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you select a marble without looking, how likely is it that you will pick a black one?\nChoices:\n(A) probable\n(B) certain\n(C) unlikely\n(D) impossible" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.46875, "loss": 4.258210182189941 }, { "id": "identity_287743", "image": "ScienceQA/images/13882.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic is shared by these three objects among the following choices: (A) yellow, (B) transparent, (C) fuzzy?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.46875, "loss": 4.228120803833008 }, { "id": "identity_324625", "image": "TextVQA/images/d7e1c905d63340f4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first stop after departing from here?" }, { "from": "gpt", "value": "The answer is fortezza" } ], "clip_score": 15.109375, "loss": 4.50699520111084 }, { "id": "identity_245814", "image": "VQA-AS/images/10423.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people in the image have hair that is not red, and what are the colors of their hair?" }, { "from": "gpt", "value": "The answer is Two people; black and brown" } ], "clip_score": 17.5625, "loss": 4.300446033477783 }, { "id": "identity_18802", "image": "ScienceQA/images/16869.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is the main persuasive appeal used in this ad?\nChoices:\n(A) pathos (emotion)\n(B) logos (reason)\n(C) ethos (character)" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 7.43359375, "loss": 4.723050117492676 }, { "id": "identity_282535", "image": "ScienceQA/images/1101.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which direction is the pull directed in relation to the giraffe?\nChoices:\n(A) towards the giraffe\n(B) away from the giraffe" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.9375, "loss": 4.203294277191162 }, { "id": "identity_34500", "image": "A-OKVQA/images/284160.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What persons most frequently ride this conveyance?\nChoices:\n(A) cable workers\n(B) prisoners\n(C) tourists\n(D) commuters" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.046875, "loss": 4.409145355224609 }, { "id": "identity_155761", "image": "ScienceQA/images/8930.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What geographical feature is to the south of the highlighted state?" }, { "from": "gpt", "value": "The answer is Mexico Border" } ], "clip_score": 28.078125, "loss": 4.351992130279541 }, { "id": "identity_240064", "image": "TextVQA/images/34d7098f4dd61106.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand name on the monitor, and what is the color of the text displaying the brand name?" }, { "from": "gpt", "value": "The answer is xcanvas, white" } ], "clip_score": 25.546875, "loss": 4.8815531730651855 }, { "id": "identity_11200", "image": "PMC-VQA/images/PMC1266401_F1_3693.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the thrombus located? \nChoices:\n(A) Aortic valve \n(B) Tricuspid valve \n(C) Pulmonary valve \n(D) Mitral valve " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.328125, "loss": 4.217779636383057 }, { "id": "identity_13509", "image": "PMC-VQA/images/PMC3411849_pone-0042850-g007_148512.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What staining technique was used for the tissue sections? \nChoices:\n(A) Immunohistochemistry \n(B) Hematoxylin and eosin staining \n(C) Periodic acid-Schiff staining \n(D) Masson's trichrome staining " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.140625, "loss": 4.640389919281006 }, { "id": "identity_185319", "image": "PMC-VQA/images/PMC6707203_Fig2_27521.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of cells are predominantly shown in the image?" }, { "from": "gpt", "value": "The answer is Plant or animal cells" } ], "clip_score": 23.921875, "loss": 4.751927375793457 }, { "id": "identity_45205", "image": "DocVQA/images/qjgn0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the royalty product described in this payment request?" }, { "from": "gpt", "value": "The answer is Vanguard - Mono-Lock" } ], "clip_score": 23.296875, "loss": 4.646234035491943 }, { "id": "identity_291465", "image": "PMC-VQA/images/PMC4156216_f4-ol-08-04-1713_318274.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized among the following options: CT scan, MRI, PET scan, or X-ray?\nChoices:\n(A) CT scan \n(B) MRI \n(C) PET scan \n(D) X-ray" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.53125, "loss": 4.16005802154541 }, { "id": "identity_960", "image": "A-OKVQA/images/394874.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How are the elephants most likely to cross this river?\nChoices:\n(A) swim\n(B) walk across\n(C) ride boat\n(D) fly" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.703125, "loss": 4.569375514984131 }, { "id": "identity_4760", "image": "ScienceQA/images/10485.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of New Jersey?\nChoices:\n(A) Trenton\n(B) Hartford\n(C) Annapolis\n(D) Dover" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.640625, "loss": 4.15505313873291 }, { "id": "identity_25697", "image": "A-OKVQA/images/254177.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the dress code of the event he's going to?\nChoices:\n(A) casual\n(B) formal\n(C) business\n(D) semi-formal" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.84375, "loss": 4.3057990074157715 }, { "id": "identity_107476", "image": "Super-CLEVR/images/superCLEVR_new_001683.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the object located between the green and red objects?" }, { "from": "gpt", "value": "The answer is Silver" } ], "clip_score": 26.359375, "loss": 4.939220905303955 }, { "id": "identity_309550", "image": "DVQA/images/bar_train_00064068.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there stacked bars in this chart?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 30.859375, "loss": 4.045383453369141 }, { "id": "identity_56330", "image": "DVQA/images/bar_train_00187612.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first group of bars from the top?" }, { "from": "gpt", "value": "The answer is desire" } ], "clip_score": 24.59375, "loss": 4.275548458099365 }, { "id": "identity_332995", "image": "PMC-VQA/images/PMC5051730_pone.0163778.g003_80800.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of microscope was used to observe the mold colonies?\nChoices:\n(A) Phase contrast microscope\n(B) Fluorescence microscope\n(C) Confocal microscope\n(D) Stereomicroscope" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.703125, "loss": 4.246018886566162 }, { "id": "identity_34587", "image": "PMC-VQA/images/PMC4606887_Fig1_433713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the brain is shown in image (a)? \nChoices:\n(A) anterior cerebellum \n(B) frontal cortex \n(C) posterior cerebellum \n(D) occipital lobe " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.125, "loss": 4.2582902908325195 }, { "id": "identity_274048", "image": "TQA/images/abc_question_images/nuclear_energy_17102.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter corresponds to the turbine in the provided picture? \nChoices:\n(A) D\n(B) C\n(C) V\n(D) S" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.734375, "loss": 4.1001129150390625 }, { "id": "identity_106079", "image": "TextVQA/images/6cf56d2782c00d9f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of nuts are visible on the lower shelf?" }, { "from": "gpt", "value": "The answer is Two types" } ], "clip_score": 20.171875, "loss": 4.727962970733643 }, { "id": "identity_233261", "image": "IconQA/images/57919.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many beans are there in each row, and how many rows are there in total? Then, calculate the total number of beans" }, { "from": "gpt", "value": "The answer is 70" } ], "clip_score": 25.078125, "loss": 4.096478462219238 }, { "id": "identity_258732", "image": "UniGeo/images/2647.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在▱ABCD中,已知AD=8cm,AB=6cm,DE平分∠ADC交BC边于点E,求△ADE的面积。" }, { "from": "gpt", "value": "The answer is \\(1.5h\\)" } ], "clip_score": 30.109375, "loss": 4.116087913513184 }, { "id": "identity_12599", "image": "A-OKVQA/images/296851.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What mode of transport is in the above picture?\nChoices:\n(A) railway\n(B) air\n(C) water\n(D) road" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.6953125, "loss": 4.522487163543701 }, { "id": "identity_266711", "image": "A-OKVQA/images/64116.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term used to describe the person responsible for taking care of sheep?\nChoices:\n(A) zookeeper\n(B) breeder\n(C) manager\n(D) shepard" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.109375, "loss": 4.0321269035339355 }, { "id": "identity_208864", "image": "VQA2.0/images/406282.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.453125, "loss": 4.154447078704834 }, { "id": "identity_8506", "image": "TQA/images/abc_question_images/parts_telescope_18151.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label refers to the objective lens?\nChoices:\n(A) P\n(B) K\n(C) D\n(D) X" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.5, "loss": 4.548947334289551 }, { "id": "identity_120772", "image": "VQA2.0/images/59281.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the umbrellas?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 24.0, "loss": 4.1046552658081055 }, { "id": "identity_28076", "image": "DocVQA/images/mhpj0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of study is mentioned in this letter?" }, { "from": "gpt", "value": "The answer is Study on soft drinks" } ], "clip_score": 19.375, "loss": 4.021658420562744 }, { "id": "identity_336231", "image": "PMC-VQA/images/PMC4418099_Fig1_383036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the hematoma located in this patient's brain?\nChoices:\n(A) Right frontal scalp.\n(B) Left frontal scalp.\n(C) Occipital scalp.\n(D) Temporal scalp." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.390625, "loss": 4.001284599304199 }, { "id": "identity_268125", "image": "A-OKVQA/images/567572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose is the brush located between the doll's legs typically used?\nChoices:\n(A) teeth\n(B) skin\n(C) nails\n(D) hair" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.21875, "loss": 4.142240047454834 }, { "id": "identity_330449", "image": "PMC-VQA/images/PMC8184106_FIG2_461212.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does CT mean?\nChoices:\n(A) Computerized Treatment \n(B) Computed Tomography \n(C) Cardiac Testing \n(D) Cranial Trauma" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.15625, "loss": 4.365437030792236 }, { "id": "identity_28616", "image": "PMC-VQA/images/PMC7758316_F3_325762.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is observed inside the lumen of the small artery? \nChoices:\n(A) Blood clot \n(B) Fibrocartilaginous embolus \n(C) Fat embolus \n(D) Calcium embolus " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.59375, "loss": 4.4568376541137695 }, { "id": "identity_16067", "image": "PMC-VQA/images/PMC4559168_Fig2_420521.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the indication of the red arrow in the image? \nChoices:\n(A) Patella \n(B) Bursitis \n(C) Hip Fracture \n(D) No indication given. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.40625, "loss": 4.364975452423096 }, { "id": "identity_292341", "image": "PMC-VQA/images/PMC7364197_fig1_214940.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, what is the significance of the white arrowhead pointing to?\nChoices:\n(A) Reactive bone \n(B) Marrow infection \n(C) Fracture position \n(D) Nerve damage" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.515625, "loss": 4.502471446990967 }, { "id": "identity_239654", "image": "VQA2.0/images/87027.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people in the picture are wearing both shorts and a sleeveless shirt?" }, { "from": "gpt", "value": "The answer is six" } ], "clip_score": 19.5, "loss": 4.297791004180908 }, { "id": "identity_287441", "image": "A-OKVQA/images/250344.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for her being wrapped in the blanket?\nChoices:\n(A) is cold\n(B) is hiding\n(C) is confused\n(D) showing off" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.109375, "loss": 4.441582679748535 }, { "id": "identity_284068", "image": "IconQA/images/47623.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) slide\n(B) turn\n(C) flip" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.71875, "loss": 4.5189619064331055 }, { "id": "identity_196284", "image": "VQA-AS/images/17691.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many logs are visible in the fireplace?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.09375, "loss": 4.485270023345947 }, { "id": "identity_29990", "image": "VQA2.0/images/237685.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bus?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 24.5625, "loss": 4.1232590675354 }, { "id": "identity_33341", "image": "PMC-VQA/images/PMC4489085_F4_401982.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the plant is shown in image D? \nChoices:\n(A) Upper surface of median leaf \n(B) Section of lower surface of stem \n(C) Upper surface of dorsal sporophyll \n(D) Section of upper surface of stem. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.375, "loss": 4.318035125732422 }, { "id": "identity_78165", "image": "ScienceQA/images/6044.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What adaptation allows the plants in the image to thrive in their environment?" }, { "from": "gpt", "value": "The answer is Aerial roots adaptation" } ], "clip_score": 28.765625, "loss": 4.5475006103515625 }, { "id": "identity_222503", "image": "A-OKVQA/images/59815.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are visible in the image participating in the event?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 13.5625, "loss": 4.395794868469238 }, { "id": "identity_182698", "image": "PMC-VQA/images/PMC6440287_F1_459012.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label 'b' indicate in the lower part of the image?" }, { "from": "gpt", "value": "The answer is Imaging parameters abbreviations" } ], "clip_score": 26.84375, "loss": 4.355820655822754 }, { "id": "identity_315692", "image": "TQA/images/abc_question_images/parts_fish_12852.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part is indicated by label F?\nChoices:\n(A) DORSAL FIN\n(B) CAUDAL FIN\n(C) PECTORAL FIN\n(D) VENTRAL FIN" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.59375, "loss": 4.304073333740234 }, { "id": "identity_319690", "image": "FigureQA/images/46545.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Khaki the highest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.015625, "loss": 4.52617073059082 }, { "id": "identity_24752", "image": "AI2D/images/4483.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which image represents erose?\nChoices:\n(A) N\n(B) L\n(C) D\n(D) Y" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.4296875, "loss": 4.013810157775879 }, { "id": "identity_283556", "image": "A-OKVQA/images/273579.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the elephant currently engaged in?\nChoices:\n(A) drinking\n(B) painting picture\n(C) eating lunch\n(D) resting" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.890625, "loss": 4.447274208068848 }, { "id": "identity_333882", "image": "PMC-VQA/images/PMC4087253_fig2_304569.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the nature of the vessels shown in this image?\nChoices:\n(A) Large veins\n(B) Arteries\n(C) Capillaries\n(D) Lymph vessels" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.78125, "loss": 4.029744625091553 }, { "id": "identity_194073", "image": "A-OKVQA/images/496362.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.328125, "loss": 4.176215171813965 }, { "id": "identity_305275", "image": "ScienceQA/images/19895.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is shown in green at the bottom of the map?\nChoices:\n(A) Antarctica\n(B) Australia\n(C) South America\n(D) Asia" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.859375, "loss": 4.2551398277282715 }, { "id": "identity_295693", "image": "PMC-VQA/images/PMC8124336_ijms-22-04818-f003_441960.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the black asterisk in the image represent?\nChoices:\n(A) Connective tissue \n(B) M2 macrophages \n(C) Newly formed bone \n(D) Xenogeneic BSM granules." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.234375, "loss": 4.529706001281738 }, { "id": "identity_284282", "image": "PMC-VQA/images/PMC3970346_fig1_277589.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are some of the features exhibited by the glioma observed in the MRI scan?\nChoices:\n(A) Decreased mass effect \n(B) Homogeneous contrast enhancement \n(C) Thinly outlined lesion \n(D) Minimal necrosis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.53125, "loss": 4.052308082580566 }, { "id": "identity_282876", "image": "PMC-VQA/images/PMC4661912_ijms-16-26054-f004_448410.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which perspective displays the tracer activity from the side of the brain?\nOptions:\n(A) Sagittal\n(B) Coronal\n(C) Transverse\n(D) Frontal" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.0625, "loss": 4.3140974044799805 }, { "id": "identity_199039", "image": "PMC-VQA/images/PMC5093274_fig6_90987.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scale bar length at the bottom right of the image indicating?" }, { "from": "gpt", "value": "The answer is A reference for measurement" } ], "clip_score": 13.296875, "loss": 4.729117393493652 }, { "id": "identity_294755", "image": "PMC-VQA/images/PMC7044387_ccr32665-fig-0001_125308.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging sequence was utilized to identify the dilation of the third and lateral ventricles?\nChoices:\n(A) T2\n(B) FLAIR\n(C) T1\n(D) DWI" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.375, "loss": 4.128356456756592 }, { "id": "identity_34864", "image": "IconQA/images/8807.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a boat. Which picture is second?\nChoices:\n(A) crab\n(B) boat\n(C) bucket" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.765625, "loss": 4.168710708618164 }, { "id": "identity_212574", "image": "PMC-VQA/images/PMC4346664_Fig1_363459.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many purple areas are there in the lower section of the diagram?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.03125, "loss": 4.195810794830322 }, { "id": "identity_289099", "image": "VQA2.0/images/534195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you describe the red car?" }, { "from": "gpt", "value": "The answer is honda" } ], "clip_score": 17.09375, "loss": 4.447865962982178 }, { "id": "identity_18866", "image": "FigureQA/images/48773.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.453125, "loss": 4.308957576751709 }, { "id": "identity_217063", "image": "A-OKVQA/images/448983.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles are in the image?" }, { "from": "gpt", "value": "The answer is Nine" } ], "clip_score": 15.0234375, "loss": 4.125248432159424 }, { "id": "identity_9012", "image": "PMC-VQA/images/PMC6930684_Fig1_90642.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique was used to obtain the presented images? \nChoices:\n(A) Computed Tomography \n(B) Magnetic Resonance Imaging \n(C) X-Ray \n(D) Ultrasound " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.640625, "loss": 4.19952392578125 }, { "id": "identity_15026", "image": "PMC-VQA/images/PMC3883696_pone-0085128-g001_257142.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the arrows indicate in the images provided? \nChoices:\n(A) infiltrating tumour cells \n(B) normal bone cells \n(C) osteoblasts \n(D) reactive new bone cells " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.203125, "loss": 5.121403694152832 }, { "id": "identity_332441", "image": "A-OKVQA/images/551194.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What family of animals do the animals that the children are riding belong to?\nChoices:\n(A) bovidae\n(B) felidae\n(C) equidae\n(D) canidae" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.0703125, "loss": 4.002545356750488 }, { "id": "identity_31494", "image": "A-OKVQA/images/198437.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the woman in yellow doing on the horse?\nChoices:\n(A) feeding\n(B) competing\n(C) learning\n(D) cleaning" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.28125, "loss": 4.717179775238037 }, { "id": "identity_276768", "image": "A-OKVQA/images/196944.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the zebra wearing a hat?\nChoices:\n(A) stay warm\n(B) for photograph\n(C) showing off\n(D) fashion" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.265625, "loss": 4.627697944641113 }, { "id": "identity_34490", "image": "ScienceQA/images/1058.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) Cuba\n(B) Jamaica\n(C) Haiti\n(D) Dominica" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.15625, "loss": 4.5818681716918945 }, { "id": "identity_14568", "image": "DVQA/images/bar_train_00091417.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the smallest accuracy summed across all the datasets?" }, { "from": "gpt", "value": "The answer is chip" } ], "clip_score": 30.296875, "loss": 4.101561069488525 }, { "id": "identity_37141", "image": "FigureQA/images/78809.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Black have the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.984375, "loss": 4.531920909881592 }, { "id": "identity_205785", "image": "A-OKVQA/images/177253.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different colors of apples can be seen in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 27.65625, "loss": 4.16844367980957 }, { "id": "identity_262879", "image": "PMC-VQA/images/PMC2190758_F2_16013.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In Figure 2b, what kind of lesions were identified? \nChoices:\n(A) Temporal lobe lesions \n(B) White matter lesions \n(C) Lacunar lesions \n(D) Periventricular lesions" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.4375, "loss": 4.405877113342285 }, { "id": "identity_335665", "image": "DocVQA/images/htng0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of people are listed in the document?" }, { "from": "gpt", "value": "The answer is invited-not present" } ], "clip_score": 17.546875, "loss": 6.227234840393066 }, { "id": "identity_182435", "image": "A-OKVQA/images/106661.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the motorcycle that is second from the left?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 26.15625, "loss": 4.023680210113525 }, { "id": "identity_309192", "image": "A-OKVQA/images/298649.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where do the people near the vehicle prefer to go?\nChoices:\n(A) kid's playground\n(B) sand pits\n(C) ocean\n(D) snow mountains" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.109375, "loss": 4.160167217254639 }, { "id": "identity_276531", "image": "A-OKVQA/images/286376.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For which sport is this board typically used?\nChoices:\n(A) skating\n(B) skateboarding\n(C) surfing\n(D) skiing" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.21875, "loss": 4.160546779632568 }, { "id": "identity_189476", "image": "VQA2.0/images/384374.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many locomotives are pulling the train?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.359375, "loss": 4.124081611633301 }, { "id": "identity_320191", "image": "ScienceQA/images/13649.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared characteristic of these items?\nChoices:\n(A) fragile\n(B) transparent\n(C) flexible" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.484375, "loss": 4.407049655914307 }, { "id": "identity_16962", "image": "A-OKVQA/images/539041.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of counter is shown?\nChoices:\n(A) coin\n(B) bathroom\n(C) kitchen\n(D) store" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.015625, "loss": 4.531561851501465 }, { "id": "identity_253399", "image": "VQA2.0/images/336291.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of buildings can you identify in this urban setting, and what are their primary functions?" }, { "from": "gpt", "value": "The answer is Three types: residential/office, hotel, commercial" } ], "clip_score": 11.1875, "loss": 4.1643967628479 }, { "id": "identity_32707", "image": "ScienceQA/images/12780.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Virginia?\nChoices:\n(A) Richmond\n(B) Arlington\n(C) Montgomery\n(D) Providence" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.609375, "loss": 4.287956714630127 }, { "id": "identity_329580", "image": "ScienceQA/images/12004.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Louisville\n(B) Columbia\n(C) Frankfort\n(D) Richmond" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.34375, "loss": 4.267512798309326 }, { "id": "identity_285379", "image": "PMC-VQA/images/PMC9217618_fig2_317191.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of tissue is under observation in the pathological examination? \nOptions:\n(A) Kidney tissues \n(B) Lung tissues \n(C) Liver tissues \n(D) Stomach tissues" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.953125, "loss": 4.133824348449707 }, { "id": "identity_330407", "image": "A-OKVQA/images/544325.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of place is the person with gloves standing in?\nChoices:\n(A) baseball field\n(B) ice rink\n(C) basketball court\n(D) soccer field" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.796875, "loss": 4.382866382598877 }, { "id": "identity_272633", "image": "ScienceQA/images/13517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Tennessee?\n(A) Nashville\n(B) Montgomery\n(C) Knoxville\n(D) Charleston" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.609375, "loss": 4.1166486740112305 }, { "id": "identity_3203", "image": "Super-CLEVR/images/superCLEVR_new_001717.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The metallic bicycle that is the same size as the dirtbike is what color?" }, { "from": "gpt", "value": "The answer is green" } ], "clip_score": 23.9375, "loss": 4.4905500411987305 }, { "id": "identity_193073", "image": "VQA2.0/images/576397.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the horse's saddle?" }, { "from": "gpt", "value": "The answer is within ten words" } ], "clip_score": 20.453125, "loss": 5.036781311035156 }, { "id": "identity_292434", "image": "A-OKVQA/images/251255.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which word would be the most appropriate to describe the setting out of the following choices: sunny, snowy, rainy, or tsunami?\nChoices:\n(A) sunny\n(B) snowy\n(C) rainy\n(D) tsunami" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.015625, "loss": 5.040390968322754 }, { "id": "identity_272166", "image": "A-OKVQA/images/64149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: With the item in the pink box, what will they clean? \nChoices:\n(A) face\n(B) floor\n(C) table\n(D) screen" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.265625, "loss": 4.25789737701416 }, { "id": "identity_302866", "image": "FigureQA/images/75147.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the area under the curve for Orange the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.15625, "loss": 4.398375511169434 }, { "id": "identity_301442", "image": "A-OKVQA/images/548731.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are these cones orange?\nChoices:\n(A) camouflage\n(B) design\n(C) visibility\n(D) appealing color" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.015625, "loss": 4.264617919921875 }, { "id": "identity_3858", "image": "PMC-VQA/images/PMC7660178_materials-13-04723-f013_296117.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the surface shown in the microstructure image? \nChoices:\n(A) Surface I \n(B) Surface II \n(C) Surface III \n(D) Surface IV " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.203125, "loss": 4.054007530212402 }, { "id": "identity_214881", "image": "A-OKVQA/images/222322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the majority of the umbrellas?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 22.890625, "loss": 4.006488800048828 }, { "id": "identity_276649", "image": "TextVQA/images/588c3eb6601cdb4e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this shirt from osu?" }, { "from": "gpt", "value": "The answer is yes " } ], "clip_score": 15.5859375, "loss": 5.264490127563477 }, { "id": "identity_286560", "image": "PMC-VQA/images/PMC4588204_pharmaceutics-07-00334-f004_428396.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of images were produced? \nOptions:\n(A) CT scan\n(B) X-ray\n(C) MRI\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.09375, "loss": 4.1426777839660645 }, { "id": "identity_289980", "image": "A-OKVQA/images/201252.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What object is the woman with the purple umbrella holding up to her face?\nChoices:\n(A) phone\n(B) sandwich\n(C) drink\n(D) glasses" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.0234375, "loss": 4.371549606323242 }, { "id": "identity_21869", "image": "VQA2.0/images/233404.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this man coming home from college?" }, { "from": "gpt", "value": "The answer is home" } ], "clip_score": 17.28125, "loss": 5.003439426422119 }, { "id": "identity_311224", "image": "FigureQA/images/72413.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this color have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.90625, "loss": 4.371327877044678 }, { "id": "identity_311709", "image": "PMC-VQA/images/PMC509297_pbio-0020225-g003_250.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What sections of the hippocampus are shown in the images?\nChoices:\n(A) Superior, medial and inferior.\n(B) Anterior, middle and posterior.\n(C) Lateral, medial and inferior.\n(D) Posterior, medial and anterior." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.453125, "loss": 4.2386298179626465 }, { "id": "identity_267594", "image": "TQA/images/question_images/cycle_rock_6741.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the result of applying heat and pressure to igneous rock?\nChoices:\n(A) Turn to magma\n(B) Turns to sediment\n(C) Turns to sedimentary rock\n(D) Turns to metamorphic rock" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 35.9375, "loss": 4.0478315353393555 }, { "id": "identity_232242", "image": "VizWiz/images/VizWiz_train_00017287.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key morphological characteristics of this leaf, and how do they help in identifying its family and genus?" }, { "from": "gpt", "value": "The answer is Rosaceae family, possibly genus Prunus" } ], "clip_score": 32.15625, "loss": 4.116840839385986 }, { "id": "identity_2267", "image": "AI2D/images/2615.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What area does letter A represent\nChoices:\n(A) Tidal buldge's center of mass\n(B) Moon's gravitational pull\n(C) Earth's rotation\n(D) Earth's gravitational pull towards the moon" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.1875, "loss": 4.046440124511719 }, { "id": "identity_281376", "image": "PMC-VQA/images/PMC9102236_cancers-14-02274-f002_280298.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the right panel of the image depict?\nChoices:\n(A) EUS-guided LA on porcine models after treatment\n(B) EUS-guided LA on pancreatic tissue before treatment\n(C) EUS-guided LA on pancreatic tissue after treatment\n(D) EUS-guided LA on porcine models before treatment" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.15625, "loss": 4.386857986450195 }, { "id": "identity_223447", "image": "A-OKVQA/images/119299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people can be seen standing on the platform outside the train?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 28.359375, "loss": 4.002137184143066 }, { "id": "identity_311982", "image": "PMC-VQA/images/PMC4158140_fig1_318878.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is observed in this scan?\nChoices:\n(A) Tonsillitis\n(B) Sinusitis\n(C) Bronchitis\n(D) Pneumonia" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.796875, "loss": 4.3417229652404785 }, { "id": "identity_123583", "image": "VQA2.0/images/370768.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible contrails are there in the sky?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 23.140625, "loss": 4.154658317565918 }, { "id": "identity_330688", "image": "PMC-VQA/images/PMC7939202_F1_385981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the hypointensities located on this MRI?\nChoices:\n(A) Basal ganglia\n(B) Hippocampus\n(C) Corticospinal tracts\n(D) Corpus callosum" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.28125, "loss": 4.254520416259766 }, { "id": "identity_303697", "image": "AI2D/images/1388.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part is a tube-like structure?\nChoices:\n(A) E\n(B) C\n(C) F\n(D) D" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.03125, "loss": 4.410641670227051 }, { "id": "identity_297285", "image": "PMC-VQA/images/PMC3206434_F1_113893.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: To which location did the seed eventually move? \nChoices:\n(A) Right side of the middle abdomen\n(B) Right kidney\n(C) Left side of the middle abdomen\n(D) Left kidney" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.765625, "loss": 4.227513313293457 }, { "id": "identity_125409", "image": "PMC-VQA/images/PMC8840569_sensors-22-00821-f001_193301.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiological sign indicated by the arrow in the image?" }, { "from": "gpt", "value": "The answer is Air crescent sign" } ], "clip_score": 24.984375, "loss": 4.543253421783447 }, { "id": "identity_331382", "image": "A-OKVQA/images/291476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of shot is this person making?\nChoices:\n(A) double\n(B) forehand\n(C) backhand\n(D) none" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.421875, "loss": 4.268077850341797 }, { "id": "identity_271958", "image": "ScienceQA/images/3358.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the organism that serves as the secondary consumer in this food web:\n(A) phytoplankton\n(B) black rockfish\n(C) zooplankton\n(D) sea urchin" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.171875, "loss": 4.589385032653809 }, { "id": "identity_207531", "image": "VQA2.0/images/26812.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lemons are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.8125, "loss": 4.328179359436035 }, { "id": "identity_297248", "image": "PMC-VQA/images/PMC6409300_F1_447822.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the mark on the upper left of the image indicate?\nChoices:\n(A) A higher quality image \n(B) A different part of the brain \n(C) A higher magnification of the marked area \n(D) The location of a tumor" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.6875, "loss": 4.083016395568848 }, { "id": "identity_4314", "image": "PMC-VQA/images/PMC7772687_f0005_332266.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What becomes hard to distinguish with decreasing image resolution on the ex vivo trapezium image shown? \nChoices:\n(A) Cortisol \n(B) Pores near the cortex \n(C) Arthritis \n(D) Muscle fibers " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.828125, "loss": 4.026309013366699 }, { "id": "identity_293437", "image": "FigureQA/images/91722.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Mint the option with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.546875, "loss": 4.006532669067383 }, { "id": "identity_298004", "image": "A-OKVQA/images/308034.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the cargo carried by the boat?\nChoices:\n(A) electronics\n(B) books\n(C) animals\n(D) food" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.421875, "loss": 4.194429397583008 }, { "id": "identity_333362", "image": "PMC-VQA/images/PMC2966750_F0001_77672.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the specific type of injury identified in this image?\nChoices:\n(A) Humeral fracture and Type IV AC separation \n(B) Coracoid fracture and Type III AC separation \n(C) Shoulder blade fracture and Type V AC separation \n(D) Clavicle fracture and Type II AC separation" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.75, "loss": 4.538779258728027 }, { "id": "identity_311362", "image": "FigureQA/images/67741.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this have the minimum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.15625, "loss": 4.2922139167785645 }, { "id": "identity_324584", "image": "A-OKVQA/images/386592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are just the letters SBwa visible on this sign?\nChoices:\n(A) broken bulbs\n(B) correct name\n(C) spray paint\n(D) fallen letters" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.390625, "loss": 4.045779228210449 }, { "id": "identity_309367", "image": "DVQA/images/bar_train_00051944.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second bar from the left in each set?" }, { "from": "gpt", "value": "The answer is band" } ], "clip_score": 18.4375, "loss": 4.066357612609863 }, { "id": "identity_22364", "image": "PMC-VQA/images/PMC6692028_pone.0220922.g001_22949.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the direction of the image in relation to the beak? \nChoices:\n(A) Sagittal \n(B) Transverse \n(C) Coronal \n(D) Oblique " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.296875, "loss": 4.651309967041016 }, { "id": "identity_203340", "image": "A-OKVQA/images/160996.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pots can be seen on the stove?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.703125, "loss": 4.573694705963135 }, { "id": "identity_328553", "image": "A-OKVQA/images/244231.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the thin objects these birds are sitting on?\nChoices:\n(A) branches\n(B) poles\n(C) power lines\n(D) traps" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.046875, "loss": 4.1781182289123535 }, { "id": "identity_295085", "image": "VQA2.0/images/274715.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the man in motion?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 13.125, "loss": 4.302803039550781 }, { "id": "identity_316021", "image": "DVQA/images/bar_train_00046428.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sets of bars have at least one bar with a value greater than 7?" }, { "from": "gpt", "value": "The answer is six" } ], "clip_score": 21.703125, "loss": 4.517829895019531 }, { "id": "identity_297218", "image": "A-OKVQA/images/574332.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the man's coat being yellow in color?\nChoices:\n(A) dress code\n(B) fashion\n(C) visibility\n(D) camouflage" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.0859375, "loss": 4.227931022644043 }, { "id": "identity_202829", "image": "TextVQA/images/0222dde37ac1175c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of the basketball?" }, { "from": "gpt", "value": "The answer is Molten" } ], "clip_score": 23.75, "loss": 4.027632713317871 }, { "id": "identity_324379", "image": "TextVQA/images/97844cef7a4c8568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who makes this product?" }, { "from": "gpt", "value": "The answer is no text in image" } ], "clip_score": 19.359375, "loss": 4.0640764236450195 }, { "id": "identity_326667", "image": "PMC-VQA/images/PMC6626925_F2_7538.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which sinus is affected in this image?\nChoices:\n(A) Maxillary sinus\n(B) Frontal sinus\n(C) Sphenoid sinus\n(D) Ethmoid sinus" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.21875, "loss": 4.100343227386475 }, { "id": "identity_284286", "image": "A-OKVQA/images/386853.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the presence of all these phones here?\nChoices:\n(A) stolen\n(B) being charged\n(C) owner rich\n(D) for sale" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.171875, "loss": 4.500292778015137 }, { "id": "identity_274474", "image": "AI2D/images/2130.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the diagram provided and your understanding of science, which organism labeled in the diagram is considered a producer?\nOptions:\n(A) aquatic insects\n(B) bass\n(C) zooplankton\n(D) phytoplankton" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 36.71875, "loss": 4.580697536468506 }, { "id": "identity_26568", "image": "PMC-VQA/images/PMC8359591_Fig2_33276.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the finding in the CT scan? \nChoices:\n(A) Smooth bone surface \n(B) Irregular erosions \n(C) Narrowing of joint space \n(D) Soft tissue swelling. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.0, "loss": 4.322704792022705 }, { "id": "identity_20548", "image": "PMC-VQA/images/PMC7813024_ccr33524-fig-0004_344538.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the type of image used in this study? \nChoices:\n(A) PET-CT \n(B) MRI T1-weighted \n(C) MRI T2-weighted \n(D) X-ray " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.03125, "loss": 4.711424350738525 }, { "id": "identity_52367", "image": "FigureQA/images/17375.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is The title is \"title" } ], "clip_score": 37.90625, "loss": 4.324248790740967 }, { "id": "identity_288358", "image": "A-OKVQA/images/29709.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of area is depicted in the image? \nOptions:\n(A) exercise\n(B) boarding\n(C) waiting\n(D) dining" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.59375, "loss": 4.3996968269348145 }, { "id": "identity_336077", "image": "FigureQA/images/18933.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.5, "loss": 4.004281520843506 }, { "id": "identity_4233", "image": "VizWiz/images/VizWiz_train_00012687.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the captcha code on this image?" }, { "from": "gpt", "value": "The answer is gfdg" } ], "clip_score": 23.03125, "loss": 4.015430927276611 }, { "id": "identity_255504", "image": "Super-CLEVR/images/superCLEVR_new_000750.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of tiny jets and tiny purple metallic cruisers combined, and how does this compare to the number of other objects in the image?" }, { "from": "gpt", "value": "The answer is They are equal" } ], "clip_score": 25.84375, "loss": 4.898553371429443 }, { "id": "identity_18489", "image": "ScienceQA/images/19008.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following could Brooke's test show?\nChoices:\n(A) how much the drone weighed with the blade guards\n(B) if the blade guards would break in a crash\n(C) if adding the blade guards made the drone fly poorly" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.515625, "loss": 4.205617427825928 }, { "id": "identity_312516", "image": "AI2D/images/4651.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What represents the hawk in the picture?\nChoices:\n(A) none of the above\n(B) a\n(C) B\n(D) D" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.53125, "loss": 4.5149149894714355 }, { "id": "identity_29823", "image": "A-OKVQA/images/322507.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are the skiers wearing numbers on their shirts?\nChoices:\n(A) for fun\n(B) competing\n(C) for fashion\n(D) to count" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.234375, "loss": 4.145395278930664 }, { "id": "identity_325849", "image": "PMC-VQA/images/PMC4932493_f1_47769.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the colors in this image represent?\nChoices:\n(A) Bone growth stages,\n(B) Neurovascular pathways,\n(C) Tooth functions,\n(D) Cross-section views." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.828125, "loss": 4.538517475128174 }, { "id": "identity_210960", "image": "TextVQA/images/5afd3a7d403edeb8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many buildings have visible logos or company names?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 17.875, "loss": 4.931972503662109 }, { "id": "identity_310261", "image": "A-OKVQA/images/67982.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is this coat made of?\nChoices:\n(A) pic\n(B) nylon\n(C) leather\n(D) cotton" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.53125, "loss": 4.2112016677856445 }, { "id": "identity_328336", "image": "PMC-VQA/images/PMC7296345_fig5_192972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the dotted arrow in the image indicate?\nChoices:\n(A) calcification in the left upper quadrant\n(B) artefact on surface of fetal body\n(C) calcification in the right upper quadrant\n(D) artefact from the machine" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.078125, "loss": 4.753045082092285 }, { "id": "identity_300684", "image": "Super-CLEVR/images/superCLEVR_new_000638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What size is the cyan rubber thing that has the same shape as the big green rubber thing?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 17.765625, "loss": 4.062834739685059 }, { "id": "identity_103839", "image": "TextVQA/images/841f5ebf82965c1f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What iconic structure is depicted in the background of the advertisement?" }, { "from": "gpt", "value": "The answer is A castle" } ], "clip_score": 17.171875, "loss": 5.536497592926025 }, { "id": "identity_314632", "image": "TQA/images/question_images/human_system_digestive_1385.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these parts of the digestive system is approximately 7 meters long in adults?\nChoices:\n(A) large intestine\n(B) small intestine\n(C) esophagus\n(D) rectum" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.78125, "loss": 4.1748480796813965 }, { "id": "identity_56838", "image": "PlotQA/images/96094.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the trend of enrollment ratio for pre-primary schools from 2002 to 2010?" }, { "from": "gpt", "value": "The answer is It remains relatively constant" } ], "clip_score": 27.765625, "loss": 4.179966926574707 }, { "id": "identity_300027", "image": "VQA2.0/images/172947.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the time on this correct?" }, { "from": "gpt", "value": "The answer is maybe" } ], "clip_score": 18.296875, "loss": 4.460755348205566 }, { "id": "identity_271026", "image": "A-OKVQA/images/135336.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose are the cement blocks placed in the sea?\nChoices:\n(A) floaty\n(B) protecting shore\n(C) decoration\n(D) pier" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.203125, "loss": 4.465897560119629 }, { "id": "identity_314818", "image": "FigureQA/images/78494.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there an intersection between Turquoise and Peru?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.359375, "loss": 4.001260757446289 }, { "id": "identity_269182", "image": "PlotQA/images/112850.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the graph, where is the legend located?" }, { "from": "gpt", "value": "The answer is center right" } ], "clip_score": 18.1875, "loss": 4.571913242340088 }, { "id": "identity_132943", "image": "VQA-AS/images/6694.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trees are there in the background?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 16.265625, "loss": 4.203597545623779 }, { "id": "identity_241889", "image": "VQA2.0/images/278335.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of sports equipment can you identify in this sports shop?" }, { "from": "gpt", "value": "The answer is Three types" } ], "clip_score": 17.796875, "loss": 4.730043411254883 }, { "id": "identity_127243", "image": "TextVQA/images/14a86de492158295.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people can be seen walking on the street?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 14.59375, "loss": 4.777642250061035 }, { "id": "identity_105261", "image": "VizWiz/images/VizWiz_train_00006794.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bottles are visible on the counter?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.5625, "loss": 4.706139087677002 }, { "id": "identity_142521", "image": "DocVQA/images/khcn0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the location of the meeting as per the email?" }, { "from": "gpt", "value": "The answer is DCRoomARN3500/OPEI" } ], "clip_score": 26.359375, "loss": 5.276418685913086 }, { "id": "identity_287997", "image": "A-OKVQA/images/488822.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose are the rugs used? \nChoices:\n(A) moisture\n(B) decoration\n(C) seating\n(D) cleaning" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.4921875, "loss": 4.047393321990967 }, { "id": "identity_317423", "image": "A-OKVQA/images/39456.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of this dish?\nChoices:\n(A) rugalach\n(B) crepe\n(C) blintz\n(D) croissant" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.234375, "loss": 4.106993675231934 }, { "id": "identity_218259", "image": "TextVQA/images/f8bc673eb5663a49.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many jerseys are displayed in the glass case?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 30.609375, "loss": 4.297611713409424 }, { "id": "identity_335775", "image": "DVQA/images/bar_train_00163912.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many groups of bars have at least one bar with a value smaller than 8?" }, { "from": "gpt", "value": "The answer is four" } ], "clip_score": 26.3125, "loss": 4.136523246765137 }, { "id": "identity_40417", "image": "DVQA/images/bar_train_00121134.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item has the least total sales across all stores?" }, { "from": "gpt", "value": "The answer is route" } ], "clip_score": 30.84375, "loss": 4.114245414733887 }, { "id": "identity_317135", "image": "TQA/images/abc_question_images/seasons_16282.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option represents the Summer Solstice in this diagram?\nChoices:\n(A) S\n(B) Y\n(C) L\n(D) T" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.875, "loss": 4.088980674743652 }, { "id": "identity_21717", "image": "A-OKVQA/images/3786.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is the bench made of?\nChoices:\n(A) carbon fiber\n(B) metal\n(C) wood\n(D) plastic" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.1328125, "loss": 4.305804252624512 }, { "id": "identity_199812", "image": "PMC-VQA/images/PMC4533796_Fig6_413971.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of pathology in the tissue shown?" }, { "from": "gpt", "value": "The answer is Possible, indicated by the dashed box" } ], "clip_score": 30.109375, "loss": 4.992727279663086 }, { "id": "identity_285875", "image": "PMC-VQA/images/PMC3807398_F6_239221.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which component in the image is colored green? \nChoices:\n(A) α-tubulin\n(B) nuclei\n(C) eGFP\n(D) none of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.96875, "loss": 4.261220455169678 }, { "id": "identity_337145", "image": "ScienceQA/images/9515.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Madison\n(B) Saint Paul\n(C) Minneapolis\n(D) Columbus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.5625, "loss": 4.265504360198975 }, { "id": "identity_5331", "image": "A-OKVQA/images/466882.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which sport is played on a similar field?\nChoices:\n(A) soccer\n(B) racquetball\n(C) ice hockey\n(D) water polo" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.53125, "loss": 4.355062961578369 }, { "id": "identity_299280", "image": "TabMWP/images/9954.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on Helen's $158.00, can she afford to purchase both a rug and a crystal sculpture?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.90625, "loss": 4.161740779876709 }, { "id": "identity_293918", "image": "A-OKVQA/images/334400.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the man's yellow shirt?\nChoices:\n(A) camouflage\n(B) visibility\n(C) dress code\n(D) fashion" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 8.5546875, "loss": 4.418759822845459 }, { "id": "identity_185499", "image": "VQA2.0/images/346653.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are wearing reflective vests?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 18.1875, "loss": 4.833737373352051 }, { "id": "identity_306800", "image": "ScienceQA/images/13973.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the following statement about our solar system accurate or inaccurate?\nJupiter's volume is more than 1,000 times as great as Earth's volume.\nChoices:\n(A) accurate\n(B) inaccurate" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.28125, "loss": 4.163765907287598 }, { "id": "identity_276305", "image": "ScienceQA/images/17251.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organism in the food web transfers matter that ultimately reaches the sea cucumber?\nChoices:\n(A) black rockfish\n(B) bat star\n(C) orca\n(D) kelp bass" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.40625, "loss": 4.271422386169434 }, { "id": "identity_4985", "image": "A-OKVQA/images/361572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where could the person quickly get a great tropical fruit to add to their home made ice cream?\nChoices:\n(A) spain\n(B) next door\n(C) door dash\n(D) safeway" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.25, "loss": 4.012190818786621 }, { "id": "identity_203232", "image": "VQA-AS/images/3206.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of frame style is the picture in?" }, { "from": "gpt", "value": "The answer is Simple dark frame" } ], "clip_score": 24.625, "loss": 4.594320774078369 }, { "id": "identity_20451", "image": "ScienceQA/images/18333.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is steel a mineral?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.5, "loss": 4.002680778503418 }, { "id": "identity_185963", "image": "PMC-VQA/images/PMC7232200_microorganisms-08-00582-f009_172750.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the background in the image?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 17.15625, "loss": 4.464954376220703 }, { "id": "identity_321758", "image": "PMC-VQA/images/PMC6441667_Fig11_459429.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the mandible was removed?\nChoices:\n(A) Right mandible \n(B) Left mandible \n(C) Upper mandible \n(D) Lower mandible" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.828125, "loss": 4.088431358337402 }, { "id": "identity_19247", "image": "PMC-VQA/images/PMC9376875_f2_372531.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the green dots in the graph represent? \nChoices:\n(A) Trained items \n(B) Untrained items \n(C) Known items \n(D) Unknown items " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.4609375, "loss": 4.692453384399414 }, { "id": "identity_18101", "image": "FigureQA/images/80252.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Blue Violet have the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.5, "loss": 4.543301582336426 }, { "id": "identity_281108", "image": "ScienceQA/images/12238.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic is shared by these three objects? \nChoices:\n(A) rough\n(B) blue\n(C) bouncy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.8125, "loss": 4.325577735900879 }, { "id": "identity_326480", "image": "TQA/images/abc_question_images/human_system_respiratory_16199.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter indicates the Diaphragm?\nChoices:\n(A) V\n(B) S\n(C) C\n(D) R" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.5, "loss": 4.385993480682373 }, { "id": "identity_185825", "image": "A-OKVQA/images/75581.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background?" }, { "from": "gpt", "value": "The answer is At least nine" } ], "clip_score": 14.28125, "loss": 4.119520664215088 }, { "id": "identity_215671", "image": "VQA2.0/images/73374.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the skier's outfit?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 25.921875, "loss": 5.237734317779541 }, { "id": "identity_78020", "image": "AI2D/images/4620.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the Augite mineral sample?" }, { "from": "gpt", "value": "The answer is Dark brown to black" } ], "clip_score": 32.46875, "loss": 4.253788948059082 }, { "id": "identity_116401", "image": "TextVQA/images/00a8d9ab7c3b9c0e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the car behind the white car?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 21.640625, "loss": 4.247349739074707 }, { "id": "identity_330215", "image": "TabMWP/images/13532.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is this function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.9375, "loss": 4.130471229553223 }, { "id": "identity_4171", "image": "AI2D/images/3245.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is in the center of the nucleus?\nChoices:\n(A) nucleolus\n(B) cytoplasm\n(C) ribosome\n(D) none of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.71875, "loss": 4.238831520080566 }, { "id": "identity_121903", "image": "VQA-AS/images/8734.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many plants are visible in the room?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.578125, "loss": 4.241202354431152 }, { "id": "identity_17633", "image": "A-OKVQA/images/457725.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the officer riding here?\nChoices:\n(A) parade route\n(B) brazil\n(C) fire\n(D) perp. capture" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.09375, "loss": 4.537752151489258 }, { "id": "identity_313218", "image": "A-OKVQA/images/200605.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what setting does the person walk?\nChoices:\n(A) rural\n(B) circus\n(C) actor's studio\n(D) city" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.3359375, "loss": 4.44648551940918 }, { "id": "identity_337907", "image": "ScienceQA/images/13509.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the following statement about our solar system accurate or inaccurate?\nNeptune's volume is more than 50 times that of Earth.\nChoices:\n(A) accurate\n(B) inaccurate" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.109375, "loss": 4.196634769439697 }, { "id": "identity_321593", "image": "TextVQA/images/d071edbd42ab3b40.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the word at the top of these icons?" }, { "from": "gpt", "value": "The answer is factory" } ], "clip_score": 18.015625, "loss": 4.273932933807373 }, { "id": "identity_303202", "image": "AI2D/images/4773.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter corresponds to the Spike in the diagram?\nChoices:\n(A) D\n(B) F\n(C) G\n(D) E" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.9375, "loss": 4.367006301879883 }, { "id": "identity_952", "image": "ScienceQA/images/12372.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these three objects have in common?\nChoices:\n(A) shiny\n(B) sticky\n(C) fuzzy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.7734375, "loss": 4.412075996398926 }, { "id": "identity_222840", "image": "PMC-VQA/images/PMC3131277_pone-0021297-g001_101560.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the highlighted area in the image on the right?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 17.65625, "loss": 4.017387866973877 }, { "id": "identity_316369", "image": "IconQA/images/45915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the shape shown here?\nChoices:\n(A) cone\n(B) sphere\n(C) cube\n(D) cylinder" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.0, "loss": 4.27951717376709 }, { "id": "identity_320868", "image": "FigureQA/images/87897.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.359375, "loss": 4.052390098571777 }, { "id": "identity_294752", "image": "PMC-VQA/images/PMC4567815_Fig2_423015.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the phase in which the image was captured from the following options:\n(A) Arterial phase\n(B) Late venous phase\n(C) Portal venous phase\n(D) Equilibrium phase" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.4375, "loss": 4.229776859283447 }, { "id": "identity_143125", "image": "DocVQA/images/nrmg0227_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the REMARKS for room 240?" }, { "from": "gpt", "value": "The answer is BLA" } ], "clip_score": 17.25, "loss": 4.476772785186768 }, { "id": "identity_20975", "image": "PMC-VQA/images/PMC6086413_pone.0201430.g004_352601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the image depict? \nChoices:\n(A) Fossilized remains \n(B) Heated specimens \n(C) Rehydrated cells \n(D) Microscopic organisms " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.671875, "loss": 4.125962257385254 }, { "id": "identity_315096", "image": "A-OKVQA/images/414113.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people here focused on?\nChoices:\n(A) snacks\n(B) arrest\n(C) sirens\n(D) parade" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.328125, "loss": 4.251703262329102 }, { "id": "identity_74484", "image": "ScienceQA/images/14513.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of nails in the image?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 19.71875, "loss": 4.471961975097656 }, { "id": "identity_31651", "image": "PMC-VQA/images/PMC6878058_ccr32451-fig-0003_74099.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color of the circle that indicates the location of the deep peroneal nerve? \nChoices:\n(A) Red \n(B) Yellow \n(C) Green \n(D) Blue " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.3125, "loss": 4.803598403930664 }, { "id": "identity_278339", "image": "A-OKVQA/images/417547.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organization conducts the baseball league?\nChoices:\n(A) MLB\n(B) IIT\n(C) TTS\n(D) NHT" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.5625, "loss": 4.104081630706787 }, { "id": "identity_294198", "image": "A-OKVQA/images/393664.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of lens was utilized to capture this image? \nOptions:\n(A) fish eye\n(B) cell phone\n(C) none\n(D) flat" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.109375, "loss": 4.2685041427612305 }, { "id": "identity_315107", "image": "TextVQA/images/9a6693f2ed5e340e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is a sponsor name here?" }, { "from": "gpt", "value": "The answer is ilkka" } ], "clip_score": 21.796875, "loss": 4.94237756729126 }, { "id": "identity_189453", "image": "VizWiz/images/VizWiz_train_00012568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the text that describes the shampoo as \"Nourishing\"?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 29.921875, "loss": 4.166356563568115 }, { "id": "identity_201131", "image": "A-OKVQA/images/44065.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many baskets are being carried by the stick?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.421875, "loss": 4.098780632019043 }, { "id": "identity_7199", "image": "FigureQA/images/73685.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Cyan have the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.890625, "loss": 4.522719383239746 }, { "id": "identity_76474", "image": "TQA/images/question_images/human_system_respiratory_6197.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the part labeled with number 9?" }, { "from": "gpt", "value": "The answer is Gas exchange" } ], "clip_score": 20.84375, "loss": 4.055241584777832 }, { "id": "identity_220206", "image": "A-OKVQA/images/80480.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.8125, "loss": 4.513300895690918 }, { "id": "identity_34301", "image": "IconQA/images/37808.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this shape open or closed?\nChoices:\n(A) open\n(B) closed" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.40625, "loss": 4.319096565246582 }, { "id": "identity_22365", "image": "VQA2.0/images/128027.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the sky cloudy?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 10.9765625, "loss": 4.326722621917725 }, { "id": "identity_214159", "image": "A-OKVQA/images/530896.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are parked in the bike rack?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 28.359375, "loss": 4.042237281799316 }, { "id": "identity_22192", "image": "A-OKVQA/images/214213.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of shot is the woman about to hit?\nChoices:\n(A) serve\n(B) backhand\n(C) forehand\n(D) slice" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.359375, "loss": 4.4186787605285645 }, { "id": "identity_299676", "image": "ScienceQA/images/14939.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which question is best suited for Kathleen's experiment to answer:\n(A) Whether slugs consume more from tomato leaves or broccoli leaves?\n(B) Whether slugs gain weight after consuming tomato leaves or broccoli leaves?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.359375, "loss": 4.142842769622803 }, { "id": "identity_324616", "image": "DocVQA/images/pjxf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the President of this Dietetic Association?" }, { "from": "gpt", "value": "The answer is Sister Maude Behrman" } ], "clip_score": 25.28125, "loss": 4.049309253692627 }, { "id": "identity_337842", "image": "A-OKVQA/images/315195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What event is taking place in the picture?\nChoices:\n(A) fire\n(B) riot\n(C) running race\n(D) water leakage" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.703125, "loss": 4.43947696685791 }, { "id": "identity_212605", "image": "VQA2.0/images/572894.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct plants can be seen in the foreground of the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.75, "loss": 4.618009567260742 }, { "id": "identity_264258", "image": "TextVQA/images/2649511b64ff5498.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is \"die\" a word in the Belgian language?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 11.4140625, "loss": 4.057007789611816 }, { "id": "identity_331547", "image": "A-OKVQA/images/558736.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the zebras in the watering hole looking toward?\nChoices:\n(A) each other\n(B) giraffes\n(C) camera\n(D) water" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.5, "loss": 4.268032073974609 }, { "id": "identity_275723", "image": "ScienceQA/images/10887.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being indicated by the highlight among the following options: Kiribati, Tuvalu, Tonga, or Fiji?\nChoices:\n(A) Kiribati\n(B) Tuvalu\n(C) Tonga\n(D) Fiji" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.421875, "loss": 4.122796058654785 }, { "id": "identity_248688", "image": "A-OKVQA/images/385248.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles in the image are being held up by a kickstand?" }, { "from": "gpt", "value": "The answer is three" } ], "clip_score": 25.390625, "loss": 4.2109785079956055 }, { "id": "identity_275184", "image": "FigureQA/images/84470.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange Red the color with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.609375, "loss": 4.6585588455200195 }, { "id": "identity_329612", "image": "PMC-VQA/images/PMC7728654_Fig2_318208.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the image in d show?\nChoices:\n(A) Initial scan before treatment \n(B) Scan during treatment \n(C) Scan after treatment \n(D) No relation to treatment" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.875, "loss": 4.685156345367432 }, { "id": "identity_277395", "image": "TQA/images/question_images/layers_of_atmosphere_7070.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which layer connects the ozone to the exosphere among the following choices: outer space, troposphere, ionosphere, or tropopause?\nChoices:\n(A) outer space\n(B) troposphere\n(C) ionosphere\n(D) tropopause" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.40625, "loss": 4.202823638916016 }, { "id": "identity_209692", "image": "A-OKVQA/images/21213.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of fruits are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.296875, "loss": 4.169755458831787 }, { "id": "identity_209036", "image": "PMC-VQA/images/PMC6386756_Fig7_439553.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant shade in the image?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 23.640625, "loss": 4.626496315002441 }, { "id": "identity_299324", "image": "DVQA/images/bar_train_00030977.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which value is smaller, sin in tool or grace in prayer?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 20.890625, "loss": 4.020026206970215 }, { "id": "identity_17554", "image": "PMC-VQA/images/PMC3016500_F0003_83178.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is seen in the post embolization angiogram? \nChoices:\n(A) Active contrast extravasation \n(B) Steel coils \n(C) Filling defects in the veins \n(D) Lack of blood flow " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.046875, "loss": 4.5201849937438965 }, { "id": "identity_306983", "image": "PMC-VQA/images/PMC7564512_jcm-09-03040-f002_275604.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What structure is impacted in the wrist?\nChoices:\n(A) Peripheral nerve\n(B) Tendon\n(C) Capsule\n(D) Fascia" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.140625, "loss": 4.246319770812988 }, { "id": "identity_316687", "image": "TextVQA/images/9441b1942cfc32b4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does it say to close this?" }, { "from": "gpt", "value": "The answer is for safety" } ], "clip_score": 14.140625, "loss": 4.579526901245117 }, { "id": "identity_133522", "image": "TextVQA/images/2235bca399bfd395.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is advertised on the side of the blue double-decker bus?" }, { "from": "gpt", "value": "The answer is within ten words" } ], "clip_score": 17.859375, "loss": 4.5962700843811035 }, { "id": "identity_297810", "image": "TextVQA/images/e7fbafc374af79fc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What text is visible to you?" }, { "from": "gpt", "value": "The answer is novia" } ], "clip_score": 12.2109375, "loss": 4.374706745147705 }, { "id": "identity_209767", "image": "VQA2.0/images/324705.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the shirt of the person standing in the center?" }, { "from": "gpt", "value": "The answer is FOCI" } ], "clip_score": 13.9765625, "loss": 4.480315685272217 }, { "id": "identity_330667", "image": "A-OKVQA/images/109231.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What place could the shirt refer to?\nChoices:\n(A) boston\n(B) piz palu\n(C) dresden\n(D) remich" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 7.9921875, "loss": 4.5954790115356445 }, { "id": "identity_329883", "image": "PMC-VQA/images/PMC6681608_fig6_19538.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the malocclusion type here?\nChoices:\n(A) Skeletal Class III\n(B) Skeletal Class II\n(C) Skeletal Class I\n(D) Skeletal Open Bite" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.15625, "loss": 4.215332508087158 }, { "id": "identity_59996", "image": "DocVQA/images/prlf0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What product is mentioned in the memorandum?" }, { "from": "gpt", "value": "The answer is Promise Margarine" } ], "clip_score": 15.5859375, "loss": 4.155331611633301 }, { "id": "identity_319873", "image": "A-OKVQA/images/295499.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the large girl's t-shirt who is sitting on the bench during the basketball game?\nChoices:\n(A) orange\n(B) white\n(C) green\n(D) blue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.609375, "loss": 4.0831522941589355 }, { "id": "identity_335155", "image": "PMC-VQA/images/PMC6639941_Fig1_11393.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the position of this structure suggest in the transthoracic echocardiography?\nChoices:\n(A) Transposition of the aorta and pulmonary trunk \n(B) Normal position of the pulmonary trunk \n(C) Left-sided ventricle opening of the pulmonary trunk \n(D) Aortic valve anomaly" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.03125, "loss": 4.033751487731934 }, { "id": "identity_198030", "image": "PMC-VQA/images/PMC7359123_acn351106-fig-0002_213637.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the white areas in the brain on this MRI image?" }, { "from": "gpt", "value": "The answer is Indicate inflammation or tumors" } ], "clip_score": 35.15625, "loss": 4.249500751495361 }, { "id": "identity_306582", "image": "A-OKVQA/images/256529.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the common term for these items with wheels?\nChoices:\n(A) wheelbarrow\n(B) luggage dolly\n(C) air cart\n(D) smart cart" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.953125, "loss": 4.160552978515625 }, { "id": "identity_275869", "image": "DocVQA/images/hglg0078_36.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What information is contained in the Grade Field?" }, { "from": "gpt", "value": "The answer is CG3" } ], "clip_score": 21.640625, "loss": 4.31444787979126 }, { "id": "identity_118953", "image": "TextVQA/images/66e16785047c416a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bin in front of the wall?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 18.6875, "loss": 4.489922046661377 }, { "id": "identity_288048", "image": "ScienceQA/images/14289.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic does this object possess?\nChoices:\n(A) breakable\n(B) bendable" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.515625, "loss": 4.068704128265381 }, { "id": "identity_285325", "image": "PMC-VQA/images/PMC3192142_pone-0026113-g001_111577.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the orange dots in the image?\nChoices:\n(A) Optimal stimulation site \n(B) The coil orientation \n(C) Anatomically defined hand-knob \n(D) The mapping of the area around the anatomically defined hand-knob." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.640625, "loss": 4.295113563537598 }, { "id": "identity_316741", "image": "IconQA/images/51054.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this shape?\nChoices:\n(A) flip\n(B) turn\n(C) slide" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.75, "loss": 4.027345657348633 }, { "id": "identity_135106", "image": "ChartQA/images/26143890007033.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the trend of anxiety disorders in Suriname from 2004 to 2014?" }, { "from": "gpt", "value": "The answer is Constant" } ], "clip_score": 35.71875, "loss": 5.537491798400879 }, { "id": "identity_203964", "image": "VQA2.0/images/385326.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 13.5703125, "loss": 4.261074542999268 }, { "id": "identity_312414", "image": "DocVQA/images/shlk0226_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the person hosting the segment on Nutrition and snack foods?" }, { "from": "gpt", "value": "The answer is Connie Bea Hope" } ], "clip_score": 21.96875, "loss": 4.789294242858887 }, { "id": "identity_312836", "image": "PMC-VQA/images/PMC4684472_pone.0144571.g003_454579.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What staining technique was applied in the images?\nChoices:\n(A) PAS staining\n(B) Gram staining\n(C) Immunofluorescence staining\n(D) Nuclear hematoxylin staining" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.90625, "loss": 4.034777641296387 }, { "id": "identity_47489", "image": "MapQA/images/map_7465.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color representing the range 3613-3681?" }, { "from": "gpt", "value": "The answer is Light Pink" } ], "clip_score": 18.109375, "loss": 4.227707386016846 }, { "id": "identity_128651", "image": "A-OKVQA/images/370337.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many palm trees can be seen behind the white building?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.15625, "loss": 4.56389045715332 }, { "id": "identity_301839", "image": "DocVQA/images/gjbl0226_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where are they going?" }, { "from": "gpt", "value": "The answer is China/Orient" } ], "clip_score": 14.625, "loss": 4.880214214324951 }, { "id": "identity_289339", "image": "A-OKVQA/images/459835.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country's flag is displayed on one of the boats among the following options: United States, United Kingdom, France, or Sweden?\nChoices:\n(A) united states\n(B) united kingdom\n(C) france\n(D) sweden" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.859375, "loss": 4.051738739013672 }, { "id": "identity_148514", "image": "DocVQA/images/yhfg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many copies will be run off automatically for the author?" }, { "from": "gpt", "value": "The answer is Ten copies" } ], "clip_score": 18.234375, "loss": 4.227782726287842 }, { "id": "identity_324755", "image": "DocVQA/images/sllg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this?" }, { "from": "gpt", "value": "The answer is research asst" } ], "clip_score": 18.453125, "loss": 5.730337619781494 }, { "id": "identity_294313", "image": "PMC-VQA/images/PMC4304244_F8_352529.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of deformation is depicted in the findings?\nOptions:\n(A) Forward deformation\n(B) Backward deformation\n(C) Lateral deformation\n(D) Radial deformation" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.78125, "loss": 4.200019359588623 }, { "id": "identity_9099", "image": "IconQA/images/61099.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a tractor. Which picture is fourth?\nChoices:\n(A) sun\n(B) tractor\n(C) barn" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.265625, "loss": 4.567837238311768 }, { "id": "identity_299007", "image": "A-OKVQA/images/58869.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of seafood is the man currently enjoying outdoors? Is it:\n(A) catfish\n(B) squid\n(C) oysters\n(D) halibut" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.9375, "loss": 4.083005428314209 }, { "id": "identity_269499", "image": "VQA-AS/images/12665.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the woman's pants coordinated with her couch?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 26.53125, "loss": 4.156309127807617 }, { "id": "identity_316700", "image": "PMC-VQA/images/PMC8063040_F1_423084.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the view shown here?\nChoices:\n(A) Axial view\n(B) Coronal view\n(C) Sagittal view\n(D) Oblique view" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.75, "loss": 4.546029567718506 }, { "id": "identity_327633", "image": "PMC-VQA/images/PMC6189522_fig4_382355.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which brain region was the tissue sample taken?\nChoices:\n(A) Frontal lobe,\n(B) Parietal lobe,\n(C) Temporal lobe,\n(D) Occipital lobe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.546875, "loss": 4.136236190795898 }, { "id": "identity_36872", "image": "IconQA/images/67396.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner more likely to land?\nChoices:\n(A) pink\n(B) white" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.4375, "loss": 4.211043357849121 }, { "id": "identity_206105", "image": "A-OKVQA/images/89407.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many side mirrors does the motorcycle have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.984375, "loss": 4.144587516784668 }, { "id": "identity_17946", "image": "AI2D/images/3141.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the layer below the cuticle?\nChoices:\n(A) Air space\n(B) Epidermis\n(C) Vein\n(D) Stomata" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.53125, "loss": 4.299041271209717 }, { "id": "identity_313678", "image": "FigureQA/images/23747.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.6015625, "loss": 4.103335857391357 }, { "id": "identity_8297", "image": "TQA/images/abc_question_images/human_system_nervous_16176.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents the corpus callosum?\nChoices:\n(A) K\n(B) Y\n(C) L\n(D) A" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.84375, "loss": 4.146262168884277 }, { "id": "identity_325455", "image": "ScienceQA/images/19978.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic do these items share?\nChoices:\n(A) fragile\n(B) stretchy\n(C) yellow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.390625, "loss": 4.403870582580566 }, { "id": "identity_292979", "image": "IconQA/images/37881.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture corresponds to the number eight in the sequence, given that the first picture is a sun? Choices are:\n(A) tractor\n(B) sun\n(C) barn" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.8125, "loss": 4.245124816894531 }, { "id": "identity_332854", "image": "PMC-VQA/images/PMC6861597_fig0002_70162.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of image is being corrected with N4 bias correction?\nChoices:\n(A) T1-weighted image\n(B) FLAIR image\n(C) T2-weighted image\n(D) DWI image" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.984375, "loss": 4.687455177307129 }, { "id": "identity_289158", "image": "A-OKVQA/images/302076.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the probable rule concerning footwear that is likely enforced in this situation?\nChoices:\n(A) boots required\n(B) shoes off\n(C) none\n(D) cleats required" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 6.10546875, "loss": 4.7490644454956055 }, { "id": "identity_145101", "image": "DVQA/images/bar_train_00089303.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset has the 'mode' algorithm outperformed the 'fit' algorithm?" }, { "from": "gpt", "value": "The answer is humor" } ], "clip_score": 31.96875, "loss": 4.706878662109375 }, { "id": "identity_313227", "image": "FigureQA/images/29123.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Hot Pink?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.703125, "loss": 4.058664798736572 }, { "id": "identity_18208", "image": "PMC-VQA/images/PMC7299599_f4_194033.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the red circle indicating in the image? \nChoices:\n(A) True lumen of left CCA \n(B) A false lumen of left CCA \n(C) A thrombus in the true lumen of left CCA \n(D) An atherosclerotic plaque in true lumen of left CCA " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.203125, "loss": 4.75620174407959 }, { "id": "identity_324658", "image": "ScienceQA/images/11619.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Pierre\n(B) Minneapolis\n(C) Wichita\n(D) Saint Paul" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.578125, "loss": 4.363068103790283 }, { "id": "identity_320241", "image": "A-OKVQA/images/299207.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person by the bags awaiting?\nChoices:\n(A) skateboarder\n(B) train\n(C) delivery\n(D) cab" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.859375, "loss": 4.178264617919922 }, { "id": "identity_132096", "image": "Super-CLEVR/images/superCLEVR_new_000947.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have two wheels?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.5, "loss": 4.61558723449707 }, { "id": "identity_67549", "image": "AI2D/images/4621.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different mineral samples are displayed in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 30.25, "loss": 4.4578962326049805 }, { "id": "identity_334900", "image": "PMC-VQA/images/PMC5475293_F0002_180333.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the white arrow indicate in image B?\nChoices:\n(A) LUS zone 2\n(B) LUS zone 3\n(C) LUS zone 1\n(D) LUS zone 4" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.71875, "loss": 4.101746559143066 }, { "id": "identity_323736", "image": "ScienceQA/images/6513.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the place shown?\nChoices:\n(A) Vermont\n(B) New York\n(C) New Jersey\n(D) Mississippi" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.71875, "loss": 5.006455898284912 }, { "id": "identity_69636", "image": "TQA/images/question_images/velocity_time_graphs_7658.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many terminal velocities are indicated in the graph?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 27.125, "loss": 4.392727851867676 }, { "id": "identity_111922", "image": "VQA-AS/images/12996.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the object on the blanket?" }, { "from": "gpt", "value": "The answer is A football" } ], "clip_score": 22.6875, "loss": 4.872023582458496 }, { "id": "identity_18760", "image": "TextVQA/images/00fca8a2da7a4694.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the brand of watch?" }, { "from": "gpt", "value": "The answer is zoppini" } ], "clip_score": 27.921875, "loss": 4.927937984466553 }, { "id": "identity_207928", "image": "A-OKVQA/images/304295.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible wreaths are attached to the wooden gates?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 20.609375, "loss": 4.610426425933838 }, { "id": "identity_25199", "image": "A-OKVQA/images/27041.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is she working here?\nChoices:\n(A) left home\n(B) sunny outside\n(C) power source\n(D) is hiding" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.609375, "loss": 4.264248371124268 }, { "id": "identity_275996", "image": "FigureQA/images/98313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Khaki or Medium Purple?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.796875, "loss": 4.5528693199157715 }, { "id": "identity_336289", "image": "PMC-VQA/images/PMC5641467_F2_227206.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the nature of the lesion shown here?\nChoices:\n(A) Benign \n(B) Malignant \n(C) Uncertain \n(D) Cannot be determined" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.265625, "loss": 4.003844261169434 }, { "id": "identity_114035", "image": "VQA2.0/images/137779.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the clock tower?" }, { "from": "gpt", "value": "The answer is Gold" } ], "clip_score": 27.15625, "loss": 4.161423683166504 }, { "id": "identity_16251", "image": "DVQA/images/bar_train_00065508.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first group of bars from the bottom?" }, { "from": "gpt", "value": "The answer is lay" } ], "clip_score": 20.375, "loss": 5.065293312072754 }, { "id": "identity_263509", "image": "Super-CLEVR/images/superCLEVR_new_000505.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape does the metallic object have that matches the color of the small metallic jet?" }, { "from": "gpt", "value": "The answer is fighter" } ], "clip_score": 21.984375, "loss": 4.553562641143799 }, { "id": "identity_22674", "image": "PMC-VQA/images/PMC4283670_ANNRHEUMDIS2014205839F3_348408.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the affected joint in the left index finger of the patient? \nChoices:\n(A) Metacarpophalangeal joint \n(B) Distal interphalangeal joint \n(C) Proximal interphalangeal joint \n(D) Interphalangeal joint (not specified) " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.046875, "loss": 4.522240161895752 }, { "id": "identity_298341", "image": "PMC-VQA/images/PMC4058497_fig1_298382.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the lesion location identified in the MRI scan?\nChoices:\n(A) Subarachnoid space \n(B) Cerebral cortex \n(C) Meckel's cavity \n(D) Brainstem" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.421875, "loss": 4.284773826599121 }, { "id": "identity_5271", "image": "ScienceQA/images/20112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Delaware?\nChoices:\n(A) Dover\n(B) Topeka\n(C) Wilmington\n(D) Wichita" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.6875, "loss": 4.045027732849121 }, { "id": "identity_184978", "image": "PMC-VQA/images/PMC6723156_jcm-08-01197-f006_33060.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the biological structure shown in the inset image on the top right corner?" }, { "from": "gpt", "value": "The answer is Mouse paw" } ], "clip_score": 26.6875, "loss": 4.3585205078125 }, { "id": "identity_289574", "image": "AI2D/images/3795.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the letter A represent in the diagram? Is it the stem, fruit, root, or leaf?\nChoices:\n(A) stem\n(B) fruit\n(C) root\n(D) leaf" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.921875, "loss": 4.850185394287109 }, { "id": "identity_326665", "image": "ScienceQA/images/1180.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which place is highlighted?\nChoices:\n(A) Cuba\n(B) Haiti\n(C) Jamaica\n(D) Trinidad and Tobago" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.8125, "loss": 4.278233051300049 }, { "id": "identity_13159", "image": "A-OKVQA/images/471737.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who invented this vehicle?\nChoices:\n(A) richard trevithick\n(B) orville wright\n(C) jeff goldblum\n(D) bill nye" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.5234375, "loss": 4.154107570648193 }, { "id": "identity_299859", "image": "PMC-VQA/images/PMC6550046_DMM039065F2_489391.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is represented by each row in the figure?\nChoices:\n(A) Glass brain projection \n(B) Statistical analyses \n(C) Transverse section level \n(D) Coordinates relative to Bregma" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.09375, "loss": 4.575872421264648 }, { "id": "identity_283124", "image": "A-OKVQA/images/114140.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name for the small lights? Is it:\n(A) night lights\n(B) christmas lights\n(C) holiday lights\n(D) festival lights" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.1484375, "loss": 4.496060371398926 }, { "id": "identity_15777", "image": "Super-CLEVR/images/superCLEVR_new_001438.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the big yellow bicycle and the bicycle that is behind the big metal road bike made of the same material?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 27.15625, "loss": 5.428262710571289 }, { "id": "identity_7388", "image": "ScienceQA/images/13853.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which material is this spring made of?\nChoices:\n(A) metal\n(B) wool" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.140625, "loss": 4.146944999694824 }, { "id": "identity_55356", "image": "DocVQA/images/xggn0226_39.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the patient's last name?" }, { "from": "gpt", "value": "The answer is Kindslatter" } ], "clip_score": 20.5, "loss": 5.763150691986084 }, { "id": "identity_27971", "image": "TabMWP/images/2162.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is the function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.5, "loss": 4.1600799560546875 }, { "id": "identity_304692", "image": "TQA/images/question_images/sun_layers_6309.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the deepest part?\nChoices:\n(A) core\n(B) convective zone\n(C) prominence\n(D) radiative zone" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.1875, "loss": 4.001710414886475 }, { "id": "identity_300273", "image": "PMC-VQA/images/PMC4047780_F3_295237.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of radiograph is shown in image (B)?\nChoices:\n(A) Panoramic radiograph\n(B) CBCT\n(C) MRI\n(D) CT scan" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.984375, "loss": 4.42189884185791 }, { "id": "identity_307467", "image": "DocVQA/images/grcv0079_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the department?" }, { "from": "gpt", "value": "The answer is Oper. Dev." } ], "clip_score": 15.484375, "loss": 4.466166019439697 }, { "id": "identity_200807", "image": "VQA2.0/images/507434.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the mailbox in front of the grocery store?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 21.171875, "loss": 4.039979934692383 }, { "id": "identity_294847", "image": "A-OKVQA/images/250802.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activities are permissible for this individual to engage in while in the state of California?\nChoices:\n(A) practice medicine\n(B) teach\n(C) fish\n(D) drive" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.5546875, "loss": 4.155457496643066 }, { "id": "identity_333501", "image": "A-OKVQA/images/348621.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the social status of most people who own these?\n\nChoices:\n(A) poor\n(B) broke\n(C) wealthy\n(D) happy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.7265625, "loss": 4.460885047912598 }, { "id": "identity_289885", "image": "VQA2.0/images/162578.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there plants filling the field?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 11.0390625, "loss": 4.1133904457092285 }, { "id": "identity_24364", "image": "ScienceQA/images/12224.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Select the bird below.\nChoices:\n(A) California toad\n(B) keel-billed toucan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.578125, "loss": 4.1087493896484375 }, { "id": "identity_64265", "image": "ScienceQA/images/7259.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many magnets are shown in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.71875, "loss": 4.281064033508301 }, { "id": "identity_300466", "image": "FigureQA/images/61638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the area under the curve for Steel Blue the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.78125, "loss": 4.2082438468933105 }, { "id": "identity_143064", "image": "MapQA/images/map_4031.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a value within the range of 43-58?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 23.609375, "loss": 4.025410175323486 }, { "id": "identity_329159", "image": "ScienceQA/images/14071.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared characteristic of these two items?\nChoices:\n(A) rough\n(B) slippery" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.59375, "loss": 4.5666704177856445 }, { "id": "identity_12880", "image": "AI2D/images/3412.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the outer most part called?\nChoices:\n(A) Cell membrane\n(B) Chromosomes\n(C) Nuclear Membrane\n(D) Nucleus Membrane" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.109375, "loss": 4.346972942352295 }, { "id": "identity_124362", "image": "TextVQA/images/fb196668a8a87336.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of clothing item is the person in the foreground holding in their left hand?" }, { "from": "gpt", "value": "The answer is A hat" } ], "clip_score": 21.15625, "loss": 4.47700834274292 }, { "id": "identity_315643", "image": "VizWiz/images/VizWiz_train_00013353.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Hi, what is this?" }, { "from": "gpt", "value": "The answer is cup dutch couple dancing on" } ], "clip_score": 30.671875, "loss": 4.361808776855469 }, { "id": "identity_199107", "image": "VQA2.0/images/158451.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the batting team's uniform?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 22.40625, "loss": 4.64022159576416 }, { "id": "identity_294112", "image": "VQA2.0/images/331868.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the curtains shut?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 17.140625, "loss": 4.468461036682129 }, { "id": "identity_257143", "image": "A-OKVQA/images/125120.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the items on the table, what is the most likely beverage in the silver containers, and what evidence supports this conclusion?" }, { "from": "gpt", "value": "The answer is coffee" } ], "clip_score": 17.296875, "loss": 4.8151726722717285 }, { "id": "identity_27323", "image": "ScienceQA/images/5773.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the following statement about our solar system true or false?\nSaturn's volume is more than 50% of Jupiter's volume.\nChoices:\n(A) true\n(B) false" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.90625, "loss": 4.2948408126831055 }, { "id": "identity_64687", "image": "ScienceQA/images/13060.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many main roads can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.84375, "loss": 4.372536659240723 }, { "id": "identity_318655", "image": "PMC-VQA/images/PMC4660938_F0003_448112.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of lesions are visible in this scan? \nChoices:\n(A) Corkscrew lesions \n(B) Sclerotic lesions \n(C) Lytic lesions \n(D) Hematoma lesions" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.671875, "loss": 4.506516456604004 }, { "id": "identity_386", "image": "TextVQA/images/081e736f68b9cefb.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what time is it?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 15.875, "loss": 4.354374885559082 }, { "id": "identity_320325", "image": "PMC-VQA/images/PMC4816297_pone.0152505.g001_17082.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the yellow area under this curve represent?\nChoices:\n(A) Emphysematous tissue \n(B) Abnormal lung tissue \n(C) Normal lung tissue \n(D) Fluid-filled lung tissue" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.859375, "loss": 4.785333156585693 }, { "id": "identity_194158", "image": "VQA2.0/images/2193.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What pattern is on the pillow behind the stuffed animal?" }, { "from": "gpt", "value": "The answer is Circular patterns and lines" } ], "clip_score": 26.375, "loss": 4.143037796020508 }, { "id": "identity_35688", "image": "PMC-VQA/images/PMC6255699_F1_400724.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of tissue is shown in the cross sections? \nChoices:\n(A) phloem tissue \n(B) xylem tissue \n(C) epidermis tissue \n(D) ground tissue. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.40625, "loss": 4.526159286499023 }, { "id": "identity_235383", "image": "TextVQA/images/1b1e3f57e92fdb2c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is shown in the image, and what are the indicators that help identify it?" }, { "from": "gpt", "value": "The answer is It is a cargo train locomotive" } ], "clip_score": 30.359375, "loss": 4.017731189727783 }, { "id": "identity_331560", "image": "A-OKVQA/images/203986.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What hard surface, other than the ground, are the spectators sitting on?\nChoices:\n(A) marble\n(B) bleachers\n(C) plastic\n(D) pavement" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.6640625, "loss": 4.4284138679504395 }, { "id": "identity_280950", "image": "DVQA/images/bar_train_00145287.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To which category does the color forest green belong?" }, { "from": "gpt", "value": "The answer is cent" } ], "clip_score": 20.25, "loss": 5.272255897521973 }, { "id": "identity_150402", "image": "ScienceQA/images/17539.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image are fruits?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.25, "loss": 4.580242156982422 }, { "id": "identity_262450", "image": "VizWiz/images/VizWiz_train_00000723.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the type of vent that is being used?" }, { "from": "gpt", "value": "The answer is return" } ], "clip_score": 25.296875, "loss": 5.960888862609863 }, { "id": "identity_176202", "image": "IconQA/images/97465.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape are the objects in the second picture?" }, { "from": "gpt", "value": "The answer is Round" } ], "clip_score": 19.453125, "loss": 4.150522708892822 }, { "id": "identity_28829", "image": "PMC-VQA/images/PMC3932416_F6_268892.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of image is shown? \nChoices:\n(A) SEM image \n(B) CT scan image \n(C) Brightfield image \n(D) X-ray image " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.234375, "loss": 4.978348255157471 }, { "id": "identity_287132", "image": "PMC-VQA/images/PMC7790321_FIG7_336988.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the yellow arrows represent in the image?\nChoices:\n(A) Soft tissue growth. \n(B) Implants. \n(C) Bone formation. \n(D) Tooth decay." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.390625, "loss": 5.106164932250977 }, { "id": "identity_204485", "image": "VQA-AS/images/2220.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows are visible in the room?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 22.078125, "loss": 4.390751361846924 }, { "id": "identity_37643", "image": "VQA2.0/images/393121.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of building in the picture?" }, { "from": "gpt", "value": "The answer is business" } ], "clip_score": 23.0625, "loss": 4.73272180557251 }, { "id": "identity_17000", "image": "ScienceQA/images/20114.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these three objects have in common?\nChoices:\n(A) rough\n(B) sticky\n(C) soft" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.6015625, "loss": 4.2030816078186035 }, { "id": "identity_33302", "image": "PMC-VQA/images/PMC5664263_fig1_233623.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the age of the infarcts shown in the periventricular areas?\nChoices:\n(A) Chronic \n(B) Acute \n(C) Sub-acute \n(D) Unknown" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.484375, "loss": 4.488386631011963 }, { "id": "identity_316606", "image": "A-OKVQA/images/200597.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How might many who listen to this person hear the message?\nChoices:\n(A) through speakers\n(B) paper\n(C) sign language\n(D) interpreter" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.65625, "loss": 4.1322021484375 }, { "id": "identity_296984", "image": "A-OKVQA/images/238385.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which zone is shown in the image provided? \nOptions:\n(A) residential\n(B) traffic\n(C) business\n(D) shopping" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 11.65625, "loss": 4.299054145812988 }, { "id": "identity_292817", "image": "PMC-VQA/images/PMC5579201_Fig6_210197.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the confocal images depict?\nChoices:\n(A) Neurons and astrocytes\n(B) DA-L-IR cell bodies and projection patterns\n(C) Axons and dendrites\n(D) Blood vessels and capillaries" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.90625, "loss": 4.310153007507324 }, { "id": "identity_277263", "image": "A-OKVQA/images/245383.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of vehicles is predominantly depicted in the image?\nChoices:\n(A) bicycles\n(B) trains\n(C) cars\n(D) motorcycles" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.328125, "loss": 4.187193393707275 }, { "id": "identity_156528", "image": "ScienceQA/images/13419.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many continents are intersected by the equator?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 26.640625, "loss": 4.274648666381836 }, { "id": "identity_22380", "image": "ScienceQA/images/6857.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Pennsylvania?\nChoices:\n(A) Frankfort\n(B) Harrisburg\n(C) Providence\n(D) Boise" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.734375, "loss": 4.295744895935059 }, { "id": "identity_297992", "image": "VQA2.0/images/565938.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of wheels does the stagecoach possess?" }, { "from": "gpt", "value": "The answer is wagon" } ], "clip_score": 27.9375, "loss": 4.031896591186523 }, { "id": "identity_201406", "image": "PMC-VQA/images/PMC4936830_F1_48915.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main color of the background in the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 18.25, "loss": 4.140589714050293 }, { "id": "identity_7047", "image": "FigureQA/images/53633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dodger Blue the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.828125, "loss": 4.379960060119629 }, { "id": "identity_298549", "image": "A-OKVQA/images/8285.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is at risk of falling? \n(A) prop\n(B) father\n(C) ladder\n(D) girl" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.0390625, "loss": 4.878443241119385 }, { "id": "identity_309622", "image": "A-OKVQA/images/336360.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the world is this scene located in?\nChoices:\n(A) northern\n(B) artic\n(C) middle east\n(D) southeastern" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 8.140625, "loss": 4.112743377685547 }, { "id": "identity_192418", "image": "TextVQA/images/da147a39299fe540.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 14.9375, "loss": 4.87205696105957 }, { "id": "identity_321299", "image": "PMC-VQA/images/PMC3786075_Fig4_234333.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the shape of the tip of the catheter used in the procedure?\nChoices:\n(A) Curved\n(B) Straight\n(C) Bendable\n(D) Pointed" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.953125, "loss": 4.129755973815918 }, { "id": "identity_262631", "image": "Super-CLEVR/images/superCLEVR_new_001494.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do any large brown objects exist that are composed of the same material as the sizable gray object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 14.5546875, "loss": 5.035056114196777 }, { "id": "identity_320207", "image": "A-OKVQA/images/459824.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this tray made from?\nChoices:\n(A) wood\n(B) plastic\n(C) steel\n(D) glass" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 6.62109375, "loss": 4.115207195281982 }, { "id": "identity_313378", "image": "FigureQA/images/1925.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.890625, "loss": 4.213141441345215 }, { "id": "identity_329387", "image": "PMC-VQA/images/PMC7532002_fig12_265168.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What method was used to create this image?\nChoices:\n(A) Positron Emission Tomography (PET)\n(B) Magnetic Resonance Imaging (MRI)\n(C) Computed Tomography (CT)\n(D) Electroencephalogram (EEG)" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.71875, "loss": 5.048610210418701 }, { "id": "identity_295841", "image": "A-OKVQA/images/314616.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the image shown?\nChoices:\n(A) thunderstorm\n(B) flooding\n(C) cow showering\n(D) raining" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.75, "loss": 4.526358604431152 }, { "id": "identity_287850", "image": "IconQA/images/74460.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on? \n(A) white\n(B) green" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.203125, "loss": 4.018040657043457 }, { "id": "identity_329156", "image": "IconQA/images/69597.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a plane. What is the second image?\nChoices:\n(A) bear\n(B) train\n(C) plane" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.875, "loss": 4.823426246643066 }, { "id": "identity_30052", "image": "VizWiz/images/VizWiz_train_00003868.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of table is this and what color?" }, { "from": "gpt", "value": "The answer is dinner table brown" } ], "clip_score": 28.546875, "loss": 4.474011421203613 }, { "id": "identity_13590", "image": "ScienceQA/images/19314.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following organisms is the tertiary consumer in this food web?\nChoices:\n(A) grizzly bear\n(B) snowy owl\n(C) parasitic jaeger" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.046875, "loss": 4.591182231903076 }, { "id": "identity_223632", "image": "VizWiz/images/VizWiz_train_00000701.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many products are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.53125, "loss": 4.282034397125244 }, { "id": "identity_286600", "image": "ScienceQA/images/5682.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic describes this object?\nChoices:\n(A) yellow\n(B) flexible" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.9921875, "loss": 4.093169212341309 }, { "id": "identity_30667", "image": "VQA2.0/images/548562.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the bird awake?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 23.359375, "loss": 4.000061511993408 }, { "id": "identity_65163", "image": "ScienceQA/images/5038.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the likely content of the flask based on its appearance?" }, { "from": "gpt", "value": "The answer is Culture medium or reaction mixture" } ], "clip_score": 24.375, "loss": 4.0112104415893555 }, { "id": "identity_64243", "image": "ScienceQA/images/12954.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many straps does the dress have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 32.09375, "loss": 4.3253607749938965 }, { "id": "identity_46783", "image": "DVQA/images/bar_train_00125430.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest value for the green bar?" }, { "from": "gpt", "value": "The answer is wire" } ], "clip_score": 24.09375, "loss": 4.449929237365723 }, { "id": "identity_131972", "image": "A-OKVQA/images/111259.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the raft?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 22.1875, "loss": 4.071889400482178 }, { "id": "identity_293692", "image": "PMC-VQA/images/PMC5795345_F5_270675.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the brain region depicted in the image provided? \nChoices:\n(A) right dorsal striatal region\n(B) left ventral striatal region\n(C) right ventral striatal region\n(D) left dorsal striatal region" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.09375, "loss": 4.271017551422119 }, { "id": "identity_262071", "image": "DVQA/images/bar_train_00183104.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the bottom element in each stack of bars?" }, { "from": "gpt", "value": "The answer is brief" } ], "clip_score": 25.21875, "loss": 4.698767185211182 }, { "id": "identity_332316", "image": "DVQA/images/bar_train_00091882.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group of bars has the highest total value?" }, { "from": "gpt", "value": "The answer is grave" } ], "clip_score": 24.046875, "loss": 4.457784175872803 }, { "id": "identity_263115", "image": "FigureQA/images/90355.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is darker, Gray or Dark Gold?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.875, "loss": 4.558090686798096 }, { "id": "identity_37320", "image": "IconQA/images/28593.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you select a marble without looking, how likely is it that you will pick a black one?\nChoices:\n(A) unlikely\n(B) probable\n(C) impossible\n(D) certain" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.921875, "loss": 4.420995235443115 }, { "id": "identity_63648", "image": "AI2D/images/3086.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many ribosomes are visible in the diagram?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.25, "loss": 4.257017135620117 }, { "id": "identity_324442", "image": "PMC-VQA/images/PMC7427865_Fig1_234354.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the medical procedure shown in the image?\nChoices:\n(A) Gastrointestinal endoscopy \n(B) Renal arterial embolization \n(C) Bronchoscopy \n(D) Colonoscopy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.6875, "loss": 4.009899139404297 }, { "id": "identity_282263", "image": "ScienceQA/images/9585.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Vermont?\nChoices:\n(A) Trenton\n(B) Montpelier\n(C) Wilmington\n(D) Burlington" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.59375, "loss": 4.274165630340576 }, { "id": "identity_290779", "image": "A-OKVQA/images/62726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which company owns the vehicles parked near the curb among the following options: Tesla, MTA, Huffy, or Yamaha?\nChoices:\n(A) tesla\n(B) mta\n(C) huffy\n(D) yamaha" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.578125, "loss": 4.316441535949707 }, { "id": "identity_329997", "image": "A-OKVQA/images/523349.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the instrument in the picture used for?\nChoices:\n(A) snowboarding\n(B) skiing\n(C) surfing\n(D) skating" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.40625, "loss": 4.325883865356445 }, { "id": "identity_114396", "image": "PMC-VQA/images/PMC7393149_Fig1_222964.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct colors are used to represent different structures in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.5, "loss": 4.075954437255859 }, { "id": "identity_322946", "image": "PMC-VQA/images/PMC3168817_F3_107656.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this CT scan indicate about the maxillary sinus?\nChoices:\n(A) improvement\n(B) no change\n(C) deterioration\n(D) inflammation." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.59375, "loss": 4.4692606925964355 }, { "id": "identity_328419", "image": "PMC-VQA/images/PMC6000706_Figure5_328815.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the histological pattern of these cells?\nChoices:\n(A) Glandular \n(B) Tubular \n(C) Squamous \n(D) Columnar" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.109375, "loss": 4.543038845062256 }, { "id": "identity_271642", "image": "TQA/images/abc_question_images/photosynthesis_17478.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the letter that symbolizes heat energy from the given choices: (A) Y, (B) V, (C) F, or (D) R." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.625, "loss": 4.208609580993652 }, { "id": "identity_321648", "image": "ScienceQA/images/12017.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared characteristic of these two items?\nChoices:\n(A) soft\n(B) blue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.109375, "loss": 4.257802486419678 }, { "id": "identity_261271", "image": "A-OKVQA/images/521874.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the train being so small?\nChoices:\n(A) small engineer\n(B) is broken\n(C) for children\n(D) is old" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.171875, "loss": 4.357369422912598 }, { "id": "identity_267011", "image": "FigureQA/images/25928.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Seafoam the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.671875, "loss": 4.356146812438965 }, { "id": "identity_291507", "image": "A-OKVQA/images/74135.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the occupations of these individuals? \nOptions:\n(A) servers\n(B) butcher\n(C) dairy maids\n(D) fishers" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.765625, "loss": 4.526955604553223 }, { "id": "identity_143799", "image": "DocVQA/images/ghgw0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the SPEC REVISION code mentioned in the document?" }, { "from": "gpt", "value": "The answer is AC" } ], "clip_score": 23.015625, "loss": 4.297493934631348 }, { "id": "identity_295350", "image": "AI2D/images/1004.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label on the diagram represents a flat foot?\nChoices:\n(A) Neutral pronation\n(B) Supination\n(C) None\n(D) Over pronation" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 38.8125, "loss": 4.455997467041016 }, { "id": "identity_199567", "image": "PMC-VQA/images/PMC4098608_fig3_306121.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What side of the body is shown in the image?" }, { "from": "gpt", "value": "The answer is Right side" } ], "clip_score": 22.28125, "loss": 4.0442609786987305 }, { "id": "identity_331536", "image": "PMC-VQA/images/PMC7025078_fig0015_119199.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this image depict? \nChoices:\n(A) The presence of some posterior bladder wall diverticula \n(B) The presence of some anterior bladder wall diverticula \n(C) The presence of bladder tumors \n(D) The presence of bladder stones" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.8125, "loss": 4.592979431152344 }, { "id": "identity_303767", "image": "Super-CLEVR/images/superCLEVR_new_001654.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there anything else that has the same shape as the large metallic object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 16.28125, "loss": 4.86862850189209 }, { "id": "identity_65785", "image": "TQA/images/abc_question_images/optics_reflection_19189.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the letter 'W' represent in the diagram?" }, { "from": "gpt", "value": "The answer is Water surface or boundary" } ], "clip_score": 30.1875, "loss": 4.252859115600586 }, { "id": "identity_268756", "image": "Super-CLEVR/images/superCLEVR_new_000269.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the material compositions of the large bicycle behind the blue rubber object and the small road bike the same?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 30.8125, "loss": 5.389452934265137 }, { "id": "identity_32752", "image": "AI2D/images/3915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label represents Molecular hydrogen?\nChoices:\n(A) C\n(B) A\n(C) G\n(D) E" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.125, "loss": 4.322626113891602 }, { "id": "identity_20993", "image": "ScienceQA/images/1275.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of South Carolina?\nChoices:\n(A) Lincoln\n(B) Atlanta\n(C) Columbia\n(D) Charleston" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.84375, "loss": 4.333760738372803 }, { "id": "identity_46974", "image": "DVQA/images/bar_train_00005436.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the highest accuracy?" }, { "from": "gpt", "value": "The answer is sale" } ], "clip_score": 32.25, "loss": 4.436972141265869 }, { "id": "identity_16602", "image": "IconQA/images/66297.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer acorns than squirrels?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.921875, "loss": 4.257726192474365 }, { "id": "identity_281845", "image": "A-OKVQA/images/13372.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How would you name this location? \nOptions:\n(A) garage\n(B) dock\n(C) bunker\n(D) hangar" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 9.4453125, "loss": 4.517321586608887 }, { "id": "identity_1778", "image": "ScienceQA/images/830.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Louisiana?\nChoices:\n(A) Montpelier\n(B) Lansing\n(C) Baton Rouge\n(D) Birmingham" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.328125, "loss": 4.1303606033325195 }, { "id": "identity_118184", "image": "A-OKVQA/images/40912.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.296875, "loss": 4.290557384490967 }, { "id": "identity_1094", "image": "AI2D/images/2146.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the diagram, which letter represents a youth shark?\nChoices:\n(A) C\n(B) A\n(C) B\n(D) E" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.046875, "loss": 4.042079925537109 }, { "id": "identity_111355", "image": "A-OKVQA/images/208372.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day is it in the image?" }, { "from": "gpt", "value": "The answer is Early evening" } ], "clip_score": 19.375, "loss": 4.186276912689209 }, { "id": "identity_42194", "image": "DVQA/images/bar_train_00000502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the lowest accuracy value for the 'data' dataset?" }, { "from": "gpt", "value": "The answer is wit" } ], "clip_score": 30.578125, "loss": 4.459616184234619 }, { "id": "identity_36504", "image": "A-OKVQA/images/311471.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which direction will the pink car go?\nChoices:\n(A) turn right\n(B) turn left\n(C) back up\n(D) go straight" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.2265625, "loss": 4.493259429931641 }, { "id": "identity_328500", "image": "VQA2.0/images/135267.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this?" }, { "from": "gpt", "value": "The answer is used to see behind you" } ], "clip_score": 16.140625, "loss": 4.200137138366699 }, { "id": "identity_293106", "image": "A-OKVQA/images/329895.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color of the leaves where the banana bunch is located?\nChoices:\n(A) brown\n(B) yellow\n(C) white\n(D) green" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.640625, "loss": 4.340987205505371 }, { "id": "identity_290734", "image": "A-OKVQA/images/337621.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is distracting the skiers from concentrating on skiing?\nChoices:\n(A) snow\n(B) lift\n(C) their costumes\n(D) their boots" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.109375, "loss": 4.298802375793457 }, { "id": "identity_285651", "image": "PMC-VQA/images/PMC4332200_fig06_359756.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the subject of observation in the immunolocalization images displayed?\nChoices:\n(A) Liver cortices \n(B) Kidney cortices \n(C) Lung cortices \n(D) Brain cortices" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.921875, "loss": 4.566511631011963 }, { "id": "identity_336814", "image": "PMC-VQA/images/PMC7873062_Fig1_364493.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the perspective of the slice in this image?\nChoices:\n(A) Posterior to anterior view\n(B) Inferior to superior view\n(C) Anterior to posterior view\n(D) Superior to inferior view" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.125, "loss": 4.521976470947266 }, { "id": "identity_280432", "image": "A-OKVQA/images/439273.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location are they situated?\nChoices:\n(A) house\n(B) church\n(C) hospital\n(D) classroom" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.46875, "loss": 4.338598728179932 }, { "id": "identity_1209", "image": "TextVQA/images/6ee912ba0e67c958.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the first omelette being sold by the food truck?" }, { "from": "gpt", "value": "The answer is western" } ], "clip_score": 21.5, "loss": 4.819927215576172 }, { "id": "identity_27738", "image": "Super-CLEVR/images/superCLEVR_new_001616.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the number of gray things right of the small metal cruiser the same as the number of green objects?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 24.65625, "loss": 5.151463031768799 }, { "id": "identity_30293", "image": "PMC-VQA/images/PMC7072331_cancers-12-00486-f001_134200.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique used to detect focal lesions in the rachis? \nChoices:\n(A) CT \n(B) T1-weighted \n(C) T2-weighted STIR \n(D) PET " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.3125, "loss": 4.211389064788818 }, { "id": "identity_284726", "image": "TabMWP/images/9898.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which game in the schedule ends at 3.40 P.M.?\nOptions:\n(A) Lightning versus Ducks game\n(B) Tigers versus Red Hawks game\n(C) Rattlesnakes versus Bears game\n(D) Vipers versus Crocodiles game" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.234375, "loss": 4.098919868469238 }, { "id": "identity_33396", "image": "PMC-VQA/images/PMC3446384_F6_156024.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the imaging performed? \nChoices:\n(A) Lower side of the filter \n(B) Inside the cell \n(C) Upper side of the filter \n(D) Outside of the cell " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.171875, "loss": 4.7144880294799805 }, { "id": "identity_55733", "image": "FigureQA/images/56812.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is The title is \"title" } ], "clip_score": 34.46875, "loss": 4.303465366363525 }, { "id": "identity_312566", "image": "PMC-VQA/images/PMC3274479_F2_124818.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of images are shown in the figure?\nChoices:\n(A) CT scans\n(B) PET scans\n(C) T1-weighted MRI\n(D) T2-weighted MRI" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.75, "loss": 4.255279064178467 }, { "id": "identity_288105", "image": "PMC-VQA/images/PMC4466355_fig1_396404.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What were the findings of the CT scan three months prior to admission? \n(A) Abnormal growths in the lungs \n(B) Fluid buildup in the lungs \n(C) Air pockets in the lungs \n(D) Normal lung tissue appearance" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.296875, "loss": 4.2653374671936035 }, { "id": "identity_31143", "image": "A-OKVQA/images/470028.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the woman boarding?\nChoices:\n(A) plane\n(B) horse\n(C) taxi\n(D) bus" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.734375, "loss": 4.182229995727539 }, { "id": "identity_186959", "image": "TextVQA/images/edf3913fe963b3ef.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lifeboats can be seen on the cruise ship?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 27.4375, "loss": 4.472881317138672 }, { "id": "identity_306867", "image": "IconQA/images/6762.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is indicated? Answer using a time word, not a number. It is (_) past five." }, { "from": "gpt", "value": "The answer is half" } ], "clip_score": 27.671875, "loss": 4.947359085083008 }, { "id": "identity_216130", "image": "A-OKVQA/images/424124.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of sports event is depicted in the image?" }, { "from": "gpt", "value": "The answer is Baseball" } ], "clip_score": 23.78125, "loss": 4.034165382385254 }, { "id": "identity_264561", "image": "FigureQA/images/68992.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Bubblegum the item with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.78125, "loss": 4.716907501220703 }, { "id": "identity_642", "image": "A-OKVQA/images/181438.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you were sitting in a chair what could you put your feet on?\nChoices:\n(A) log\n(B) ottoman\n(C) bench\n(D) table" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.625, "loss": 4.0452680587768555 }, { "id": "identity_21100", "image": "PMC-VQA/images/PMC8934646_F2_226881.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of views are shown in the caption? \nChoices:\n(A) Side views \n(B) Top views \n(C) Close-up views \n(D) Full views " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.75, "loss": 4.154496192932129 }, { "id": "identity_156856", "image": "TQA/images/question_images/parts_worm_7299.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many apertures are labeled on the earthworm diagram?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 30.078125, "loss": 4.190780162811279 }, { "id": "identity_260780", "image": "TextVQA/images/b61ae0199772445c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined text of all the graffiti in the image?" }, { "from": "gpt", "value": "The answer is $meu$ SPACE MOTIVE" } ], "clip_score": 31.953125, "loss": 4.176008701324463 }, { "id": "identity_294554", "image": "A-OKVQA/images/149679.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what way is the action of the person in the photo with their leg described as?\nChoices:\n(A) spiritual\n(B) slightly rude\n(C) dangerous\n(D) perfectly normal" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.28125, "loss": 4.6208176612854 }, { "id": "identity_219624", "image": "A-OKVQA/images/555118.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles are visible parked on the street?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.03125, "loss": 4.288386344909668 }, { "id": "identity_32502", "image": "PMC-VQA/images/PMC5893493_Fig9_299522.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the most likely diagnosis of the patient? \nChoices:\n(A) Appendicitis \n(B) Acute cholecystitis \n(C) Aortic rupture \n(D) Pancreatitis " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.25, "loss": 4.640142917633057 }, { "id": "identity_78", "image": "PMC-VQA/images/PMC7806191_FIG3_341924.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the mass located in the rectum? \nChoices:\n(A) Anterior \n(B) Lateral \n(C) Posterior \n(D) Superior " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.25, "loss": 4.968619346618652 }, { "id": "identity_323804", "image": "A-OKVQA/images/386712.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What form of art was used to preserve these animals?\nChoices:\n(A) taxidermy\n(B) metalworking\n(C) sculpting\n(D) drawing" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.765625, "loss": 4.337044715881348 }, { "id": "identity_282781", "image": "PMC-VQA/images/PMC5139043_Fig3_102563.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to clearly display the focal area of moderate FCH uptake in the right prostatic bed?\nChoices:\n(A) MRI\n(B) CT\n(C) PET\n(D) X-ray" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.46875, "loss": 4.013251781463623 }, { "id": "identity_20440", "image": "IconQA/images/80172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of hearts even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.734375, "loss": 4.075929641723633 }, { "id": "identity_227090", "image": "Super-CLEVR/images/superCLEVR_new_000751.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the metallic bicycle and the red metal object in front of the tiny red thing both smaller than the green car?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 32.71875, "loss": 5.092662811279297 }, { "id": "identity_137051", "image": "DocVQA/images/lpkm0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the connection type used for the transmission?" }, { "from": "gpt", "value": "The answer is SUB-ADDRESS" } ], "clip_score": 20.671875, "loss": 4.422696113586426 }, { "id": "identity_194804", "image": "A-OKVQA/images/150091.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of transportation is depicted in the image?" }, { "from": "gpt", "value": "The answer is Train" } ], "clip_score": 17.34375, "loss": 4.155980587005615 }, { "id": "identity_8652", "image": "FigureQA/images/70540.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Sky Blue the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.171875, "loss": 4.107684135437012 }, { "id": "identity_15705", "image": "TQA/images/abc_question_images/parts_skeleton_17275.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Select the mandible.\nChoices:\n(A) X\n(B) W\n(C) E\n(D) D" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.71875, "loss": 4.037747383117676 }, { "id": "identity_73146", "image": "AI2D/images/801.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many levels of consumers are depicted in this food chain?" }, { "from": "gpt", "value": "The answer is Three levels" } ], "clip_score": 25.84375, "loss": 4.621559143066406 }, { "id": "identity_111991", "image": "TextVQA/images/7b6df76509a1983b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the price of the item labeled with \"120\" on the bottom shelf?" }, { "from": "gpt", "value": "The answer is Insoles" } ], "clip_score": 25.703125, "loss": 4.656788349151611 }, { "id": "identity_324895", "image": "A-OKVQA/images/511363.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is needed for the toy to be used correctly?\nChoices:\n(A) manual\n(B) wind\n(C) directions\n(D) marshall" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.2734375, "loss": 4.145581245422363 }, { "id": "identity_289805", "image": "A-OKVQA/images/42805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which group of people uses the large umbrellas? (A) hawkers, (B) shoppers, (C) residents, or (D) shop owners?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.1875, "loss": 4.5501227378845215 }, { "id": "identity_27501", "image": "ScienceQA/images/19758.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of New Jersey?\nChoices:\n(A) Provo\n(B) Trenton\n(C) Newark\n(D) Jersey City" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.03125, "loss": 4.153749465942383 }, { "id": "identity_279022", "image": "A-OKVQA/images/249815.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which company is recognized for producing the numerous items available here?\nChoices:\n(A) Green Giant\n(B) Huffy\n(C) Popeyes\n(D) McDonald's" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 9.734375, "loss": 4.231534481048584 }, { "id": "identity_15734", "image": "DVQA/images/bar_train_00070699.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the chart contain stacked bars?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 23.625, "loss": 4.216917514801025 }, { "id": "identity_236728", "image": "Super-CLEVR/images/superCLEVR_new_000288.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color and type of the small metal thing left of the big cyan metal tandem bike?" }, { "from": "gpt", "value": "The answer is Gold plane" } ], "clip_score": 21.875, "loss": 4.056076526641846 }, { "id": "identity_30545", "image": "PMC-VQA/images/PMC8084928_Fig7_430849.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the white arrows in the image? \nChoices:\n(A) Fibrosis areas \n(B) Atrophy \n(C) Fat infiltration \n(D) None of the above " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.125, "loss": 4.2540435791015625 }, { "id": "identity_188490", "image": "A-OKVQA/images/169562.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many action figures can be seen on the wall?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 24.421875, "loss": 4.9354939460754395 }, { "id": "identity_64105", "image": "ScienceQA/images/5173.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the bird's throat?" }, { "from": "gpt", "value": "The answer is Dark Blue" } ], "clip_score": 27.609375, "loss": 4.9158806800842285 }, { "id": "identity_306257", "image": "DocVQA/images/zhfg0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this Mailbox?" }, { "from": "gpt", "value": "The answer is off" } ], "clip_score": 19.40625, "loss": 4.470250129699707 }, { "id": "identity_302411", "image": "TabMWP/images/6922.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the table. Then answer the question. At this price, is there a shortage or a surplus?\nChoices:\n(A) shortage\n(B) surplus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.03125, "loss": 4.000668048858643 }, { "id": "identity_306265", "image": "TextVQA/images/b995fd81107bc80e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What model is this laptop?" }, { "from": "gpt", "value": "The answer is pabilion" } ], "clip_score": 25.671875, "loss": 4.699203968048096 }, { "id": "identity_337160", "image": "IconQA/images/47271.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a turtle. What is the second image?\nChoices:\n(A) fish\n(B) sub\n(C) turtle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.390625, "loss": 4.491620063781738 }, { "id": "identity_31367", "image": "PMC-VQA/images/PMC6445604_F4_460291.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the white-dashed lines in the OCT B-scans indicate? \nChoices:\n(A) Healthy retina \n(B) Abnormal blood vessels \n(C) Choroidal hypertransmission signifying GA \n(D) Normal optic nerve " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.234375, "loss": 4.473835468292236 }, { "id": "identity_269891", "image": "Super-CLEVR/images/superCLEVR_new_000287.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the dimensions of the purple metal cruiser equal to those of the matte motorbike in front of the green matte object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 27.9375, "loss": 4.865887641906738 }, { "id": "identity_296436", "image": "PMC-VQA/images/PMC3199062_fig4_112726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is represented by the colors in the image?\nChoices:\n(A) Intensity levels \n(B) Regions of interest \n(C) P-value thresholds \n(D) None of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.15625, "loss": 5.1424455642700195 }, { "id": "identity_296420", "image": "PMC-VQA/images/PMC3200069_fig1_112904.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In Case 3, where are the radio-opaque shadows visible?\nChoices:\n(A) Right kidney\n(B) Left kidney\n(C) Liver\n(D) Spleen" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.8125, "loss": 4.30648136138916 }, { "id": "identity_267802", "image": "ScienceQA/images/19269.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Wyoming?\n(A) Honolulu\n(B) Cheyenne\n(C) Laramie\n(D) Tampa" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.390625, "loss": 4.414278984069824 }, { "id": "identity_187355", "image": "A-OKVQA/images/208067.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrian crossing signs are visible?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 24.59375, "loss": 4.36997127532959 }, { "id": "identity_17047", "image": "TQA/images/question_images/lewis_dot_diagrams_9144.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which represents the atom of Hydrogen?\nChoices:\n(A) both\n(B) image on the right\n(C) left image\n(D) none" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.96875, "loss": 4.56857967376709 }, { "id": "identity_314200", "image": "PMC-VQA/images/PMC7903407_Fig1_373442.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the observed signs in the chest CT scan?\nChoices:\n(A) Pulmonary Fibrosis and pleural effusion\n(B) Diffuse alveolar hemorrhage and multiple pulmonary nodules\n(C) Atelectasis and bronchial obstruction\n(D) COPD and Emphysema" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.40625, "loss": 4.289650917053223 }, { "id": "identity_285612", "image": "PMC-VQA/images/PMC6107703_F1_358484.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which types of MRI maps are being compared in this study? \nOptions:\n(A) T1 and T2 maps \n(B) T2 and Trace maps \n(C) Trace and FLAIR maps \n(D) T1 and Trace maps" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.859375, "loss": 4.507629871368408 }, { "id": "identity_266578", "image": "Super-CLEVR/images/superCLEVR_new_000798.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the red object that is the same size as the jet?" }, { "from": "gpt", "value": "The answer is articulated bus" } ], "clip_score": 21.03125, "loss": 4.091722011566162 }, { "id": "identity_271578", "image": "PMC-VQA/images/PMC5908617_F2_304238.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what position was the patient placed for the X-ray imaging? \nOptions:\n(A) Lateral flexion \n(B) Prone position \n(C) Neutral position \n(D) Supine position" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.40625, "loss": 4.1416215896606445 }, { "id": "identity_7095", "image": "PMC-VQA/images/PMC4216464_RSTB20130541F4_332225.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the difference between the left and right images? \nChoices:\n(A) Contrast \n(B) Focus \n(C) Color \n(D) Shape " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.7109375, "loss": 4.84451150894165 }, { "id": "identity_332571", "image": "AI2D/images/3954.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Label \"I\" in the diagram indicates which part of the bird?\nChoices:\n(A) Flank\n(B) Lore\n(C) Crown\n(D) Breast" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.125, "loss": 4.3258442878723145 }, { "id": "identity_18974", "image": "DVQA/images/bar_train_00126383.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the third group of bars from the left?" }, { "from": "gpt", "value": "The answer is finger" } ], "clip_score": 23.9375, "loss": 4.9512152671813965 }, { "id": "identity_283146", "image": "A-OKVQA/images/525409.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options best describes the location? \n(A) rodeo\n(B) amusement park\n(C) airport\n(D) daycare" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.8359375, "loss": 4.836293697357178 }, { "id": "identity_198125", "image": "VQA2.0/images/246016.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the baby's pants?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 16.390625, "loss": 4.041589260101318 }, { "id": "identity_304734", "image": "TQA/images/abc_question_images/optics_lense_types_19159.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which one is the convex lens?\nChoices:\n(A) S\n(B) U\n(C) L\n(D) K" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.859375, "loss": 4.425403118133545 }, { "id": "identity_104409", "image": "VQA2.0/images/458160.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible on the field?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 19.21875, "loss": 4.940966606140137 }, { "id": "identity_333667", "image": "PMC-VQA/images/PMC6274901_ijms-19-03675-f001_406375.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the angiograms of the common carotid arteries on both sides?\nChoices:\n(A) Calcification\n(B) Enlargement of lenticulostriate arteries\n(C) Hemorrhage\n(D) Normal arterial flow" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.34375, "loss": 4.838379383087158 }, { "id": "identity_269021", "image": "IconQA/images/24120.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture is the ninth in the sequence if the first picture is a sub? \n(A) turtle \n(B) sub \n(C) fish" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.0625, "loss": 4.330591201782227 }, { "id": "identity_269771", "image": "IconQA/images/90346.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) flip\n(B) turn\n(C) slide" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.203125, "loss": 4.049227714538574 }, { "id": "identity_46560", "image": "DocVQA/images/xplf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who are preparing a memorandum before the next meeting?" }, { "from": "gpt", "value": "The answer is Dr. Hennerich and Mr. Shank" } ], "clip_score": 24.4375, "loss": 4.319016456604004 }, { "id": "identity_275639", "image": "A-OKVQA/images/55594.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where do the majority of items sold here originate from?\nChoices:\n(A) plants\n(B) people\n(C) factory\n(D) animals" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.609375, "loss": 4.620854377746582 }, { "id": "identity_114692", "image": "TextVQA/images/3736f81be7095f5e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What English word is misspelled in the image?" }, { "from": "gpt", "value": "The answer is SuMme" } ], "clip_score": 13.0546875, "loss": 4.59984827041626 }, { "id": "identity_30737", "image": "PMC-VQA/images/PMC7514353_entropy-21-01049-f006_260703.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the red contour lines indicate in each figure? \nChoices:\n(A) Position of the vortices \n(B) Streamwise velocity values \n(C) Time interval \n(D) Normalized drift velocity values " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.53125, "loss": 4.556614875793457 }, { "id": "identity_280960", "image": "ScienceQA/images/11313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is the letter \"i\" located in column 1?\nChoices:\n(A) the library\n(B) the restaurant\n(C) the grocery store\n(D) the park" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.84375, "loss": 4.370295524597168 }, { "id": "identity_334750", "image": "A-OKVQA/images/569747.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of damage can be observed in this area?\nChoices:\n(A) cracked ground\n(B) illegal construction\n(C) graffiti\n(D) arson" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.703125, "loss": 4.494810104370117 }, { "id": "identity_212024", "image": "PMC-VQA/images/PMC7325100_Fig10_201909.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any annotations or labels indicating a specific medical condition or finding?" }, { "from": "gpt", "value": "The answer is No specific condition indicated" } ], "clip_score": 20.921875, "loss": 4.685232162475586 }, { "id": "identity_52956", "image": "DocVQA/images/zpcl0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the group that received support for the work?" }, { "from": "gpt", "value": "The answer is the Oxford group" } ], "clip_score": 12.5390625, "loss": 5.2371721267700195 }, { "id": "identity_267764", "image": "ScienceQA/images/8577.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Minnesota?\n(A) Minneapolis\n(B) Des Moines\n(C) Saint Paul\n(D) Indianapolis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.140625, "loss": 4.108168125152588 }, { "id": "identity_13298", "image": "A-OKVQA/images/550287.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of animals are shown?\nChoices:\n(A) stuffed\n(B) aquatic\n(C) wild\n(D) domestic" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.0625, "loss": 4.066420555114746 }, { "id": "identity_268886", "image": "IconQA/images/39609.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) flip\n(B) turn\n(C) slide" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.265625, "loss": 4.126960754394531 }, { "id": "identity_15828", "image": "VQA2.0/images/192994.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the collar red?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 19.46875, "loss": 4.143231391906738 }, { "id": "identity_131145", "image": "TextVQA/images/211e79ec67ac7955.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are visible in the background?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 11.6328125, "loss": 4.483708381652832 }, { "id": "identity_295064", "image": "Super-CLEVR/images/superCLEVR_new_001995.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape does the purple object have, which is constructed from the same material as the small gray bus?" }, { "from": "gpt", "value": "The answer is utility bike" } ], "clip_score": 31.265625, "loss": 4.732511043548584 }, { "id": "identity_284732", "image": "ScienceQA/images/17747.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Virginia?\n(A) Richmond\n(B) Birmingham\n(C) Arlington\n(D) Norfolk" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.140625, "loss": 4.171444892883301 }, { "id": "identity_8423", "image": "FigureQA/images/17407.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Turquoise the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.109375, "loss": 4.458754062652588 }, { "id": "identity_268040", "image": "FigureQA/images/43306.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gold considered the highest median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.71875, "loss": 4.299980640411377 }, { "id": "identity_36931", "image": "PMC-VQA/images/PMC7505619_FIG3_258226.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of imaging technique was used in this study? \nChoices:\n(A) CT scan \n(B) MRI scan \n(C) X-ray scan \n(D) PET scan. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.4375, "loss": 4.231977462768555 }, { "id": "identity_323034", "image": "A-OKVQA/images/557434.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of item is likely being transported in the tallest box on the truck?\nChoices:\n(A) clothing\n(B) home appliance\n(C) food\n(D) auto parts" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.9609375, "loss": 4.713376045227051 }, { "id": "identity_283646", "image": "PMC-VQA/images/PMC6378808_F2_437391.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What structure is being identified in the image?\nChoices:\n(A) White matter\n(B) Cerebrospinal fluid\n(C) Gray matter\n(D) Blood vessels" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.265625, "loss": 4.2217278480529785 }, { "id": "identity_313162", "image": "DVQA/images/bar_train_00047917.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second bar from the bottom in each set?" }, { "from": "gpt", "value": "The answer is basket" } ], "clip_score": 26.296875, "loss": 5.176493167877197 }, { "id": "identity_281009", "image": "DocVQA/images/fhlc0081_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What can be found in the top-right corner of the document?" }, { "from": "gpt", "value": "The answer is PTD IN USA" } ], "clip_score": 11.2890625, "loss": 4.585141181945801 }, { "id": "identity_332618", "image": "PMC-VQA/images/PMC7823255_animals-11-00063-f002_348387.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What technique was used to obtain this image?\nChoices:\n(A) CT scan\n(B) MRI\n(C) B-mode\n(D) X-Ray" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.5, "loss": 4.808737754821777 }, { "id": "identity_20008", "image": "PMC-VQA/images/PMC7212339_fig2_166119.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the main finding of the visual field test? \nChoices:\n(A) Normal visual field. \n(B) Peripheral constriction in the left optic nerve head. \n(C) Irregular peripheral constriction in the right optic nerve head. \n(D) Elevation in both optic nerve heads. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.765625, "loss": 4.070569038391113 }, { "id": "identity_295263", "image": "TextVQA/images/432b03478a52fc56.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written in the red boxes at the top?" }, { "from": "gpt", "value": "The answer is fenix" } ], "clip_score": 15.296875, "loss": 4.532519817352295 }, { "id": "identity_261272", "image": "A-OKVQA/images/254357.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is this location in?\nChoices:\n(A) United States\n(B) Australia\n(C) Italy\n(D) Britain" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 8.0703125, "loss": 4.223671913146973 }, { "id": "identity_183804", "image": "PMC-VQA/images/PMC4579293_F10_425971.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many brain slices are shown in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.96875, "loss": 4.643263339996338 }, { "id": "identity_295912", "image": "PMC-VQA/images/PMC3879224_F3_256148.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which area of the brain is impacted by the infarcted regions?\nOptions:\n(A) Left insula and watershed.\n(B) Right insula and watershed.\n(C) Frontal cortex and anterior circulation.\n(D) Temporal lobe and middle cerebral artery territory." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.546875, "loss": 4.413392066955566 }, { "id": "identity_285685", "image": "A-OKVQA/images/99956.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which US state is it most probable to find palm trees similar to the ones shown in the image?\nChoices:\n(A) Florida\n(B) Maine\n(C) Pennsylvania\n(D) New York" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.125, "loss": 4.062919616699219 }, { "id": "identity_317342", "image": "FigureQA/images/63221.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this color have the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.90625, "loss": 4.063464164733887 }, { "id": "identity_299188", "image": "A-OKVQA/images/244171.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What event are these individuals witnessing? \nOptions:\n(A) festival\n(B) fireworks\n(C) firestorm\n(D) bonfire" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.5, "loss": 4.366471767425537 }, { "id": "identity_108155", "image": "VQA2.0/images/422887.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object's eyes?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 9.90625, "loss": 4.476345062255859 }, { "id": "identity_51243", "image": "MapQA/images/map_416.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a value greater than 20?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.765625, "loss": 4.606123447418213 }, { "id": "identity_295448", "image": "PMC-VQA/images/PMC5910015_FIG1_304740.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the finding located on the CT scan according to the following choices: (A) Right temporal lobe, (B) Occipital lobe, (C) Left temporal lobe, or (D) Frontal lobe?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.25, "loss": 4.163326263427734 }, { "id": "identity_201774", "image": "PMC-VQA/images/PMC7298109_F1_193581.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the density of the area indicated by the arrow compared to the surrounding brain tissue?" }, { "from": "gpt", "value": "The answer is Lower density" } ], "clip_score": 29.140625, "loss": 4.018880367279053 }, { "id": "identity_320215", "image": "Super-CLEVR/images/superCLEVR_new_000783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the small shiny object behind the small metallic aeroplane and the large red metal object shaped the same?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 26.625, "loss": 5.218289375305176 }, { "id": "identity_255199", "image": "PMC-VQA/images/PMC3767748_pone-0073297-g003_230614.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the overall purpose of the image in (A) and how does the bottom right picture contribute to this purpose?" }, { "from": "gpt", "value": "The answer is To visualize and understand the spatial arrangement in the tomogram" } ], "clip_score": 17.09375, "loss": 4.481897354125977 }, { "id": "identity_322365", "image": "A-OKVQA/images/56837.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people on the boards trying to do?\nChoices:\n(A) stand\n(B) lay\n(C) dive\n(D) jump" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.6875, "loss": 4.267891883850098 }, { "id": "identity_327114", "image": "PMC-VQA/images/PMC4304789_pone.0115861.g003_352618.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the blue dash lines in the higher magnification of A-C?\nChoices:\n(A) Developed cushion area\n(B) Undeveloped cushion area\n(C) Epithelial area\n(D) Connective tissue area" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.40625, "loss": 5.131030082702637 }, { "id": "identity_210392", "image": "A-OKVQA/images/334320.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cows are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.5, "loss": 4.250149726867676 }, { "id": "identity_8552", "image": "TQA/images/question_images/cycle_rock_6734.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the first step in the formation process?\nChoices:\n(A) volcanic eruption\n(B) rock formation\n(C) erosion\n(D) breakup" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.03125, "loss": 4.24777889251709 }, { "id": "identity_205945", "image": "VQA2.0/images/265173.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many street signs are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.953125, "loss": 4.51474666595459 }, { "id": "identity_217968", "image": "PMC-VQA/images/PMC7947347_F1_387981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many images are displayed in the lower row labeled 'c. LTC'?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.546875, "loss": 4.455341815948486 }, { "id": "identity_298871", "image": "TextVQA/images/25160c90086848b2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the wine located on the far left?" }, { "from": "gpt", "value": "The answer is massbach reserve" } ], "clip_score": 23.796875, "loss": 4.863457679748535 }, { "id": "identity_109125", "image": "Super-CLEVR/images/superCLEVR_new_001455.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Gold" } ], "clip_score": 20.359375, "loss": 4.420120716094971 }, { "id": "identity_106707", "image": "VQA2.0/images/382083.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many newspaper vending machines are there in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.359375, "loss": 4.527313709259033 }, { "id": "identity_328705", "image": "PMC-VQA/images/PMC4877350_Fig9_33144.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What description best represents the appearance of these pelvic masses?\nChoices:\n(A) Solid\n(B) Hypodense\n(C) Hollow\n(D) Calcified" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.484375, "loss": 4.424340724945068 }, { "id": "identity_31663", "image": "A-OKVQA/images/417849.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is used to milk cows here?\nChoices:\n(A) machine\n(B) hands\n(C) cows\n(D) goats" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.453125, "loss": 4.4659223556518555 }, { "id": "identity_318595", "image": "FigureQA/images/70561.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.1640625, "loss": 4.135357856750488 }, { "id": "identity_327295", "image": "ScienceQA/images/5773.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the following statement accurate regarding our solar system?\nSaturn's volume exceeds half of Jupiter's volume.\nChoices:\n(A) true\n(B) false" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.59375, "loss": 4.098381996154785 }, { "id": "identity_67740", "image": "ScienceQA/images/5179.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of the earthworm in this food web?" }, { "from": "gpt", "value": "The answer is Primary consumer and prey" } ], "clip_score": 30.59375, "loss": 4.552145481109619 }, { "id": "identity_293138", "image": "ScienceQA/images/5654.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What category does ammonia fall into?\nChoices:\n(A) an elementary substance\n(B) a compound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.6875, "loss": 4.083459854125977 }, { "id": "identity_31735", "image": "IconQA/images/26103.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can you trace a square with this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.5625, "loss": 4.282937526702881 }, { "id": "identity_213422", "image": "A-OKVQA/images/160597.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the motorcycle's body?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 21.203125, "loss": 5.063712120056152 }, { "id": "identity_303480", "image": "ScienceQA/images/18491.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organism in this food web has matter that eventually moves to the earthworm?\nChoices:\n(A) Arctic fox\n(B) mushroom\n(C) grizzly bear\n(D) lichen" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.75, "loss": 4.650463104248047 }, { "id": "identity_52825", "image": "DVQA/images/bar_train_00077370.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest \"soil\" value?" }, { "from": "gpt", "value": "The answer is corps" } ], "clip_score": 25.625, "loss": 4.690914154052734 }, { "id": "identity_36908", "image": "A-OKVQA/images/246029.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On the front man what is most protected?\nChoices:\n(A) knees\n(B) shins\n(C) chest\n(D) nose" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.2109375, "loss": 4.35266637802124 }, { "id": "identity_291378", "image": "FigureQA/images/17771.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Chartreuse the highest value? \n\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.984375, "loss": 4.024025917053223 }, { "id": "identity_128054", "image": "A-OKVQA/images/79930.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing helmets?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 18.84375, "loss": 4.581162452697754 }, { "id": "identity_263272", "image": "VizWiz/images/VizWiz_train_00001946.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Could you please inform me about the labels on the buttons located at the top and bottom of the inner circle? Thank you." }, { "from": "gpt", "value": "The answer is cant see inner circle" } ], "clip_score": 26.328125, "loss": 5.196876049041748 }, { "id": "identity_283754", "image": "ScienceQA/images/21093.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Arizona?\nChoices:\n(A) Salem\n(B) Phoenix\n(C) Portland\n(D) Tucson" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.71875, "loss": 4.366739273071289 }, { "id": "identity_302843", "image": "ChartQA/images/28906613004157.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the biggest one?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 13.78125, "loss": 4.881836891174316 }, { "id": "identity_240951", "image": "TQA/images/abc_question_images/simple_machines_19244.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the object in the picture that is used to crack nuts and describe the function of each labeled part of this object" }, { "from": "gpt", "value": "The answer is W" } ], "clip_score": 26.34375, "loss": 4.338958263397217 }, { "id": "identity_327048", "image": "A-OKVQA/images/375892.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is beneath this number?\nChoices:\n(A) slug\n(B) tire\n(C) grass\n(D) paw" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 7.89453125, "loss": 4.53378438949585 }, { "id": "identity_308044", "image": "FigureQA/images/15963.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.765625, "loss": 4.589919567108154 }, { "id": "identity_18765", "image": "A-OKVQA/images/264567.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of career have these horsemen/women pursued?\nChoices:\n(A) cowboys\n(B) mounted police\n(C) town criers\n(D) performers" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.5, "loss": 4.329041481018066 }, { "id": "identity_276656", "image": "VizWiz/images/VizWiz_train_00016425.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is visible to you?" }, { "from": "gpt", "value": "The answer is icons" } ], "clip_score": 22.8125, "loss": 4.946047306060791 }, { "id": "identity_119799", "image": "TextVQA/images/c56b6eeef46eff4c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the company associated with the patent on the plaque?" }, { "from": "gpt", "value": "The answer is Milners" } ], "clip_score": 28.609375, "loss": 4.3212103843688965 }, { "id": "identity_207181", "image": "PMC-VQA/images/PMC4016266_F1_287374.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the bright orange area in the image?" }, { "from": "gpt", "value": "The answer is High metabolic activity, possible pathology" } ], "clip_score": 27.984375, "loss": 4.1366801261901855 }, { "id": "identity_293346", "image": "ScienceQA/images/8517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being pointed out in the highlighted area? \nOptions:\n(A) Kiribati\n(B) Nauru\n(C) Papua New Guinea\n(D) Vanuatu" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.125, "loss": 4.746691703796387 }, { "id": "identity_338446", "image": "IconQA/images/46988.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this object flat or solid?\nChoices:\n(A) flat\n(B) solid" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.453125, "loss": 4.466540336608887 }, { "id": "identity_295043", "image": "PMC-VQA/images/PMC4514952_Fig2_409253.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where were the abnormalities observed in the brain?\nChoices:\n(A) temporal lobe \n(B) frontal lobe \n(C) parietal lobe \n(D) occipital lobe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.953125, "loss": 4.3666815757751465 }, { "id": "identity_120184", "image": "VQA2.0/images/470691.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 16.28125, "loss": 4.466786861419678 }, { "id": "identity_1819", "image": "ScienceQA/images/531.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following organisms is the secondary consumer in this food web?\nChoices:\n(A) zooplankton\n(B) phytoplankton\n(C) kelp\n(D) plainfin midshipman" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.09375, "loss": 4.348895072937012 }, { "id": "identity_286123", "image": "IconQA/images/50140.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the quantity of ovals an even or odd number?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.671875, "loss": 4.088288307189941 }, { "id": "identity_327289", "image": "DVQA/images/bar_train_00147325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of one of the stacks of bars from the bottom?" }, { "from": "gpt", "value": "The answer is guitar" } ], "clip_score": 28.375, "loss": 4.6487135887146 }, { "id": "identity_20684", "image": "AI2D/images/4393.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part is embracing the stem with its base?\nChoices:\n(A) Sagittate\n(B) Stemclasping\n(C) Cordate\n(D) Hastate" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.921875, "loss": 4.2337822914123535 }, { "id": "identity_138559", "image": "MapQA/images/map_6216.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value range for New York?" }, { "from": "gpt", "value": "The answer is a value range from the legend" } ], "clip_score": 21.5, "loss": 4.610359191894531 }, { "id": "identity_274440", "image": "A-OKVQA/images/60992.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which product is manufactured by the company whose name is displayed on the right side of the wall? \n(A) witcher potions\n(B) crafts\n(C) baseball bats\n(D) eyeglasses" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 5.08203125, "loss": 4.195119857788086 }, { "id": "identity_48420", "image": "DVQA/images/bar_train_00046692.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label on the Y-axis has the highest \"outfit\" value?" }, { "from": "gpt", "value": "The answer is meat" } ], "clip_score": 27.03125, "loss": 4.588496208190918 }, { "id": "identity_272793", "image": "TQA/images/abc_question_images/human_system_eye_16133.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents the retina among the following choices: S, H, A, or D?\nChoices:\n(A) S\n(B) H\n(C) A\n(D) D" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.90625, "loss": 4.664947986602783 }, { "id": "identity_335579", "image": "PMC-VQA/images/PMC7328922_F2_202997.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by this arrow?\nChoices:\n(A) the tumor size \n(B) the location of the tumor \n(C) the shape of the tumor \n(D) the color of the tumor" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.46875, "loss": 5.023141860961914 }, { "id": "identity_277837", "image": "TQA/images/question_images/seasons_1448.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During September, when the Autumnal Equinox occurs in the northern hemisphere, what event takes place in the southern hemisphere at the same time?\nChoices:\n(A) Vernal Equinox\n(B) Winter Solstice\n(C) Summer Solstice\n(D) Autumnal Equinox" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.59375, "loss": 4.134126663208008 }, { "id": "identity_206608", "image": "A-OKVQA/images/147568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the person doing in the image?" }, { "from": "gpt", "value": "The answer is Taping a baseball bat" } ], "clip_score": 26.765625, "loss": 4.3909196853637695 }, { "id": "identity_127381", "image": "Super-CLEVR/images/superCLEVR_new_001132.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object located to the left of the large brown vehicle?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 24.15625, "loss": 4.501943111419678 }, { "id": "identity_18349", "image": "TQA/images/abc_question_images/human_system_digestive_13655.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label refers to the lung?\nChoices:\n(A) W\n(B) S\n(C) P\n(D) E" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.828125, "loss": 4.428595542907715 }, { "id": "identity_33774", "image": "Super-CLEVR/images/superCLEVR_new_000942.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the mountain bike that is the same material as the dirtbike?" }, { "from": "gpt", "value": "The answer is cyan" } ], "clip_score": 25.515625, "loss": 4.091912269592285 }, { "id": "identity_288117", "image": "A-OKVQA/images/487239.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activities are they engaged in while in the mountains?\nChoices:\n(A) sightseeing\n(B) migrating\n(C) working\n(D) hunting" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.15625, "loss": 4.3082451820373535 }, { "id": "identity_303352", "image": "AI2D/images/3846.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is A in this diagram?\nChoices:\n(A) mother leaf\n(B) stem\n(C) fish leaf\n(D) normal leaf" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.515625, "loss": 4.858756065368652 }, { "id": "identity_261596", "image": "PMC-VQA/images/PMC6334377_fig3_423309.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of vertebrate is referred to in the caption?\nChoices:\n(A) Cervical\n(B) Thoracic\n(C) Lumbar\n(D) Sacral" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.125, "loss": 4.312028408050537 }, { "id": "identity_58448", "image": "DocVQA/images/yscw0217_16.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the document?" }, { "from": "gpt", "value": "The answer is DWRITE 067827" } ], "clip_score": 23.0625, "loss": 4.030054092407227 }, { "id": "identity_35276", "image": "VQA2.0/images/237886.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people can wash their hands at a time in here?" }, { "from": "gpt", "value": "The answer is many" } ], "clip_score": 22.28125, "loss": 4.1926469802856445 }, { "id": "identity_314624", "image": "IconQA/images/69914.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on?\nChoices:\n(A) white\n(B) yellow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.75, "loss": 4.635373592376709 }, { "id": "identity_290802", "image": "ScienceQA/images/4888.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being pointed out in the highlighted area?\nChoices:\n(A) Barbados\n(B) Saint Lucia\n(C) Saint Kitts and Nevis\n(D) Saint Vincent and the Grenadines" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.9375, "loss": 4.16842794418335 }, { "id": "identity_183126", "image": "A-OKVQA/images/172690.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many volleyball courts are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.734375, "loss": 4.396954536437988 }, { "id": "identity_338035", "image": "IconQA/images/74873.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on?\nChoices:\n(A) white\n(B) blue" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.890625, "loss": 4.430695533752441 }, { "id": "identity_27421", "image": "VQA2.0/images/355357.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ultimate goal of the sign?" }, { "from": "gpt", "value": "The answer is stop vehicles" } ], "clip_score": 24.140625, "loss": 5.252895832061768 }, { "id": "identity_237417", "image": "Super-CLEVR/images/superCLEVR_new_000678.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of objects in the image, and is this number greater than the sum of the number of small red rubber cars and the number of large utility bikes?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 24.859375, "loss": 4.465631008148193 }, { "id": "identity_336267", "image": "IconQA/images/3977.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this figure open or closed?\nChoices:\n(A) open\n(B) closed" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.4296875, "loss": 4.046294212341309 }, { "id": "identity_287011", "image": "FigureQA/images/67585.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Black or Slate?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.390625, "loss": 4.483781337738037 }, { "id": "identity_226403", "image": "A-OKVQA/images/205251.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the possible reasons that indicate the building behind the amusement is a financial building?" }, { "from": "gpt", "value": "The answer is Modern architecture, UBS logo, urban setting, professional environment" } ], "clip_score": 16.265625, "loss": 4.64792537689209 }, { "id": "identity_301519", "image": "Super-CLEVR/images/superCLEVR_new_000547.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other things that have the same color as this tiny rubber bicycle?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 19.9375, "loss": 5.083984851837158 }, { "id": "identity_284471", "image": "DocVQA/images/zrcd0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What information is contained in the \"KC\" Field?" }, { "from": "gpt", "value": "The answer is jlh" } ], "clip_score": 17.625, "loss": 4.445191860198975 }, { "id": "identity_32276", "image": "Super-CLEVR/images/superCLEVR_new_000606.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer big blue metal utility bikes than tiny brown metallic tandem bikes?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 20.65625, "loss": 4.77104377746582 }, { "id": "identity_251910", "image": "FigureQA/images/97127.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the smallest range of y-values, and what is that range?" }, { "from": "gpt", "value": "The answer is Dark Salmon, 10" } ], "clip_score": 25.078125, "loss": 4.3476152420043945 }, { "id": "identity_320171", "image": "A-OKVQA/images/557568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person in the forefront doing?\nChoices:\n(A) designated runner\n(B) practicing swing\n(C) at bat\n(D) stealing base" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.34375, "loss": 4.7553863525390625 }, { "id": "identity_244395", "image": "A-OKVQA/images/85706.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the number of people and the type of equipment they are using, what is the most likely difficulty level of the slope in the foreground, and why?" }, { "from": "gpt", "value": "The answer is beginner" } ], "clip_score": 27.109375, "loss": 5.889016628265381 }, { "id": "identity_136260", "image": "ChartQA/images/2940.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group has the smallest percentage of individuals who are only enthusiastic about their first choice?" }, { "from": "gpt", "value": "The answer is Liberal" } ], "clip_score": 24.4375, "loss": 4.147611141204834 }, { "id": "identity_332521", "image": "PMC-VQA/images/PMC3376883_F3_141804.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the PET scan image indicate in the regions that received treatment? \nChoices:\n(A) inflammation \n(B) hot lesions \n(C) cold lesions \n(D) no change" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.6875, "loss": 5.291167259216309 }, { "id": "identity_326939", "image": "A-OKVQA/images/405456.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who does this person most closely resemble?\nChoices:\n(A) moby\n(B) chris rock\n(C) jimmy smits\n(D) sanjay gupta" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.6796875, "loss": 4.153637409210205 }, { "id": "identity_291962", "image": "PMC-VQA/images/PMC6712714_Fig2_29638.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the fracture located among the following choices: (A) Distal femur, (B) Proximal radius, (C) Distal radius, or (D) Proximal ulna?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.46875, "loss": 4.11307954788208 }, { "id": "identity_326053", "image": "A-OKVQA/images/35726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who wrote a book with a title that matches the word at the front of this boat?\nChoices:\n(A) joe hill\n(B) franz kafka\n(C) jack ryan\n(D) jim sturgess" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.796875, "loss": 4.33071231842041 }, { "id": "identity_323208", "image": "A-OKVQA/images/331031.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the headband meant to look like?\nChoices:\n(A) unicorn horn\n(B) paws\n(C) antlers\n(D) hands" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.7109375, "loss": 4.430711269378662 }, { "id": "identity_188269", "image": "VizWiz/images/VizWiz_train_00009516.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many plates are visible on the table?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 19.03125, "loss": 4.554342269897461 }, { "id": "identity_8586", "image": "Super-CLEVR/images/superCLEVR_new_000724.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the color of the large object behind the blue car the same as the matte wagon?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 23.625, "loss": 5.413651943206787 }, { "id": "identity_286461", "image": "A-OKVQA/images/54485.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color of the birds flying over the street? Is it:\n(A) white\n(B) black\n(C) gray\n(D) brown" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.140625, "loss": 4.292158126831055 }, { "id": "identity_333588", "image": "VizWiz/images/VizWiz_train_00011516.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The last picture I took, you mentioned strawberry ice cream? This is an Avon book, so it's someone holding an ice cream." }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 33.75, "loss": 4.173397541046143 }, { "id": "identity_212146", "image": "A-OKVQA/images/125275.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the sign indicating the price?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 17.0, "loss": 4.973844051361084 }, { "id": "identity_335504", "image": "A-OKVQA/images/245361.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a term related to this activity?\nChoices:\n(A) goal\n(B) homerun\n(C) surfs up\n(D) balance beam" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.015625, "loss": 4.080118656158447 }, { "id": "identity_202006", "image": "TextVQA/images/278a0fb214eb3f2c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the tent visible in the background?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 16.078125, "loss": 4.097683906555176 }, { "id": "identity_235070", "image": "Super-CLEVR/images/superCLEVR_new_001178.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is directly in front of the gray motorbike and to the left of the small gray thing made of rubber?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 23.6875, "loss": 4.206206321716309 }, { "id": "identity_261669", "image": "A-OKVQA/images/269098.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which activity is the standing lamp typically used for? Crafting, dancing, cooking, or sleeping?\nChoices:\n(A) crafting\n(B) dancing\n(C) cooking\n(D) sleeping" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.2578125, "loss": 4.957027912139893 }, { "id": "identity_261889", "image": "ScienceQA/images/5011.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following organisms includes matter that was previously a part of the persimmon tree?\nChoices:\n(A) swallowtail caterpillar\n(B) beaver" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.015625, "loss": 4.315927028656006 }, { "id": "identity_15052", "image": "A-OKVQA/images/442367.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of area in town is this?\nChoices:\n(A) shopping area\n(B) industrial\n(C) residential area\n(D) office buildings" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.0, "loss": 4.293990135192871 }, { "id": "identity_206440", "image": "PMC-VQA/images/PMC4891826_Fig2_37394.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of cells are predominantly shown in the image?" }, { "from": "gpt", "value": "The answer is the name of the cell type within ten words" } ], "clip_score": 22.453125, "loss": 4.3245768547058105 }, { "id": "identity_13784", "image": "PMC-VQA/images/PMC7028369_fig2s2_120390.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the cells are shown in the images?\nChoices:\n(A) Dendrites \n(B) Axons \n(C) Soma \n(D) Nucleus " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.015625, "loss": 4.731334686279297 }, { "id": "identity_324181", "image": "PMC-VQA/images/PMC4594743_Fig3_430082.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which MRI sequence is aligned parallel to the long axis of this structure?\nChoices:\n(A) Coronal T2W\n(B) Transverse T2W\n(C) Sagittal T1W\n(D) Axial T1W" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.421875, "loss": 4.439215660095215 }, { "id": "identity_35814", "image": "DVQA/images/bar_train_00163912.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many groups of bars contain at least one bar with value smaller than 8?" }, { "from": "gpt", "value": "The answer is four" } ], "clip_score": 26.40625, "loss": 4.136605262756348 }, { "id": "identity_36632", "image": "PMC-VQA/images/PMC6957557_Fig3_99164.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the red arrow indicate? \nChoices:\n(A) The previous gastrostomy site \n(B) The probe insertion site \n(C) The ulcer area after ESD \n(D) The healthy tissue around the ulcer area. " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.78125, "loss": 4.647477149963379 }, { "id": "identity_15470", "image": "ScienceQA/images/9878.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Nevada?\nChoices:\n(A) Frankfort\n(B) Carson City\n(C) Helena\n(D) Denver" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.921875, "loss": 4.196539878845215 }, { "id": "identity_280866", "image": "FigureQA/images/88417.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Medium Purple or Medium Seafoam?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.3125, "loss": 4.643699645996094 }, { "id": "identity_288221", "image": "PMC-VQA/images/PMC7787946_f0005_335962.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for displaying the tracings in the image? \nChoices:\n(A) To compare the size of the thalamus between the two patients.\n(B) To compare the cognitive abilities of the two patients.\n(C) To show the difference in texture between the two parts of the thalamus.\n(D) To indicate the location of the thalamus in the brain." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.875, "loss": 4.329413890838623 }, { "id": "identity_334036", "image": "PMC-VQA/images/PMC3789311_fig2_235557.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the ultrasound image show?\nChoices:\n(A) A homogeneous mass\n(B) A heterogeneous mass with a solid tumor\n(C) A simple cystic structure\n(D) An anechoic cystic structure with slightly echogenic thick walls" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.640625, "loss": 4.037278652191162 }, { "id": "identity_321124", "image": "PMC-VQA/images/PMC8684578_ccr35238-fig-0001_142173.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which side of the brain appears enlarged in the image?\nChoices:\n(A) Left side \n(B) Right side \n(C) Both sides \n(D) Neither side." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.53125, "loss": 4.493164539337158 }, { "id": "identity_14904", "image": "A-OKVQA/images/35328.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the clear piece the railing on the right is attached to made of?\nChoices:\n(A) ceramic\n(B) brick\n(C) glass\n(D) wood" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.328125, "loss": 4.301690101623535 }, { "id": "identity_195343", "image": "A-OKVQA/images/659.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the back of the orange vest of the man on the left?" }, { "from": "gpt", "value": "The answer is ASSISTANT" } ], "clip_score": 20.640625, "loss": 4.2753400802612305 }, { "id": "identity_293147", "image": "TabMWP/images/12625.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Tina have sufficient funds to purchase both a wrench and a mop with her $18?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.609375, "loss": 4.08937931060791 }, { "id": "identity_53785", "image": "MapQA/images/map_2974.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color representing the range of 248-414 hours?" }, { "from": "gpt", "value": "The answer is Light green" } ], "clip_score": 21.5625, "loss": 4.0307698249816895 }, { "id": "identity_328062", "image": "PMC-VQA/images/PMC7353793_Fig1_212030.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the inflammatory lesion located on this chest CT scan? \nChoices:\n(A) Left upper lobe \n(B) Left lower lobe \n(C) Right upper lobe \n(D) Right lower lobe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.21875, "loss": 4.360049724578857 }, { "id": "identity_125342", "image": "VQA2.0/images/491490.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats can be seen in the water?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 16.4375, "loss": 4.423618793487549 }, { "id": "identity_265078", "image": "FigureQA/images/25525.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orchid the highest possible option?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.6875, "loss": 4.683184623718262 }, { "id": "identity_96924", "image": "IconQA/images/730.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the symmetry of the shape?" }, { "from": "gpt", "value": "The answer is Infinite lines of symmetry" } ], "clip_score": 20.4375, "loss": 4.107666015625 }, { "id": "identity_292623", "image": "TQA/images/question_images/earth_day_night_87.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What phenomenon is caused by the sunlight hitting planet Earth?\nChoices:\n(A) Day and Night cycle\n(B) Equator\n(C) Rotation\n(D) Translation" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.609375, "loss": 4.062933444976807 }, { "id": "identity_133865", "image": "TextVQA/images/a679ac728e700b84.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats can be seen in the image within the letter 'E'?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 13.90625, "loss": 4.777820110321045 }, { "id": "identity_312634", "image": "ScienceQA/images/13829.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Spokane\n(B) Nampa\n(C) Denver\n(D) Boise" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.265625, "loss": 4.010528564453125 }, { "id": "identity_287920", "image": "Super-CLEVR/images/superCLEVR_new_001481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can any small cyan helicopters be seen?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 19.5625, "loss": 5.011991024017334 }, { "id": "identity_103778", "image": "VQA-AS/images/2859.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trees are visible in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 21.734375, "loss": 4.034123420715332 }, { "id": "identity_138482", "image": "MapQA/images/map_260.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states fall under the lowest percentage range category?" }, { "from": "gpt", "value": "The answer is an integer number" } ], "clip_score": 24.765625, "loss": 4.232136249542236 }, { "id": "identity_297063", "image": "Super-CLEVR/images/superCLEVR_new_000985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do any bicycles have the same color as the rubber wagon?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 26.390625, "loss": 5.141304969787598 }, { "id": "identity_291252", "image": "PMC-VQA/images/PMC3038968_F1_87161.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What information did the abdominal CT scan provide regarding the tumor?\nChoices:\n(A) It is located in the upper abdomen \n(B) It is a small, even tumor \n(C) It is large and uneven \n(D) It is located in the back" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.5625, "loss": 4.044366359710693 }, { "id": "identity_324315", "image": "A-OKVQA/images/56116.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the white cup in front of this chair from?\nChoices:\n(A) wal mart\n(B) dunkin\n(C) wawa\n(D) starbucks" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.546875, "loss": 4.111055850982666 }, { "id": "identity_184617", "image": "VQA2.0/images/570574.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are visible in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 20.828125, "loss": 4.1676836013793945 }, { "id": "identity_293872", "image": "ScienceQA/images/1810.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) New York\n(B) Iowa\n(C) New Jersey\n(D) Connecticut" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.03125, "loss": 5.01747989654541 }, { "id": "identity_73583", "image": "AI2D/images/8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many types of plants are depicted in the schematic root penetration patterns?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 36.1875, "loss": 4.158745765686035 }, { "id": "identity_221788", "image": "A-OKVQA/images/221004.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of headwear is the person on the right wearing?" }, { "from": "gpt", "value": "The answer is A hat" } ], "clip_score": 16.453125, "loss": 4.185611248016357 }, { "id": "identity_60305", "image": "DVQA/images/bar_train_00013016.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has a consistently higher accuracy across all datasets?" }, { "from": "gpt", "value": "The answer is shift" } ], "clip_score": 33.6875, "loss": 4.037325859069824 }, { "id": "identity_128942", "image": "PMC-VQA/images/PMC3350148_fig3_137475.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the red color in the image indicate?" }, { "from": "gpt", "value": "The answer is Blood flow towards the probe" } ], "clip_score": 28.546875, "loss": 4.2095184326171875 }, { "id": "identity_1725", "image": "DVQA/images/bar_train_00196838.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What category does the lightgoldenrodyellow color represent?" }, { "from": "gpt", "value": "The answer is test" } ], "clip_score": 21.875, "loss": 4.01156759262085 }, { "id": "identity_31158", "image": "IconQA/images/23774.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a rocket. Which picture is second?\nChoices:\n(A) moon\n(B) star\n(C) rocket" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.828125, "loss": 4.262713432312012 }, { "id": "identity_337546", "image": "TextVQA/images/86401c9659fc9f6b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ad in the background about?" }, { "from": "gpt", "value": "The answer is no text in image" } ], "clip_score": 16.546875, "loss": 4.0516791343688965 }, { "id": "identity_309923", "image": "PMC-VQA/images/PMC6881471_Fig2_75067.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of event is shown in the images provided?\nChoices:\n(A) normal breathing\n(B) apneic/hypopneic event\n(C) gasping\n(D) tachypnea" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.078125, "loss": 5.740005016326904 }, { "id": "identity_302681", "image": "A-OKVQA/images/221446.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a collective name given to these food options?\nChoices:\n(A) fruits\n(B) meat\n(C) nuts\n(D) veggies" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.640625, "loss": 4.079894065856934 }, { "id": "identity_217499", "image": "A-OKVQA/images/43344.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tennis rackets can be seen in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.625, "loss": 4.622246265411377 }, { "id": "identity_191866", "image": "PMC-VQA/images/PMC3797833_pone-0076655-g003_238197.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many images display the seed regions in the top row?" }, { "from": "gpt", "value": "The answer is Three images" } ], "clip_score": 20.859375, "loss": 5.057950496673584 }, { "id": "identity_335928", "image": "TextVQA/images/8b7c688892e36f16.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are on the field?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 19.40625, "loss": 4.6735100746154785 }, { "id": "identity_249540", "image": "A-OKVQA/images/539891.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people in the image are wearing black shirts, and what are they doing?" }, { "from": "gpt", "value": "The answer is Two people, standing and holding/pointing" } ], "clip_score": 18.859375, "loss": 4.280500411987305 }, { "id": "identity_12099", "image": "AI2D/images/3724.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The tongue is in the mouth true or false?\nChoices:\n(A) False it is in the stomach\n(B) True\n(C) False it is in the liver\n(D) False" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.046875, "loss": 4.18764591217041 }, { "id": "identity_106470", "image": "A-OKVQA/images/57361.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many traffic lights are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.703125, "loss": 4.2812299728393555 }, { "id": "identity_295209", "image": "A-OKVQA/images/470174.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What object is located at the center of the picture?\nChoices:\n(A) statue\n(B) tv\n(C) window\n(D) wood stove" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.140625, "loss": 4.226817607879639 }, { "id": "identity_17425", "image": "FigureQA/images/15273.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Yellow the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.078125, "loss": 4.467284202575684 }, { "id": "identity_284060", "image": "DocVQA/images/slmm0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which country does Tony Fisher hail from?" }, { "from": "gpt", "value": "The answer is us" } ], "clip_score": 17.328125, "loss": 4.2248640060424805 }, { "id": "identity_255784", "image": "VQA-AS/images/10909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What objects are near the fire, and how many are there in total?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 24.75, "loss": 5.489182949066162 }, { "id": "identity_231647", "image": "PMC-VQA/images/PMC6563475_cne24657-fig-0009_493022.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the probes against that are being used in the image, and how can you determine the specific cellular structures they are targeting?" }, { "from": "gpt", "value": "The answer is TRH and nuclei" } ], "clip_score": 29.40625, "loss": 4.609569072723389 }, { "id": "identity_292873", "image": "ScienceQA/images/4412.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic best describes the ecosystem of the Serengeti National Park?\nChoices:\n(A) Warm summers and warm winters.\n(B) Limited rainfall." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.296875, "loss": 5.303948879241943 }, { "id": "identity_290245", "image": "PMC-VQA/images/PMC4255819_fig02_341658.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What distinguishes the two images provided?\nOptions:\n(A) Varied muscle imaging\n(B) Different time points for image capture\n(C) Varied angular velocities for image capture\n(D) Different types of contractions in the images." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.609375, "loss": 4.846177101135254 }, { "id": "identity_37648", "image": "PMC-VQA/images/PMC4245492_Fig1_339455.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in lane 3 of the image? \nChoices:\n(A) SATB1 \n(B) SATB2 \n(C) Molecular weight marker \n(D) Negative control lysate " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.25, "loss": 4.8929901123046875 }, { "id": "identity_327251", "image": "A-OKVQA/images/3528.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the likely relation of the individual pushing the stroller to the child in it?\nChoices:\n(A) grandmother\n(B) mother\n(C) cousin\n(D) aunt" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.875, "loss": 4.8824143409729 }, { "id": "identity_329346", "image": "PMC-VQA/images/PMC5289170_Fig2_122235.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of imaging was used to capture this? \nChoices:\n(A) X-ray \n(B) CT scan \n(C) MRI \n(D) PET scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.953125, "loss": 4.485098838806152 }, { "id": "identity_9602", "image": "AI2D/images/3403.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is located at the very tail end?\nChoices:\n(A) Ventral tufts\n(B) Lateral hair\n(C) Caudal hair\n(D) Spiracular valves" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.0625, "loss": 4.5663652420043945 }, { "id": "identity_138356", "image": "DVQA/images/bar_train_00149797.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest preference for 'spite'?" }, { "from": "gpt", "value": "The answer is shame" } ], "clip_score": 30.0, "loss": 4.669302940368652 }, { "id": "identity_156852", "image": "AI2D/images/3615.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many primary bronchi are visible in this diagram?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 26.1875, "loss": 4.026366233825684 }, { "id": "identity_8777", "image": "DVQA/images/bar_train_00102022.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many groups of bars contain at least one bar with value smaller than 9?" }, { "from": "gpt", "value": "The answer is six" } ], "clip_score": 24.90625, "loss": 4.16982364654541 }, { "id": "identity_29297", "image": "A-OKVQA/images/399275.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the red vehicle?\nChoices:\n(A) van\n(B) tank\n(C) airplane\n(D) carriage" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.28125, "loss": 4.777195930480957 }, { "id": "identity_296035", "image": "AI2D/images/303.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the food web, what role does the killer whale play? Is it a prey, producer, predator, or parasite?\nChoices:\n(A) Prey\n(B) Producer\n(C) Predator\n(D) Parasite" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.1875, "loss": 4.233122825622559 }, { "id": "identity_1071", "image": "PMC-VQA/images/PMC6296120_Fig1_412051.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the green arrow on the X-ray of the left proximal tibia? \nChoices:\n(A) An ill-defined lytic lesion \n(B) A well-defined lytic lesion \n(C) Blurred periosteal appositions \n(D) An area of geographic pattern " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.125, "loss": 4.9267168045043945 }, { "id": "identity_25399", "image": "A-OKVQA/images/237848.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person in the foreground doing?\nChoices:\n(A) fishing\n(B) running\n(C) skateboarding\n(D) eating" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.65625, "loss": 4.3964715003967285 }, { "id": "identity_115762", "image": "PMC-VQA/images/PMC6010247_pone.0198078.g002_331241.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the area highlighted in blue?" }, { "from": "gpt", "value": "The answer is Region of interest for examination/analysis" } ], "clip_score": 15.7109375, "loss": 4.045886039733887 }, { "id": "identity_288083", "image": "PMC-VQA/images/PMC7180539_ytaa024-F3_158057.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which view is depicted on the left side in the image?\nOptions:\n(A) Left parasternal long axis view\n(B) Right parasternal long axis view\n(C) Apical two chamber view\n(D) Apical four chamber view" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.3125, "loss": 4.510183811187744 }, { "id": "identity_134257", "image": "DVQA/images/bar_train_00011491.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group has the smallest difference between 'actor' and 'inch' values?" }, { "from": "gpt", "value": "The answer is wise" } ], "clip_score": 22.375, "loss": 4.520162582397461 }, { "id": "identity_5543", "image": "DVQA/images/bar_train_00025232.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What category does the mediumpurple color represent?" }, { "from": "gpt", "value": "The answer is date" } ], "clip_score": 19.921875, "loss": 4.015027046203613 }, { "id": "identity_290010", "image": "ScienceQA/images/2726.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Minnesota?\n(A) Saint Paul\n(B) Saint Louis\n(C) Helena\n(D) Minneapolis" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.265625, "loss": 4.196938991546631 }, { "id": "identity_235557", "image": "Super-CLEVR/images/superCLEVR_new_000506.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is to the right of the metal bicycle and to the left of the airplane?" }, { "from": "gpt", "value": "The answer is yellow" } ], "clip_score": 29.71875, "loss": 4.510173320770264 }, { "id": "identity_9523", "image": "VQA2.0/images/258264.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the chairs?" }, { "from": "gpt", "value": "The answer is white" } ], "clip_score": 20.953125, "loss": 4.126586437225342 }, { "id": "identity_276503", "image": "A-OKVQA/images/410779.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following items is the orange item? \n(A) carrot \n(B) pumpkin pie \n(C) traffic cone \n(D) sticky note" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.5859375, "loss": 4.2132368087768555 }, { "id": "identity_273151", "image": "A-OKVQA/images/108303.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Next to which food item is the fork placed? \n(A) chili\n(B) cherry pie\n(C) cake\n(D) steak" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.84375, "loss": 4.084294319152832 }, { "id": "identity_182363", "image": "TextVQA/images/e75daf900bcb8fc5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What object is placed directly in front of the book on the right side?" }, { "from": "gpt", "value": "The answer is A glass jar" } ], "clip_score": 30.234375, "loss": 4.011075496673584 }, { "id": "identity_2697", "image": "PMC-VQA/images/PMC5775587_Fig2_266134.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the imaging technique used? \nChoices:\n(A) Computed tomography (CT) scan \n(B) Magnetic resonance imaging (MRI) scan \n(C) X-Ray scan \n(D) Positron emission tomography (PET) scan " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.1875, "loss": 4.237439155578613 }, { "id": "identity_203145", "image": "A-OKVQA/images/386272.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many benches are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.625, "loss": 4.41664981842041 }, { "id": "identity_284472", "image": "IconQA/images/17311.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on, green or white?\nChoices:\n(A) green\n(B) white" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.375, "loss": 4.206048488616943 }, { "id": "identity_36183", "image": "PMC-VQA/images/PMC5738007_F1_255092.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the medical imaging technique used in this case? \nChoices:\n(A) Ultrasound \n(B) X-ray \n(C) MRI \n(D) PET scan " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.453125, "loss": 4.682555675506592 }, { "id": "identity_555", "image": "TQA/images/question_images/parts_neuron_7207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the structure present at the end of a cell, connecting it to another cell?\nChoices:\n(A) Synapse\n(B) Node of Ranvier\n(C) Axon\n(D) Oligodendrocyte" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.328125, "loss": 4.271205425262451 }, { "id": "identity_186569", "image": "VizWiz/images/VizWiz_train_00002672.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many glasses of water are visible in the background?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 16.171875, "loss": 4.498282432556152 }, { "id": "identity_267166", "image": "DVQA/images/bar_train_00198239.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which bar label is located fourth from the left?" }, { "from": "gpt", "value": "The answer is tail" } ], "clip_score": 20.21875, "loss": 4.055333137512207 }, { "id": "identity_126418", "image": "A-OKVQA/images/331140.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats in the image have visible red buoys attached to them?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.171875, "loss": 4.395731449127197 }, { "id": "identity_325231", "image": "TextVQA/images/0259621e0cf41ab9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What word is right above the green icon on the right side?" }, { "from": "gpt", "value": "The answer is journey" } ], "clip_score": 7.28125, "loss": 5.394545078277588 }, { "id": "identity_289788", "image": "A-OKVQA/images/14472.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is the blue item most likely to be used? \n(A) Rwanda \n(B) London \n(C) Cameroon \n(D) Egypt" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.6171875, "loss": 4.678105354309082 }, { "id": "identity_119690", "image": "VQA-AS/images/15328.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows are there in the room?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.140625, "loss": 4.049821376800537 }, { "id": "identity_38112", "image": "A-OKVQA/images/360751.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the group doing at the beach?\nChoices:\n(A) wedding\n(B) clam bake\n(C) bon fire\n(D) blanket bingo" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.109375, "loss": 4.580721378326416 }, { "id": "identity_241030", "image": "PMC-VQA/images/PMC6478587_jbm410172-fig-0003_469610.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary function of the cells scattered in the image, and how does their structure support this function?" }, { "from": "gpt", "value": "The answer is Immune response; large nucleus for antibody production" } ], "clip_score": 22.578125, "loss": 4.155348300933838 }, { "id": "identity_184590", "image": "PMC-VQA/images/PMC7935200_FIG2_385034.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the anatomical structure indicated by the red arrow?" }, { "from": "gpt", "value": "The answer is Appendix" } ], "clip_score": 29.84375, "loss": 4.027075290679932 }, { "id": "identity_104355", "image": "VQA2.0/images/350414.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many propellers does the plane have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.359375, "loss": 4.187617301940918 }, { "id": "identity_285112", "image": "ScienceQA/images/5621.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being indicated by the highlight among the following options: the Dominican Republic, Saint Kitts and Nevis, Dominica, or Haiti?\nChoices:\n(A) the Dominican Republic\n(B) Saint Kitts and Nevis\n(C) Dominica\n(D) Haiti" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.703125, "loss": 4.080894470214844 }, { "id": "identity_328456", "image": "A-OKVQA/images/30973.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where should the person with more weight sleep?\nChoices:\n(A) another room\n(B) floor\n(C) bottom\n(D) top" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.84375, "loss": 4.409560203552246 }, { "id": "identity_18889", "image": "A-OKVQA/images/46339.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is odd about the mannequin in the foreground?\nChoices:\n(A) painted red\n(B) no pants\n(C) human\n(D) broken nose" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.078125, "loss": 4.032403469085693 }, { "id": "identity_31165", "image": "A-OKVQA/images/477919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type environment are these fruits grown in?\nChoices:\n(A) tropical\n(B) desert\n(C) tundra\n(D) temperate" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.75, "loss": 4.191421985626221 }, { "id": "identity_103", "image": "FigureQA/images/89159.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Orange have the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.640625, "loss": 4.1635308265686035 }, { "id": "identity_314645", "image": "DVQA/images/bar_train_00037822.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second bar from the bottom in each set?" }, { "from": "gpt", "value": "The answer is lip" } ], "clip_score": 21.125, "loss": 4.16525936126709 }, { "id": "identity_192979", "image": "PMC-VQA/images/PMC4844105_pone.0154174.g006_23944.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the arrow in the image point to?" }, { "from": "gpt", "value": "The answer is A layer within the cochlea" } ], "clip_score": 31.265625, "loss": 4.048269271850586 }, { "id": "identity_275680", "image": "PMC-VQA/images/PMC7382072_Fig8_220157.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which staining method was employed to visualize the cells among the following options: Wright-Giemsa stain, Masson's trichrome stain, Hematoxylin and Eosin stain, or Toluidine blue stain?\nChoices:\n(A) Wright-Giemsa stain \n(B) Masson's trichrome stain \n(C) Hematoxylin and Eosin stain \n(D) Toluidine blue stain" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.625, "loss": 4.091856479644775 }, { "id": "identity_187482", "image": "TextVQA/images/f440577028d6c129.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are standing in a wall formation?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 24.59375, "loss": 4.557680606842041 }, { "id": "identity_325300", "image": "AI2D/images/2531.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label for the new moon phase.\nChoices:\n(A) D\n(B) I\n(C) F\n(D) B" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.96875, "loss": 4.049062252044678 }, { "id": "identity_327217", "image": "A-OKVQA/images/46155.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this person hoping to keep up?\nChoices:\n(A) volley\n(B) net\n(C) complaints\n(D) emotions" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.734375, "loss": 4.760729789733887 }, { "id": "identity_324338", "image": "PMC-VQA/images/PMC9202151_Fig4_312502.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the coronal reconstructed image?\nChoices:\n(A) Anterior ring injury\n(B) Posterior ring injury\n(C) Both anterior and posterior ring injury\n(D) No injury shown" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.90625, "loss": 4.777807235717773 }, { "id": "identity_280347", "image": "ScienceQA/images/15217.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic is shared by both of these objects?\nChoices:\n(A) smooth\n(B) slippery" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.515625, "loss": 4.050239562988281 }, { "id": "identity_237567", "image": "TextVQA/images/6b0fef85e2aac76d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many words are there in the entire text on the image, and what is the word that appears exactly in the middle of the text?" }, { "from": "gpt", "value": "The answer is MEAN IT" } ], "clip_score": 17.46875, "loss": 4.883044242858887 }, { "id": "identity_310004", "image": "IconQA/images/29540.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this shape?\nChoices:\n(A) flip\n(B) slide\n(C) turn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.0625, "loss": 4.342484951019287 }, { "id": "identity_239134", "image": "TQA/images/question_images/earth_day_night_96.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the given image, if the Earth rotates 90 degrees clockwise, which direction will be at the top of the Earth?" }, { "from": "gpt", "value": "The answer is East" } ], "clip_score": 31.453125, "loss": 4.0415544509887695 }, { "id": "identity_33087", "image": "PMC-VQA/images/PMC6189129_Fig2_382287.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the electron diffraction pattern?\nChoices:\n(A) DNA sequence \n(B) X-ray diffraction \n(C) electron diffraction from the GNS \n(D) protein crystal structure" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.03125, "loss": 4.162741184234619 }, { "id": "identity_263648", "image": "ScienceQA/images/887.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which question is most suitable for Camille's experiment to answer?\nChoices:\n(A) Can the humidity level at the planting site of tomato seeds impact the growth of tomato seedlings?\n(B) Is there a difference in the number of tomato seedlings that grow when planted in soil with fertilizer versus soil without fertilizer?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.75, "loss": 4.18395471572876 }, { "id": "identity_216341", "image": "A-OKVQA/images/236882.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the motorcycle that is second from the left?" }, { "from": "gpt", "value": "The answer is Silver" } ], "clip_score": 27.4375, "loss": 4.288126468658447 }, { "id": "identity_108323", "image": "Super-CLEVR/images/superCLEVR_new_001498.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bicycle that is behind the purple car?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 29.109375, "loss": 4.130204200744629 }, { "id": "identity_203824", "image": "A-OKVQA/images/530683.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the traffic light for the direction the person is facing?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 18.515625, "loss": 4.167688846588135 }, { "id": "identity_31628", "image": "PMC-VQA/images/PMC5168471_fig2_106327.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the diagnostic tool used to get the image seen in this report? \nChoices:\n(A) X-ray \n(B) Echocardiogram \n(C) MRI \n(D) CT scan. " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.21875, "loss": 4.418763637542725 }, { "id": "identity_57494", "image": "DVQA/images/bar_train_00046721.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the highest accuracy according to the graph?" }, { "from": "gpt", "value": "The answer is artist" } ], "clip_score": 32.75, "loss": 4.1053595542907715 }, { "id": "identity_157256", "image": "ScienceQA/images/21051.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many letters are there in the text on the baseball cap?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 26.6875, "loss": 4.260720252990723 }, { "id": "identity_7894", "image": "DVQA/images/bar_train_00046331.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the sixth group of bars from the left?" }, { "from": "gpt", "value": "The answer is hell" } ], "clip_score": 16.375, "loss": 4.612620830535889 }, { "id": "identity_199028", "image": "A-OKVQA/images/269254.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are wearing helmets in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.921875, "loss": 4.646979808807373 }, { "id": "identity_33619", "image": "A-OKVQA/images/411492.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the man doing in the venue?\nChoices:\n(A) reading\n(B) shopping\n(C) sleeping\n(D) repairing clocks" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.609375, "loss": 4.307854652404785 }, { "id": "identity_338639", "image": "A-OKVQA/images/314515.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity are the people gathered here participating in?\nChoices:\n(A) vacation\n(B) protest\n(C) cycling event\n(D) picnic" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.890625, "loss": 4.2901740074157715 }, { "id": "identity_332915", "image": "ScienceQA/images/1885.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Saint Paul\n(B) Madison\n(C) Indianapolis\n(D) Minneapolis" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.375, "loss": 4.361382484436035 }, { "id": "identity_8694", "image": "AI2D/images/49.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What stage of the life cycle of the butterfly is shown at letter G?\nChoices:\n(A) adult\n(B) larva\n(C) pupa\n(D) egg" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.0625, "loss": 4.4473981857299805 }, { "id": "identity_298189", "image": "PMC-VQA/images/PMC5872966_materials-11-00387-f001_294549.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the inset in the image depict?\nChoices:\n(A) The top view of thin BCN sheets \n(B) The structure of BCN sheets at the atomic level \n(C) The crystallographic orientation of thin BCN sheets \n(D) The size distribution of thin BCN sheets" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.796875, "loss": 4.257017135620117 }, { "id": "identity_269309", "image": "ScienceQA/images/6754.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Arizona?\n(A) Carson City\n(B) Tucson\n(C) Frankfort\n(D) Phoenix" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.875, "loss": 4.3119425773620605 }, { "id": "identity_333161", "image": "A-OKVQA/images/540762.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the circular dais in the middle of this structure?\nChoices:\n(A) blue\n(B) white\n(C) gray\n(D) red" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 8.75, "loss": 4.019474029541016 }, { "id": "identity_268560", "image": "TQA/images/abc_question_images/human_system_respiratory_11327.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the main respiratory muscle from the following options:\n(A) Y\n(B) S\n(C) N\n(D) L" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.15625, "loss": 4.077447414398193 }, { "id": "identity_213931", "image": "Super-CLEVR/images/superCLEVR_new_001109.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the airplane located above the yellow bus?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 28.140625, "loss": 4.115787029266357 }, { "id": "identity_196196", "image": "PMC-VQA/images/PMC7227430_F4_171407.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cells are encircled in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 27.09375, "loss": 4.200167179107666 }, { "id": "identity_302913", "image": "A-OKVQA/images/2867.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is happening in this scene?\nChoices:\n(A) pep speech\n(B) skiing lessons\n(C) protest\n(D) punishment" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.46875, "loss": 4.745387554168701 }, { "id": "identity_29479", "image": "A-OKVQA/images/337621.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The skiers will have difficulty concentrating on skiing because distracts them?\nChoices:\n(A) snow\n(B) lift\n(C) their costumes\n(D) their boots" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.984375, "loss": 4.258186340332031 }, { "id": "identity_268253", "image": "Super-CLEVR/images/superCLEVR_new_001197.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the material of the car in front of the gray metallic car the same as the material of the large object behind the large dirtbike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 24.5, "loss": 4.811842918395996 }, { "id": "identity_220957", "image": "PMC-VQA/images/PMC9117747_F1_285061.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical region is predominantly featured in this image?" }, { "from": "gpt", "value": "The answer is The arm region" } ], "clip_score": 28.0, "loss": 5.415067195892334 }, { "id": "identity_337128", "image": "TextVQA/images/63fcb821deeab5f4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the prominent vehicle?" }, { "from": "gpt", "value": "The answer is red " } ], "clip_score": 18.40625, "loss": 4.813726902008057 }, { "id": "identity_197678", "image": "PMC-VQA/images/PMC8046147_jbm410472-fig-0005_417800.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arrows are present in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 18.546875, "loss": 4.295933723449707 }, { "id": "identity_18452", "image": "Super-CLEVR/images/superCLEVR_new_001703.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a brown mountain bike to the right of the big purple object that is to the left of the large purple thing that is on the right side of the blue airliner?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 31.34375, "loss": 4.938704967498779 }, { "id": "identity_126749", "image": "A-OKVQA/images/489845.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many speakers can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.40625, "loss": 4.565549850463867 }, { "id": "identity_296965", "image": "TabMWP/images/18183.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which activity on the schedule starts at 5.30 P.M.?\nOptions:\n(A) canoe trip\n(B) rock climbing class\n(C) tennis class\n(D) quiet time" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 37.03125, "loss": 4.133261203765869 }, { "id": "identity_20506", "image": "IconQA/images/69908.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a plane. Which picture is fifth?\nChoices:\n(A) plane\n(B) bear\n(C) train" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.515625, "loss": 4.019327640533447 }, { "id": "identity_292502", "image": "DocVQA/images/jnyb0018_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which individuals are the RJR Attendees for the IGA Retailer of the Year Breakfast that took place on Monday, May 8?" }, { "from": "gpt", "value": "The answer is jim helm" } ], "clip_score": 25.28125, "loss": 4.13102912902832 }, { "id": "identity_262897", "image": "PMC-VQA/images/PMC3292556_pone-0032429-g007_128046.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being depicted in the image shown?\nChoices:\n(A) Cross sections of a frog \n(B) Frontal sections of a human brain \n(C) Sagittal sections of a mouse \n(D) Transverse sections of a chicken egg" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.234375, "loss": 4.349844932556152 }, { "id": "identity_31205", "image": "ScienceQA/images/19570.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these three objects have in common?\nChoices:\n(A) soft\n(B) salty\n(C) slippery" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.640625, "loss": 4.234874725341797 }, { "id": "identity_270719", "image": "FigureQA/images/5024.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Dark Red or Blue?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.125, "loss": 4.398666858673096 }, { "id": "identity_301421", "image": "AI2D/images/966.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label corresponds to the Singly-toothed leaf?\nChoices:\n(A) C\n(B) B\n(C) E\n(D) D" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.359375, "loss": 4.151169776916504 }, { "id": "identity_285583", "image": "PMC-VQA/images/PMC6819398_Fig8_57937.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the patient's body is the bone destruction observed?\nChoices:\n(A) Right femur\n(B) Left tibia\n(C) Fifth lumbar vertebra\n(D) Right shoulder blade" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.859375, "loss": 4.533131122589111 }, { "id": "identity_215750", "image": "A-OKVQA/images/328599.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the jacket of the person on the right?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 22.6875, "loss": 4.994314193725586 }, { "id": "identity_286160", "image": "PMC-VQA/images/PMC9354698_f2_364693.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging modality was utilized for panels A-D? \nOptions:\n(A) CT\n(B) PET\n(C) MRI\n(D) X-ray" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.890625, "loss": 4.025053024291992 }, { "id": "identity_11328", "image": "PMC-VQA/images/PMC6385472_Fig3_439276.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the body was imaged in the CT scan? \nChoices:\n(A) Brain \n(B) Chest \n(C) Abdomen \n(D) Legs " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.84375, "loss": 4.01663875579834 }, { "id": "identity_75052", "image": "TQA/images/abc_question_images/parts_telescope_18154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the component labeled 'S' in the diagram?" }, { "from": "gpt", "value": "The answer is Primary mirror" } ], "clip_score": 25.28125, "loss": 4.711573123931885 }, { "id": "identity_12361", "image": "IconQA/images/62107.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner less likely to land?\nChoices:\n(A) green\n(B) white" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.265625, "loss": 4.0832953453063965 }, { "id": "identity_188080", "image": "A-OKVQA/images/364691.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are sitting on the ground watching the skateboarder?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 27.328125, "loss": 4.655914306640625 }, { "id": "identity_18387", "image": "AI2D/images/707.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Decimation of squids may affect populations of leopard seal, killer whale and _____?\nChoices:\n(A) krill\n(B) penguin\n(C) fish\n(D) none of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.015625, "loss": 4.162929058074951 }, { "id": "identity_323528", "image": "PMC-VQA/images/PMC3585120_pgen-1003302-g005_188990.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this image depict?\nChoices:\n(A) True enamel\n(B) Resorption lacunae\n(C) Crown edge\n(D) Higher magnification" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.125, "loss": 4.329403877258301 }, { "id": "identity_182652", "image": "PMC-VQA/images/PMC2844751_F0004_60300.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the white arrow pointing to in the image?" }, { "from": "gpt", "value": "The answer is A foreign body or calcification" } ], "clip_score": 25.9375, "loss": 4.769274711608887 }, { "id": "identity_183347", "image": "A-OKVQA/images/292730.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stripes are on the man's tie?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 11.5078125, "loss": 4.153564453125 }, { "id": "identity_296636", "image": "PMC-VQA/images/PMC9415140_medicina-58-01079-f002_386028.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What other method can be used to measure angles apart from the standard method?\nChoices:\n(A) Comparing angles to the vertical axis \n(B) Comparing angles to the anterior condylar axis of femur \n(C) Comparing angles to the posterior condylar axis of the femur \n(D) Comparing angles to the lateral axis of the hip" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.859375, "loss": 4.469940185546875 }, { "id": "identity_288404", "image": "ScienceQA/images/10381.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What common property do these three objects share among the options of being rough, fragile, or flexible?\nChoices:\n(A) rough\n(B) fragile\n(C) flexible" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.65625, "loss": 4.304765224456787 }, { "id": "identity_27173", "image": "ScienceQA/images/10707.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) New York\n(B) Virginia\n(C) New Jersey\n(D) Vermont" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.828125, "loss": 4.242683410644531 }, { "id": "identity_214694", "image": "A-OKVQA/images/246927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many beverages can be seen on the table?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 18.453125, "loss": 4.272282600402832 }, { "id": "identity_245036", "image": "VQA2.0/images/316667.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many light sources are visible in the image, and what types of light sources are they?" }, { "from": "gpt", "value": "The answer is Two ceiling fan lights" } ], "clip_score": 16.890625, "loss": 4.410421371459961 }, { "id": "identity_336218", "image": "A-OKVQA/images/377589.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of surface are these individuals sitting on?\nChoices:\n(A) ash\n(B) sand\n(C) marble\n(D) concrete" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.4765625, "loss": 4.418461799621582 }, { "id": "identity_6741", "image": "ScienceQA/images/9684.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these four objects have in common?\nChoices:\n(A) stretchy\n(B) opaque\n(C) sour" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.234375, "loss": 4.469865322113037 }, { "id": "identity_186668", "image": "PMC-VQA/images/PMC5832061_fig6_282327.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the anatomical structure indicated by the curved arrow?" }, { "from": "gpt", "value": "The answer is Dural sac" } ], "clip_score": 25.375, "loss": 4.143645763397217 }, { "id": "identity_265763", "image": "ScienceQA/images/11729.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of South Carolina?\n(A) Columbia\n(B) Charleston\n(C) Biloxi\n(D) Baton Rouge" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.0625, "loss": 4.104062080383301 }, { "id": "identity_188041", "image": "A-OKVQA/images/108446.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly in front of the white truck on the left side of the image?" }, { "from": "gpt", "value": "The answer is A bus" } ], "clip_score": 23.703125, "loss": 4.70237922668457 }, { "id": "identity_269283", "image": "AI2D/images/3502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is the term for the protoplasm outside the nucleus of a cell: \n(A) centriole\n(B) cell membrane\n(C) cytoplasm\n(D) lysosome" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.78125, "loss": 4.074334621429443 }, { "id": "identity_327918", "image": "PMC-VQA/images/PMC3356314_pone-0037520-g003_138700.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the boxes in the images indicate?\nChoices:\n(A) Areas with high muscle density\n(B) Areas with low muscle density\n(C) Areas shown in other images\n(D) Areas with no muscle scars" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.296875, "loss": 4.598946571350098 }, { "id": "identity_400", "image": "FigureQA/images/53768.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Olive Drab the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.109375, "loss": 4.067342281341553 }, { "id": "identity_330049", "image": "A-OKVQA/images/563645.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the red kite near the two people shaped like?\nChoices:\n(A) parrot\n(B) crab\n(C) ladybug\n(D) ball" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.796875, "loss": 4.299738883972168 }, { "id": "identity_23615", "image": "A-OKVQA/images/321127.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The brand company of this man's jacket is headquartered in what country?\nChoices:\n(A) united states\n(B) britain\n(C) france\n(D) italy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.578125, "loss": 4.082337379455566 }, { "id": "identity_329275", "image": "PMC-VQA/images/PMC6326132_gf0100_420639.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the body is shown in the imaging?\nChoices:\n(A) Right upper limb\n(B) Left lower limb\n(C) Abdomen\n(D) Head" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.96875, "loss": 4.865556716918945 }, { "id": "identity_279078", "image": "PMC-VQA/images/PMC7567400_f5_276970.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What information is shown in the top-left corner of the axial view?\nChoices:\n(A) Delta score \n(B) Sum of scores between technician and DeepMedic \n(C) Mean neuroradiologist score for each exam \n(D) Mean technician score for each exam." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.1875, "loss": 4.124578475952148 }, { "id": "identity_302951", "image": "A-OKVQA/images/309322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What business does the company shown by the clock engage in?\nChoices:\n(A) motorcycle manufacturing\n(B) energy\n(C) clock manufacturing\n(D) car manufacturing" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.7265625, "loss": 4.20697546005249 }, { "id": "identity_2759", "image": "FigureQA/images/24034.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Seafoam the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.90625, "loss": 4.353261470794678 }, { "id": "identity_280049", "image": "IconQA/images/52867.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture is the third one in the sequence, following the first picture of a bucket?\nChoices:\n(A) boat\n(B) bucket\n(C) crab" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.734375, "loss": 4.068692207336426 }, { "id": "identity_334631", "image": "A-OKVQA/images/356384.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the individual on the board attempting to maintain?\nChoices:\n(A) dryness\n(B) height\n(C) speed\n(D) balance" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.7109375, "loss": 4.226864814758301 }, { "id": "identity_141087", "image": "ChartQA/images/multi_col_40619.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year did cross-border M&A deals surpass domestic deals?" }, { "from": "gpt", "value": "The answer is No year" } ], "clip_score": 23.40625, "loss": 4.544922351837158 }, { "id": "identity_331460", "image": "PMC-VQA/images/PMC6536547_Fig8_485626.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the higher magnification view of the rectangular dotted area in the center of the image? \nChoices:\n(A) Bacteria \n(B) Fungi \n(C) Plant cells \n(D) None of the above." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.6875, "loss": 4.284177303314209 }, { "id": "identity_290473", "image": "AI2D/images/3053.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, what does B represent?\nChoices:\n(A) DNA\n(B) ribosome\n(C) cell wall\n(D) cell membrane" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.328125, "loss": 4.736512660980225 }, { "id": "identity_312340", "image": "FigureQA/images/16216.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Cyan?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.5, "loss": 4.2071099281311035 }, { "id": "identity_200841", "image": "PMC-VQA/images/PMC6287602_Figure1_409994.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other cell types visible besides the foam cells in the image?" }, { "from": "gpt", "value": "The answer is Not prominently visible" } ], "clip_score": 26.296875, "loss": 4.046388626098633 }, { "id": "identity_321320", "image": "A-OKVQA/images/17667.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is moving this vessel?\nChoices:\n(A) tugged\n(B) motor\n(C) wind\n(D) paddle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.0625, "loss": 4.345461368560791 }, { "id": "identity_7314", "image": "Super-CLEVR/images/superCLEVR_new_001512.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a big yellow motorbike made of the same material as the large cruiser?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 26.796875, "loss": 5.068385124206543 }, { "id": "identity_185061", "image": "PMC-VQA/images/PMC9250258_Fig2_329773.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the overall tone of the image?" }, { "from": "gpt", "value": "The answer is Grayscale with high contrast" } ], "clip_score": 18.265625, "loss": 4.46543025970459 }, { "id": "identity_28806", "image": "Super-CLEVR/images/superCLEVR_new_001636.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the biplane behind the red bicycle?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 24.078125, "loss": 4.691535472869873 }, { "id": "identity_10138", "image": "PMC-VQA/images/PMC3724503_F2_220501.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What view is shown in the image? \nChoices:\n(A) Dorsal view \n(B) Frontal view \n(C) Ventral view \n(D) Lateral view " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.40625, "loss": 4.232106685638428 }, { "id": "identity_36948", "image": "DocVQA/images/gscg0023_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is responsible for chain authorisation?" }, { "from": "gpt", "value": "The answer is pat chapel" } ], "clip_score": 20.96875, "loss": 5.112049579620361 }, { "id": "identity_76705", "image": "AI2D/images/450.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tertiary consumers are depicted in the food web?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 29.015625, "loss": 4.676904678344727 }, { "id": "identity_29499", "image": "PMC-VQA/images/PMC5661094_fig6_232582.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of image is shown in the caption? \nChoices:\n(A) CT scan \n(B) X-ray \n(C) MRI \n(D) Ultrasound " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.1875, "loss": 4.052309989929199 }, { "id": "identity_27746", "image": "TQA/images/abc_question_images/sun_layers_16310.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is between the convection zone and photosphere?\nChoices:\n(A) M\n(B) P\n(C) J\n(D) V" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.453125, "loss": 4.3293304443359375 }, { "id": "identity_334363", "image": "PMC-VQA/images/PMC8085245_Fig4_431163.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which two 2D tumor measurement methods are depicted in the image?\nChoices:\n(A) Length and width\n(B) Weight and height\n(C) Area and perimeter\n(D) Volume and density" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.09375, "loss": 4.135249137878418 }, { "id": "identity_281396", "image": "A-OKVQA/images/26164.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the painting on the wall?\nChoices:\n(A) traffic signs\n(B) guide stickers\n(C) advertisement\n(D) art work" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.90625, "loss": 4.482560634613037 }, { "id": "identity_281417", "image": "IconQA/images/100673.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the fraction of the shape that is colored purple? Is it:\n(A) 1/2\n(B) 1/3\n(C) 1/4\n(D) 1/5" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.15625, "loss": 4.093646049499512 }, { "id": "identity_35667", "image": "A-OKVQA/images/331082.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people doing on the street?\nChoices:\n(A) racing\n(B) protesting\n(C) jogging\n(D) shopping" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.109375, "loss": 4.087710857391357 }, { "id": "identity_286004", "image": "IconQA/images/74865.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) slide\n(B) flip\n(C) turn" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.171875, "loss": 4.178685188293457 }, { "id": "identity_33988", "image": "AI2D/images/4329.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the lychee represented by?\nChoices:\n(A) B\n(B) A\n(C) D\n(D) none of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.625, "loss": 4.239525318145752 }, { "id": "identity_278908", "image": "A-OKVQA/images/571048.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of seating is located in front of the yellow plane?\nChoices:\n(A) chair\n(B) bench\n(C) bed\n(D) sofa" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.8125, "loss": 4.420062065124512 }, { "id": "identity_25121", "image": "ScienceQA/images/5636.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the following statement about our solar system true or false?\nThe smallest planet is made mainly of rock.\nChoices:\n(A) false\n(B) true" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.53125, "loss": 4.005278587341309 }, { "id": "identity_38359", "image": "IconQA/images/72180.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a train. Which picture is eighth?\nChoices:\n(A) bear\n(B) plane\n(C) train" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.125, "loss": 4.359371662139893 }, { "id": "identity_112956", "image": "VQA-AS/images/13548.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cups are visible in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 19.859375, "loss": 4.325167655944824 }, { "id": "identity_337780", "image": "PMC-VQA/images/PMC3858590_Fig2_250830.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the radiographic finding in this image?\nChoices:\n(A) Decreased breast density \n(B) Increased breast density \n(C) Mass in the upper outer breast \n(D) Calcifications in the lower inner breast" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.953125, "loss": 4.51663064956665 }, { "id": "identity_18374", "image": "A-OKVQA/images/374685.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is located in the cages?\nChoices:\n(A) animal\n(B) pocket book\n(C) food\n(D) bottle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.484375, "loss": 4.411884307861328 }, { "id": "identity_281423", "image": "DVQA/images/bar_train_00019781.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which bar label corresponds to the second bar from the left in each group?" }, { "from": "gpt", "value": "The answer is output" } ], "clip_score": 24.46875, "loss": 4.764059543609619 }, { "id": "identity_291116", "image": "PMC-VQA/images/PMC5537248_Fig2_197740.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was observed in the occipital lesion on non-enhanced CT imaging? \nChoices:\n(A) Enlargement \n(B) Calcification \n(C) Inflammation \n(D) Bleeding" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.109375, "loss": 4.215905666351318 }, { "id": "identity_34257", "image": "IconQA/images/56841.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner more likely to land?\nChoices:\n(A) white\n(B) blue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.890625, "loss": 4.032064914703369 }, { "id": "identity_137142", "image": "DocVQA/images/xkbl0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the reference number mentioned in the document?" }, { "from": "gpt", "value": "The answer is UCS______" } ], "clip_score": 19.796875, "loss": 4.500553607940674 }, { "id": "identity_233563", "image": "Geometry3K/images/313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the value of \\( x \\) and then determine the perimeter of triangle \\( ABD \\)" }, { "from": "gpt", "value": "The answer is 73" } ], "clip_score": 32.65625, "loss": 4.019235134124756 }, { "id": "identity_35078", "image": "A-OKVQA/images/160428.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The orange orbs seen here are actually what?\nChoices:\n(A) real oranges\n(B) plastic\n(C) lemons\n(D) pinatas" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.140625, "loss": 4.301304817199707 }, { "id": "identity_293427", "image": "PMC-VQA/images/PMC7154549_epi16451-fig-0002_150985.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging map clearly displayed the lesion in a patient with FCD IIb?\nOptions:\n(A) FLAIR map\n(B) FA map\n(C) ICVF map\n(D) MD map" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.75, "loss": 4.5394792556762695 }, { "id": "identity_280911", "image": "A-OKVQA/images/80543.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the man in black crouching?\nChoices:\n(A) to dive\n(B) to jump\n(C) gain speed\n(D) to sit" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.3203125, "loss": 4.257821083068848 }, { "id": "identity_175417", "image": "TabMWP/images/16474.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which pizza type received the highest total votes from both schools combined?" }, { "from": "gpt", "value": "The answer is Ham" } ], "clip_score": 32.09375, "loss": 4.140063285827637 }, { "id": "identity_317946", "image": "A-OKVQA/images/196192.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is written on these tags?\nChoices:\n(A) hotel\n(B) destination\n(C) good luck\n(D) itinerary" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.03125, "loss": 4.720429420471191 }, { "id": "identity_197344", "image": "VQA2.0/images/386390.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sheep are visible in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 24.53125, "loss": 4.159687519073486 }, { "id": "identity_35678", "image": "A-OKVQA/images/393464.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are children pictured above doing?\nChoices:\n(A) playing\n(B) jogging\n(C) eating\n(D) walking" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.90625, "loss": 4.095663547515869 }, { "id": "identity_11717", "image": "AI2D/images/2687.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown at label B?\nChoices:\n(A) September equinox\n(B) March equinox\n(C) June solstice\n(D) December solstice" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.59375, "loss": 4.395071983337402 }, { "id": "identity_309500", "image": "Super-CLEVR/images/superCLEVR_new_000234.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the big purple thing behind the small gray scooter?" }, { "from": "gpt", "value": "The answer is double bus" } ], "clip_score": 29.25, "loss": 4.105320453643799 }, { "id": "identity_261425", "image": "PMC-VQA/images/PMC4911964_NCV556F5_42411.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which structure is labeled as number 5 in the image?\nChoices:\n(A) Blood vessel \n(B) Peripheral Bronchiectasis \n(C) Lung Tumor \n(D) Diaphragm" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.984375, "loss": 4.450923919677734 }, { "id": "identity_118484", "image": "PMC-VQA/images/PMC8961915_Fig3_237822.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can we determine the severity of the condition from this image alone?" }, { "from": "gpt", "value": "The answer is No, additional information needed" } ], "clip_score": 27.28125, "loss": 4.10329008102417 }, { "id": "identity_31733", "image": "VQA2.0/images/390792.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the people coming out of?" }, { "from": "gpt", "value": "The answer is water" } ], "clip_score": 9.8515625, "loss": 4.03214168548584 }, { "id": "identity_260796", "image": "A-OKVQA/images/200671.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify three specific features in the image that indicate this is a street skateboarding competition, and explain how each feature supports this conclusion" }, { "from": "gpt", "value": "The answer is Ledges, urban environment, audience and branding" } ], "clip_score": 27.796875, "loss": 5.119114875793457 }, { "id": "identity_337222", "image": "A-OKVQA/images/41728.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible in the window of the tall white structure?\nChoices:\n(A) kites\n(B) pictures\n(C) light\n(D) fishing poles" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.8828125, "loss": 4.327279567718506 }, { "id": "identity_20742", "image": "PMC-VQA/images/PMC4329203_Fig5_358497.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is located in the center of the cystic tumor? \nChoices:\n(A) Fluid \n(B) Cyst \n(C) Lesion \n(D) Tumor " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.578125, "loss": 4.836602210998535 }, { "id": "identity_703", "image": "FigureQA/images/45323.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Web Gray the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.359375, "loss": 4.150025367736816 }, { "id": "identity_301759", "image": "AI2D/images/3463.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is located between the simple eye and the brain?\nChoices:\n(A) lung\n(B) none of the above\n(C) poison gland\n(D) eye" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.03125, "loss": 4.007733345031738 }, { "id": "identity_266502", "image": "ScienceQA/images/11999.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) New York\n(B) Maryland\n(C) Tennessee\n(D) New Jersey" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.546875, "loss": 4.657981872558594 }, { "id": "identity_200152", "image": "A-OKVQA/images/308034.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the boat?" }, { "from": "gpt", "value": "The answer is Mario" } ], "clip_score": 22.203125, "loss": 4.519403457641602 }, { "id": "identity_222359", "image": "VQA-AS/images/5304.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many legs does the small table have?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.140625, "loss": 4.583592414855957 }, { "id": "identity_25210", "image": "A-OKVQA/images/121651.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the side of the nearest boat want you to visit?\nChoices:\n(A) website\n(B) brazil\n(C) japan\n(D) restaurant" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.6875, "loss": 4.160844326019287 }, { "id": "identity_117468", "image": "A-OKVQA/images/247909.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the street mentioned on the sign in the background?" }, { "from": "gpt", "value": "The answer is Crystal Lake Dr" } ], "clip_score": 17.40625, "loss": 4.275825023651123 }, { "id": "identity_287596", "image": "A-OKVQA/images/84162.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of water transportation vehicle is used for this purpose? Is it a yacht, canoe, cargo ship, or raft boat?\nChoices:\n(A) yacht\n(B) canoe\n(C) cargo ship\n(D) raft boat" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.515625, "loss": 4.086245536804199 }, { "id": "identity_39958", "image": "DVQA/images/bar_train_00176665.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item has the highest total sales across both stores?" }, { "from": "gpt", "value": "The answer is shift" } ], "clip_score": 30.921875, "loss": 4.628155708312988 }, { "id": "identity_212202", "image": "A-OKVQA/images/308760.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object to the left of the large toy?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 17.15625, "loss": 5.257664680480957 }, { "id": "identity_296786", "image": "PMC-VQA/images/PMC3212535_pone-0027190-g002_115008.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What specific feature is clearly shown in the image?\nOptions:\n(A) Trabeculae\n(B) Crossribs\n(C) Microribs\n(D) Upper laminae" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.46875, "loss": 5.047511100769043 }, { "id": "identity_117413", "image": "VQA-RAD/images/synpic51872.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the heart appear to be of normal size on the image?" }, { "from": "gpt", "value": "The answer is Heart size appears normal" } ], "clip_score": 24.21875, "loss": 4.391704082489014 }, { "id": "identity_182705", "image": "PMC-VQA/images/PMC4794473_Fig3_12315.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of tumors are indicated in the image?" }, { "from": "gpt", "value": "The answer is V2 Tumor" } ], "clip_score": 25.78125, "loss": 4.398597240447998 }, { "id": "identity_34852", "image": "IconQA/images/51605.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you select a marble without looking, how likely is it that you will pick a black one?\nChoices:\n(A) impossible\n(B) probable\n(C) certain\n(D) unlikely" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.34375, "loss": 4.433599472045898 }, { "id": "identity_10577", "image": "AI2D/images/3255.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the center of the nucleus called?\nChoices:\n(A) nucleolus\n(B) smooth ER\n(C) none of the above\n(D) cytoplasm" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.609375, "loss": 4.050571441650391 }, { "id": "identity_109550", "image": "TextVQA/images/a8f62b52db6f95c5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the establishment at the corner of the street?" }, { "from": "gpt", "value": "The answer is Amis de la Estudiantina" } ], "clip_score": 22.984375, "loss": 4.030031204223633 }, { "id": "identity_208798", "image": "PMC-VQA/images/PMC5841142_awx262-F4_284852.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different colored markers are present in the brain image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.875, "loss": 4.253731727600098 }, { "id": "identity_145861", "image": "DocVQA/images/ltgj0223_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any individuals with a position that includes the word 'Executive'?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 18.21875, "loss": 4.237925052642822 }, { "id": "identity_128654", "image": "A-OKVQA/images/370337.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the roof of the white building?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 13.03125, "loss": 4.4532270431518555 }, { "id": "identity_127407", "image": "Super-CLEVR/images/superCLEVR_new_001552.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles have two wheels?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.125, "loss": 5.171184062957764 }, { "id": "identity_293182", "image": "A-OKVQA/images/356015.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Between what objects are the horses in the foreground located?\nChoices:\n(A) boats\n(B) statues\n(C) poles\n(D) fish netting" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.703125, "loss": 4.120736598968506 }, { "id": "identity_281978", "image": "A-OKVQA/images/116358.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During the game, what is the specific time of day being depicted among the following options: (A) midnight, (B) twilight, (C) dusk, or (D) dawn?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.6328125, "loss": 4.4404425621032715 }, { "id": "identity_10892", "image": "AI2D/images/898.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If insects were eliminated form the web, who might starve?\nChoices:\n(A) Hawks\n(B) Rabbits\n(C) Birds\n(D) Squirrels" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.03125, "loss": 4.106965065002441 }, { "id": "identity_237775", "image": "A-OKVQA/images/331140.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does the presence of the orange ring on the boat contribute to the overall safety measures in the marina, considering the types of boats and their typical uses?" }, { "from": "gpt", "value": "The answer is Enhances safety by providing rescue means for all boats" } ], "clip_score": 23.359375, "loss": 4.227558612823486 }, { "id": "identity_231474", "image": "VizWiz/images/VizWiz_train_00015286.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tines does this fork have, and what is the total number of tines if you have three such forks?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 32.28125, "loss": 4.002331733703613 }, { "id": "identity_190408", "image": "PMC-VQA/images/PMC8687640_f01_142757.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any indications of pathology in this histological image?" }, { "from": "gpt", "value": "The answer is No obvious pathology" } ], "clip_score": 34.875, "loss": 4.37345027923584 }, { "id": "identity_299821", "image": "PMC-VQA/images/PMC7482117_f2_251609.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of imaging procedure was conducted? \nOptions:\n(A) X-ray\n(B) CT scan\n(C) Magnetic resonance imaging\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.640625, "loss": 4.655704498291016 }, { "id": "identity_131206", "image": "TextVQA/images/47f01ef525e961f0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of pitch is likely being attempted, as indicated by the catcher's stance and glove position?" }, { "from": "gpt", "value": "The answer is A low pitch" } ], "clip_score": 27.65625, "loss": 4.0453572273254395 }, { "id": "identity_314010", "image": "FigureQA/images/48728.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Purple?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.421875, "loss": 4.058924198150635 }, { "id": "identity_196061", "image": "A-OKVQA/images/278923.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the base of the vase?" }, { "from": "gpt", "value": "The answer is Round" } ], "clip_score": 25.984375, "loss": 4.358157157897949 }, { "id": "identity_332260", "image": "A-OKVQA/images/471188.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which company owns vehicles like the ones on the road?\nChoices:\n(A) tesla\n(B) dunkin donuts\n(C) mcdonalds\n(D) greyhound" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 12.921875, "loss": 4.133415699005127 }, { "id": "identity_9086", "image": "PMC-VQA/images/PMC6686505_Fig2_21285.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the view of the chest radiograph shown? \nChoices:\n(A) AP view \n(B) Lateral view \n(C) PA view \n(D) None of the above " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.59375, "loss": 4.2167863845825195 }, { "id": "identity_299195", "image": "A-OKVQA/images/577838.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which setting does this venue belong to: park, farm, zoo, or wilderness?\nChoices:\n(A) park\n(B) farm\n(C) zoo\n(D) wilderness" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.59375, "loss": 4.663895606994629 }, { "id": "identity_31798", "image": "A-OKVQA/images/262124.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of weather is this area experiencing?\nChoices:\n(A) rain\n(B) snow\n(C) hail\n(D) wind" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.734375, "loss": 4.5494256019592285 }, { "id": "identity_269606", "image": "FigureQA/images/71151.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Sky Blue or Dodger Blue?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.359375, "loss": 4.589957237243652 }, { "id": "identity_13028", "image": "AI2D/images/4640.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: which of these insect is longest one\nChoices:\n(A) a\n(B) c\n(C) D\n(D) b" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.875, "loss": 4.082329750061035 }, { "id": "identity_315199", "image": "TQA/images/abc_question_images/earth_eclipses_11656.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which one is the umbra?\nChoices:\n(A) W\n(B) R\n(C) U\n(D) F" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.1875, "loss": 4.352944374084473 }, { "id": "identity_53897", "image": "MapQA/images/map_11549.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a percentage appearance in the range of 47.0%-51.7%?" }, { "from": "gpt", "value": "The answer is Four states" } ], "clip_score": 34.65625, "loss": 4.149266242980957 }, { "id": "identity_223467", "image": "TextVQA/images/0085245e85b9815d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many phones are used to create the letter 'E'?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 15.8125, "loss": 5.070671081542969 }, { "id": "identity_104784", "image": "Super-CLEVR/images/superCLEVR_new_000814.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest bicycle in the image?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 29.0, "loss": 4.329741954803467 }, { "id": "identity_118273", "image": "TextVQA/images/3e569f2f4c818ed6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last word on the left page?" }, { "from": "gpt", "value": "The answer is grow" } ], "clip_score": 16.46875, "loss": 5.451783657073975 }, { "id": "identity_61521", "image": "DocVQA/images/fzgj0223_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size group classification for Hittkari Multifilters Ltd.?" }, { "from": "gpt", "value": "The answer is Decile 5" } ], "clip_score": 23.828125, "loss": 4.003873825073242 }, { "id": "identity_325323", "image": "VQA-AS/images/6751.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the person sitting on the rug?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 27.21875, "loss": 4.112652778625488 }, { "id": "identity_312641", "image": "FigureQA/images/55637.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.453125, "loss": 4.409785747528076 }, { "id": "identity_325276", "image": "FigureQA/images/35555.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.453125, "loss": 4.266334533691406 }, { "id": "identity_314600", "image": "PMC-VQA/images/PMC3921228_pone-0088785-g001_266413.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which side of the brain is shown in the images?\nChoices:\n(A) right\n(B) left\n(C) both\n(D) none" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.390625, "loss": 4.295541763305664 }, { "id": "identity_2058", "image": "PMC-VQA/images/PMC7670585_fig4_299612.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the image feature in the left upper lobe? \nChoices:\n(A) typical halo sign \n(B) nontypical halo sign \n(C) ground-glass opacities \n(D) consolidation " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.234375, "loss": 4.371364593505859 }, { "id": "identity_211610", "image": "PMC-VQA/images/PMC5760665_Fig3_261582.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label \"DBP\" point to in the image?" }, { "from": "gpt", "value": "The answer is A tunnel-like structure" } ], "clip_score": 26.25, "loss": 4.511477947235107 }, { "id": "identity_325840", "image": "PMC-VQA/images/PMC4538768_fig8_415200.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the condition of the Maxillary right central tooth?\nChoices:\n(A) Ankylosis \n(B) Internal resorption \n(C) External resorption \n(D) Fracture" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.71875, "loss": 4.59389591217041 }, { "id": "identity_38406", "image": "TextVQA/images/3492f2b6c34e80cc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the name of the character drawn to the left of duffy?" }, { "from": "gpt", "value": "The answer is roz" } ], "clip_score": 22.734375, "loss": 4.42898416519165 }, { "id": "identity_195364", "image": "A-OKVQA/images/267885.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the man in the foreground wearing on his back?" }, { "from": "gpt", "value": "The answer is A vest with the word 'WAD'" } ], "clip_score": 17.078125, "loss": 4.047111988067627 }, { "id": "identity_330631", "image": "TabMWP/images/9519.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table displays a function. Is it linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.25, "loss": 4.25185489654541 }, { "id": "identity_145868", "image": "DocVQA/images/hjpn0000_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the RJR Representative?" }, { "from": "gpt", "value": "The answer is G. N. Kourc Jr" } ], "clip_score": 23.375, "loss": 5.008422374725342 }, { "id": "identity_261612", "image": "DVQA/images/bar_train_00107775.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group of bars, starting from the left, is labeled as the third one?" }, { "from": "gpt", "value": "The answer is cold" } ], "clip_score": 15.9609375, "loss": 4.129424571990967 }, { "id": "identity_49731", "image": "DocVQA/images/fllm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the check received?" }, { "from": "gpt", "value": "The answer is To cover a round trip ticket economy class" } ], "clip_score": 16.828125, "loss": 4.453571319580078 }, { "id": "identity_30951", "image": "A-OKVQA/images/539317.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What will the large boat do in the sea?\nChoices:\n(A) sell floats\n(B) haul weed\n(C) sell cokes\n(D) fish" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.25, "loss": 4.227272033691406 }, { "id": "identity_144690", "image": "DocVQA/images/mflv0001_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the message written in the fax?" }, { "from": "gpt", "value": "The answer is NUMBA Congress Script. Please call this a.m. w/comments. Thanks" } ], "clip_score": 23.75, "loss": 4.330489158630371 }, { "id": "identity_276404", "image": "TQA/images/abc_question_images/parts_microscope_17182.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the function of letter A in the context of a microscope?\nChoices:\n(A) AS PROTECTION\n(B) TO ADJUST FOCUS\n(C) FOR MAGNIFICATION\n(D) IT IS THE OCULAR LENS WHERE THE USER LOOKS INTO THE MICROSCOPE TO VIEW THE OBJECT BEING ANALYZED" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.328125, "loss": 4.228646755218506 }, { "id": "identity_267210", "image": "AI2D/images/2730.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which stage follows the waning gibbous phase in the lunar cycle? Is it:\n(A) waxing gibbous\n(B) new moon\n(C) last quarter\n(D) first quarter" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.328125, "loss": 4.2402191162109375 }, { "id": "identity_268083", "image": "AI2D/images/3284.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the structure labeled A from the following options:\n(A) Lysosome\n(B) Microtubules\n(C) Ribosomes\n(D) Nucleus" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.8125, "loss": 4.021352767944336 }, { "id": "identity_132195", "image": "TextVQA/images/7f1324046d6d2e5a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are wearing helmets?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.40625, "loss": 5.008127212524414 }, { "id": "identity_9453", "image": "AI2D/images/3493.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is the outer most layer of cell?\nChoices:\n(A) Cytoplasm\n(B) Cell wall\n(C) Chloraplast\n(D) Vacuole" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.375, "loss": 4.244187355041504 }, { "id": "identity_64013", "image": "AI2D/images/4392.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In terms of leaflet arrangement, what is the main difference between even compound and palmately compound leaves?" }, { "from": "gpt", "value": "The answer is Attachment point of leaflets" } ], "clip_score": 39.0, "loss": 4.134976863861084 }, { "id": "identity_281637", "image": "TabMWP/images/5492.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which day of the week had the lowest number of calls received by the technical support line? Was it Monday, Tuesday, Wednesday, or Thursday?\nChoices:\n(A) Monday\n(B) Tuesday\n(C) Wednesday\n(D) Thursday" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.0625, "loss": 4.452552795410156 }, { "id": "identity_122690", "image": "PMC-VQA/images/PMC7137947_RSOS192179F2_145974.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any holes visible in the object?" }, { "from": "gpt", "value": "The answer is Yes, two holes" } ], "clip_score": 16.203125, "loss": 4.192511558532715 }, { "id": "identity_21981", "image": "PMC-VQA/images/PMC3748773_fig9_225834.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organ is being compressed by the hepatocellular carcinoma? \nChoices:\n(A) Right kidney \n(B) Left kidney \n(C) Liver \n(D) Spleen " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.703125, "loss": 4.512277603149414 }, { "id": "identity_27440", "image": "ScienceQA/images/10850.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Hawaii?\nChoices:\n(A) Phoenix\n(B) Honolulu\n(C) Juneau\n(D) Frankfort" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.40625, "loss": 4.461428165435791 }, { "id": "identity_319597", "image": "FigureQA/images/93197.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Blue the one with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.828125, "loss": 4.591111660003662 }, { "id": "identity_265929", "image": "FigureQA/images/76928.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cornflower the toughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.4375, "loss": 4.2748003005981445 }, { "id": "identity_216024", "image": "VQA-AS/images/19781.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.078125, "loss": 4.437231540679932 }, { "id": "identity_238787", "image": "TextVQA/images/d54c3f498e43774e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main theme discussed in the magazine issues shown in the image, and how is it visually represented on both covers?" }, { "from": "gpt", "value": "The answer is Internet regulation and diversity" } ], "clip_score": 26.046875, "loss": 5.404803276062012 }, { "id": "identity_297996", "image": "TQA/images/question_images/tides_145.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which force is responsible for pulling the Earth and its oceans? \n(A) Mars Gravity\n(B) Sun's gravity\n(C) Moon Gravity\n(D) Earth's gravity" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.84375, "loss": 4.292348861694336 }, { "id": "identity_307355", "image": "PMC-VQA/images/PMC6920284_Fig11_86575.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which vein is indicated in the caption?\nChoices:\n(A) superior vena cava\n(B) inferior vena cava\n(C) pulmonary vein\n(D) renal vein" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.53125, "loss": 4.709085941314697 }, { "id": "identity_288672", "image": "PMC-VQA/images/PMC4399108_Fig1_378586.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the body was the swelling located?\nChoices:\n(A) Stomach\n(B) Arm\n(C) Head\n(D) Leg" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.28125, "loss": 4.0101799964904785 }, { "id": "identity_212883", "image": "A-OKVQA/images/76578.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the elephants' skin?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 23.4375, "loss": 4.231215000152588 }, { "id": "identity_309745", "image": "VQA-RAD/images/synpic27646.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How can the pattern in this image be described?" }, { "from": "gpt", "value": "The answer is Micronodular" } ], "clip_score": 23.125, "loss": 4.990991592407227 }, { "id": "identity_307422", "image": "TQA/images/abc_question_images/photosynthesis_11266.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents oxygen in the diagram?\nChoices:\n(A) Y\n(B) F\n(C) R\n(D) D" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.453125, "loss": 4.054386138916016 }, { "id": "identity_327576", "image": "A-OKVQA/images/104486.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who is the person wearing a red coat?\nChoices:\n(A) bell boy\n(B) hotel guest\n(C) hotel manager\n(D) housekeeper" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.453125, "loss": 4.625075340270996 }, { "id": "identity_337326", "image": "TextVQA/images/1b9e29f852b3fec2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What types of items are available here?" }, { "from": "gpt", "value": "The answer is comic pen" } ], "clip_score": 23.125, "loss": 5.129408359527588 }, { "id": "identity_120518", "image": "TextVQA/images/9fe732e820cda038.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What feature of the washing machine is highlighted on the smartphone screen?" }, { "from": "gpt", "value": "The answer is 6 Motion" } ], "clip_score": 24.671875, "loss": 4.114924907684326 }, { "id": "identity_279639", "image": "ScienceQA/images/15355.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of North Carolina?\n(A) Raleigh\n(B) Louisville\n(C) Salt Lake City\n(D) Boston" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.828125, "loss": 4.250472545623779 }, { "id": "identity_13775", "image": "VizWiz/images/VizWiz_train_00005350.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this food?" }, { "from": "gpt", "value": "The answer is this meat" } ], "clip_score": 22.03125, "loss": 5.864651203155518 }, { "id": "identity_319654", "image": "DocVQA/images/qkxn0226_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How should the implant fixation be checked to ensure that this is secure in the bone?" }, { "from": "gpt", "value": "The answer is with finger pressure" } ], "clip_score": 26.609375, "loss": 4.725897312164307 }, { "id": "identity_331371", "image": "A-OKVQA/images/318701.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How does the way this individual appears make them look like what kind of being?\nChoices:\n(A) ghost\n(B) werewolf\n(C) vampire\n(D) wendigo" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.8203125, "loss": 4.053629398345947 }, { "id": "identity_311454", "image": "AI2D/images/1989.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the sagebrush was removed from this ecosystem, which animal would be left without any food sources?\nChoices:\n(A) Mule deer\n(B) Antelope\n(C) Moose\n(D) Man" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.671875, "loss": 4.125126838684082 }, { "id": "identity_283699", "image": "PMC-VQA/images/PMC6163546_diagnostics-08-00053-f019_374966.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of breast view is depicted in the schematic? \nOptions:\n(A) Frontal view \n(B) Lateral view \n(C) Oblique view \n(D) Transverse view" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.1875, "loss": 4.356313228607178 }, { "id": "identity_196345", "image": "TextVQA/images/55eb940224c400ea.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the station marked with an image on the left side of the board?" }, { "from": "gpt", "value": "The answer is Heritage Square Station" } ], "clip_score": 30.5, "loss": 4.191619873046875 }, { "id": "identity_330695", "image": "A-OKVQA/images/31057.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is near this truck?\nChoices:\n(A) traffic cone\n(B) carrot\n(C) horse\n(D) stop sign" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.359375, "loss": 4.364951133728027 }, { "id": "identity_28147", "image": "FigureQA/images/7461.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.875, "loss": 4.149925231933594 }, { "id": "identity_263223", "image": "A-OKVQA/images/313201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For which food is this country renowned?\nChoices:\n(A) pizza\n(B) poutine\n(C) sushi\n(D) borsht" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.515625, "loss": 4.1479597091674805 }, { "id": "identity_307095", "image": "TQA/images/question_images/human_system_respiratory_2858.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is below the pleural fluid?\nChoices:\n(A) Diaphragm\n(B) Trachea\n(C) Aorta\n(D) Lungs" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.5, "loss": 4.104267120361328 }, { "id": "identity_125013", "image": "VQA2.0/images/338549.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles can be seen in the background?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 16.078125, "loss": 4.301419734954834 }, { "id": "identity_269564", "image": "TQA/images/abc_question_images/human_system_nervous_16176.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the letter that represents the corpus callosum among the following choices: K, Y, L, or A?\nChoices:\n(A) K\n(B) Y\n(C) L\n(D) A" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.0625, "loss": 4.589819431304932 }, { "id": "identity_214524", "image": "VQA2.0/images/211919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats can be seen in the water?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 18.4375, "loss": 4.324166297912598 }, { "id": "identity_21130", "image": "TQA/images/abc_question_images/earth_day_night_10096.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter identifies sunlight?\nChoices:\n(A) M\n(B) L\n(C) R\n(D) V" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.3125, "loss": 4.587324619293213 }, { "id": "identity_331811", "image": "Super-CLEVR/images/superCLEVR_new_001495.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the big motorbike that is the same color as the school bus made of?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 27.296875, "loss": 4.281955718994141 }, { "id": "identity_301771", "image": "AI2D/images/2071.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If this predator were to disappear from the food web, who would benefit the most?\nChoices:\n(A) Northern Anchovy\n(B) Giant Pacific Octopus\n(C) Phytoplankton\n(D) Zooplankton" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.3125, "loss": 4.292255401611328 }, { "id": "identity_20052", "image": "A-OKVQA/images/223095.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of ship is the one in the water?\nChoices:\n(A) container\n(B) naval\n(C) passenger\n(D) tanker" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.484375, "loss": 4.44423770904541 }, { "id": "identity_289703", "image": "PMC-VQA/images/PMC6857121_F1_68934.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of MRI sequence was utilized to capture the image? \nOptions:\n(A) T1-weighted \n(B) T2-weighted \n(C) Diffusion-weighted \n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.09375, "loss": 4.062740325927734 }, { "id": "identity_305894", "image": "A-OKVQA/images/458673.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are these green?\nChoices:\n(A) paint\n(B) moss\n(C) mold\n(D) rust" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 8.9296875, "loss": 4.5999579429626465 }, { "id": "identity_202525", "image": "VQA2.0/images/428788.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles have visible blue lights on top?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.984375, "loss": 5.02116060256958 }, { "id": "identity_271192", "image": "A-OKVQA/images/494622.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of environment is this in terms of style? \nOptions:\n(A) Russian\n(B) Libertarian\n(C) Victorian\n(D) Cajun" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.046875, "loss": 4.297667503356934 }, { "id": "identity_286732", "image": "FigureQA/images/72900.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is greater, Lawn Green or Chocolate?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.59375, "loss": 4.453694820404053 }, { "id": "identity_334028", "image": "A-OKVQA/images/353507.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What place might you visit in the thing the cake is shaped like?\nChoices:\n(A) ocean\n(B) volcano\n(C) cave\n(D) space" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.09375, "loss": 4.000112056732178 }, { "id": "identity_279391", "image": "PMC-VQA/images/PMC6013898_Fig2_331973.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which specific location within the brain can the ovoid mass be found? \nChoices:\n(A) Below the left orbital globe, on the left frontal bone\n(B) Above the right orbital globe, on the right frontal bone\n(C) Above the left occipital globe, on the left parietal bone\n(D) Below the right parietal globe, on the right temporal bone" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.8125, "loss": 4.171021938323975 }, { "id": "identity_274652", "image": "FigureQA/images/75185.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly describes the smoothness of Saddle Brown? \n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.046875, "loss": 4.628968238830566 }, { "id": "identity_324479", "image": "VizWiz/images/VizWiz_train_00003279.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of soda is this?" }, { "from": "gpt", "value": "The answer is aef" } ], "clip_score": 27.046875, "loss": 6.229708194732666 }, { "id": "identity_294264", "image": "PMC-VQA/images/PMC6206383_Fig20_387543.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following abnormalities is depicted in the CT angiogram images? \n(A) Lesion surrounding the left internal carotid artery \n(B) Lesion surrounding the right internal carotid artery \n(C) Lesion surrounding the left vertebral artery \n(D) Lesion surrounding the right vertebral artery." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.0625, "loss": 4.165213584899902 }, { "id": "identity_186085", "image": "PMC-VQA/images/PMC8977489_F5_242901.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sets of images are presented in this figure?" }, { "from": "gpt", "value": "The answer is Two sets" } ], "clip_score": 19.734375, "loss": 4.266287326812744 }, { "id": "identity_10746", "image": "FigureQA/images/46489.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Purple greater than Orange?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.1875, "loss": 4.02333402633667 }, { "id": "identity_282905", "image": "VizWiz/images/VizWiz_train_00016462.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this a transformer with a voltage rating of twelve volts? It should be labeled as twelve v or one two v." }, { "from": "gpt", "value": "The answer is bc" } ], "clip_score": 22.15625, "loss": 5.22697114944458 }, { "id": "identity_118708", "image": "VQA-AS/images/6541.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many logs are visible in the image?" }, { "from": "gpt", "value": "The answer is Two logs" } ], "clip_score": 20.828125, "loss": 4.188609600067139 }, { "id": "identity_328393", "image": "ScienceQA/images/13419.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these places does the prime meridian intersect?\nChoices:\n(A) Asia\n(B) North America\n(C) Antarctica" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.59375, "loss": 4.078283309936523 }, { "id": "identity_31123", "image": "PMC-VQA/images/PMC5960698_F2_316506.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the image show?\nChoices:\n(A) Edema of the cerebellum (black arrow). \n(B) Atrophy of the nodulus (black arrow). \n(C) Enlarged fourth ventricle without any changes in the cerebellum. \n(D) Enlarged nodulus (black arrow) within the small fourth ventricle. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.5, "loss": 4.179752349853516 }, { "id": "identity_36484", "image": "PMC-VQA/images/PMC5085966_Fig1_89262.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique was used to evaluate the tumor? \nChoices:\n(A) MRI \n(B) Endoscopic ultrasonography \n(C) X-ray \n(D) CT-scan " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.484375, "loss": 4.253439903259277 }, { "id": "identity_30637", "image": "A-OKVQA/images/340647.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which vehicle is a government vehicle?\nChoices:\n(A) firetruck\n(B) none\n(C) taxi\n(D) van" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.265625, "loss": 4.318081855773926 }, { "id": "identity_293355", "image": "PMC-VQA/images/PMC5352873_fig1_140562.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the form of the tumor?\nOptions:\n(A) well-defined\n(B) round\n(C) oval\n(D) undefined" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.15625, "loss": 4.646827697753906 }, { "id": "identity_278812", "image": "PMC-VQA/images/PMC3016500_F0003_83178.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be observed in the post-embolization angiogram?\nChoices:\n(A) Active contrast extravasation \n(B) Steel coils \n(C) Filling defects in the veins \n(D) Lack of blood flow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.171875, "loss": 4.603784084320068 }, { "id": "identity_240274", "image": "A-OKVQA/images/571048.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many legs are visible on the seating in front of the yellow plane, and what type of seating is it?" }, { "from": "gpt", "value": "The answer is Four legs, chair" } ], "clip_score": 23.25, "loss": 4.386759281158447 }, { "id": "identity_306841", "image": "A-OKVQA/images/567572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the brush between the doll's legs typically used for?\nChoices:\n(A) teeth\n(B) skin\n(C) nails\n(D) hair" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.09375, "loss": 4.092085838317871 }, { "id": "identity_278552", "image": "TQA/images/abc_question_images/seasons_11349.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the red line at J on the map?\nChoices:\n(A) EQUATOR\n(B) VERNAL EQUINOX\n(C) WINTER SOLSTICE\n(D) MAGNETIC POLE" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.9375, "loss": 4.409106731414795 }, { "id": "identity_36804", "image": "ScienceQA/images/6383.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) the Federated States of Micronesia\n(B) the Marshall Islands\n(C) Tonga\n(D) Kiribati" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.1875, "loss": 4.462104797363281 }, { "id": "identity_261992", "image": "AI2D/images/3302.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label that represents the Golgi apparatus:\n(A) G\n(B) A\n(C) C\n(D) D" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.890625, "loss": 4.759406089782715 }, { "id": "identity_268782", "image": "IconQA/images/18021.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you randomly choose a marble without looking, what is the likelihood of selecting a black one?\nChoices:\n(A) probable\n(B) unlikely\n(C) impossible\n(D) certain" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.859375, "loss": 4.2375664710998535 }, { "id": "identity_209373", "image": "PMC-VQA/images/PMC6627094_ijms-20-03047-f007_7646.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the likely species of the subject in the image?" }, { "from": "gpt", "value": "The answer is Rat" } ], "clip_score": 23.21875, "loss": 4.042884349822998 }, { "id": "identity_305701", "image": "TextVQA/images/7b6df76509a1983b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the sign with the shoe prints indicate?" }, { "from": "gpt", "value": "The answer is zaptro" } ], "clip_score": 15.890625, "loss": 4.505921840667725 }, { "id": "identity_309129", "image": "FigureQA/images/70092.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the area under the curve for Firebrick the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.703125, "loss": 4.516624450683594 }, { "id": "identity_297558", "image": "A-OKVQA/images/569437.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the next action that the skater is likely to perform?\nChoices:\n(A) sit\n(B) run\n(C) stand\n(D) land" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.140625, "loss": 4.213223457336426 }, { "id": "identity_291137", "image": "A-OKVQA/images/543713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of sheep having horns?\nChoices:\n(A) nothing\n(B) playing\n(C) mating\n(D) shock-absorbers" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.703125, "loss": 4.1871843338012695 }, { "id": "identity_298352", "image": "PMC-VQA/images/PMC8063182_fig0001_423112.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the yellow arrow in the MRI image?\nChoices:\n(A) an area of clinical significance \n(B) a loss of MRI signal around the chamber \n(C) an area of interest of affected MRI signal \n(D) an expected anatomical structure." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.96875, "loss": 4.589100360870361 }, { "id": "identity_302194", "image": "A-OKVQA/images/107960.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How does this thing connect to the ground?\nChoices:\n(A) rail\n(B) hooves\n(C) track\n(D) wheels" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.015625, "loss": 4.112166404724121 }, { "id": "identity_314509", "image": "FigureQA/images/78480.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Khaki?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.5390625, "loss": 4.230250835418701 }, { "id": "identity_338505", "image": "A-OKVQA/images/523473.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are some of these trees without leaves?\nChoices:\n(A) they're dead\n(B) spring\n(C) sap suckers\n(D) summer" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.0078125, "loss": 4.317729473114014 }, { "id": "identity_77025", "image": "ScienceQA/images/13092.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tertiary consumers are depicted in this marine food web?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 32.5625, "loss": 4.331610202789307 }, { "id": "identity_36729", "image": "VQA2.0/images/8345.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the pink object?" }, { "from": "gpt", "value": "The answer is towel" } ], "clip_score": 10.4765625, "loss": 4.112425804138184 }, { "id": "identity_24186", "image": "PMC-VQA/images/PMC3884609_fig1_257219.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the lesion seen on B-mode US? \nChoices:\n(A) Right kidney \n(B) Left kidney \n(C) Liver \n(D) Pancreas " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.390625, "loss": 4.370761871337891 }, { "id": "identity_266553", "image": "AI2D/images/3395.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the sensory organs from the following options:\n(A) B and F\n(B) K and M\n(C) E and G\n(D) N and E" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.015625, "loss": 4.221065998077393 }, { "id": "identity_286276", "image": "DocVQA/images/yjwd0227_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the series?" }, { "from": "gpt", "value": "The answer is Dietitians Program" } ], "clip_score": 13.578125, "loss": 4.65248441696167 }, { "id": "identity_115467", "image": "A-OKVQA/images/47419.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flags are displayed on the vehicle, and what are their colors?" }, { "from": "gpt", "value": "The answer is Two, orange and white" } ], "clip_score": 19.75, "loss": 4.208255290985107 }, { "id": "identity_21972", "image": "A-OKVQA/images/376677.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The area on the left with sign displays is most likely what kind of location?\nChoices:\n(A) shopping mall\n(B) strip mall\n(C) market\n(D) rest stop" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.5859375, "loss": 4.257169246673584 }, { "id": "identity_188406", "image": "A-OKVQA/images/59278.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many traffic light poles are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 27.171875, "loss": 4.513651371002197 }, { "id": "identity_127610", "image": "VQA2.0/images/168558.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many contrails are visible behind the airplane?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 30.25, "loss": 4.578590393066406 }, { "id": "identity_68937", "image": "AI2D/images/854.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the image, which stage of the mosquito life cycle is aquatic?" }, { "from": "gpt", "value": "The answer is two stages" } ], "clip_score": 34.15625, "loss": 4.270039081573486 }, { "id": "identity_1332", "image": "ScienceQA/images/12803.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of North Carolina?\nChoices:\n(A) Philadelphia\n(B) Raleigh\n(C) Charlotte\n(D) Jackson" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.65625, "loss": 4.303654670715332 }, { "id": "identity_187062", "image": "A-OKVQA/images/105389.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the majority of the paper cut-outs on the table?" }, { "from": "gpt", "value": "The answer is Star" } ], "clip_score": 19.75, "loss": 5.355856895446777 }, { "id": "identity_148498", "image": "DocVQA/images/hkxm0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person from the American Dietetic Association?" }, { "from": "gpt", "value": "The answer is Dorothea P. Turner" } ], "clip_score": 21.21875, "loss": 4.196161270141602 }, { "id": "identity_311153", "image": "FigureQA/images/68791.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the value of Dark Blue the highest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.171875, "loss": 4.051007270812988 }, { "id": "identity_290395", "image": "PMC-VQA/images/PMC7670012_F0012_299395.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the area pointed out by the green arrows in the image provided? \nChoices:\n(A) Putamen.\n(B) Periaqueductal gray.\n(C) Thalamus.\n(D) None of the above." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.21875, "loss": 4.516440391540527 }, { "id": "identity_316644", "image": "FigureQA/images/97784.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest? \nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.6875, "loss": 4.432726860046387 }, { "id": "identity_30021", "image": "PMC-VQA/images/PMC7919781_materials-14-00928-f002_379848.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be seen in the images? \nChoices:\n(A) Unmelted powder \n(B) Melted powder \n(C) Cracks \n(D) Smooth texture " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.671875, "loss": 4.180510520935059 }, { "id": "identity_211198", "image": "VQA2.0/images/218482.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many luggage carts are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.3125, "loss": 4.322439670562744 }, { "id": "identity_30079", "image": "A-OKVQA/images/563645.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the red kite near the two woman shaped like?\nChoices:\n(A) parrot\n(B) crab\n(C) ladybug\n(D) ball" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.125, "loss": 4.246211051940918 }, { "id": "identity_182173", "image": "A-OKVQA/images/160968.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many mounted police officers are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 31.0, "loss": 4.43690824508667 }, { "id": "identity_119077", "image": "PMC-VQA/images/PMC3411849_pone-0042850-g007_148512.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can the tissue type be identified based on the image?" }, { "from": "gpt", "value": "The answer is Liver tissue" } ], "clip_score": 29.828125, "loss": 4.4772257804870605 }, { "id": "identity_310140", "image": "ScienceQA/images/14853.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic do these items share?\nChoices:\n(A) sour\n(B) salty" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.609375, "loss": 4.105257987976074 }, { "id": "identity_332157", "image": "A-OKVQA/images/466484.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which item on one of the boats would help someone get ready for rain?\nChoices:\n(A) bucket\n(B) tarp\n(C) sail\n(D) motor" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.6875, "loss": 4.492587566375732 }, { "id": "identity_197587", "image": "A-OKVQA/images/297872.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the company in the bottom right corner of the banner?" }, { "from": "gpt", "value": "The answer is Allergan" } ], "clip_score": 18.515625, "loss": 4.765069484710693 }, { "id": "identity_189642", "image": "A-OKVQA/images/151587.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrians are visible on the sidewalk to the right of the bus?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 24.765625, "loss": 4.72353458404541 }, { "id": "identity_307874", "image": "TextVQA/images/02d94afde962b96b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this place?" }, { "from": "gpt", "value": "The answer is kardesler bufe" } ], "clip_score": 19.375, "loss": 4.530660152435303 }, { "id": "identity_332917", "image": "A-OKVQA/images/265277.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is used to construct this surface?\nChoices:\n(A) caol\n(B) metal\n(C) cement\n(D) soil" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 8.390625, "loss": 4.316409587860107 }, { "id": "identity_133898", "image": "VizWiz/images/VizWiz_train_00017077.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of insect activity on the flowers?" }, { "from": "gpt", "value": "The answer is No visible evidence" } ], "clip_score": 24.15625, "loss": 4.403906345367432 }, { "id": "identity_217474", "image": "VQA2.0/images/154817.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many towels are hanging on the towel rack?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.390625, "loss": 4.325605869293213 }, { "id": "identity_112867", "image": "A-OKVQA/images/96294.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chefs are visible in the kitchen area?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.65625, "loss": 4.7343339920043945 }, { "id": "identity_112018", "image": "Super-CLEVR/images/superCLEVR_new_001773.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the largest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 20.375, "loss": 4.044625282287598 }, { "id": "identity_328277", "image": "Super-CLEVR/images/superCLEVR_new_001726.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a large cyan object that shares the same material as the double bus?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 19.875, "loss": 5.122931480407715 }, { "id": "identity_203401", "image": "TextVQA/images/0ae44959cb83e7b0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the cap logo of the team?" }, { "from": "gpt", "value": "The answer is Gold" } ], "clip_score": 19.4375, "loss": 4.147770404815674 }, { "id": "identity_244591", "image": "A-OKVQA/images/544732.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the possible activities passengers might do in the yellow canopied building, and which activity is most likely based on the visible signage and context of the surrounding area?" }, { "from": "gpt", "value": "The answer is Dining is most likely based on visible \"CAFÉ\" signage" } ], "clip_score": 19.40625, "loss": 4.07298469543457 }, { "id": "identity_20957", "image": "PMC-VQA/images/PMC7316744_Fig6_199222.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the cyan areas in the image represent? \nChoices:\n(A) the coral tissue autofluorescence \n(B) the chlorophyll autofluorescence \n(C) the non-fluorescent coral skeleton underneath the tissue \n(D) the excessive light conditions. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.65625, "loss": 4.332181453704834 }, { "id": "identity_9786", "image": "FigureQA/images/55279.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Red the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.546875, "loss": 4.361066818237305 }, { "id": "identity_12677", "image": "ScienceQA/images/9816.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) New Zealand\n(B) Papua New Guinea\n(C) Palau\n(D) Australia" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.546875, "loss": 4.043710231781006 }, { "id": "identity_271697", "image": "PMC-VQA/images/PMC7435725_materials-13-03400-f006_237018.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the left column among the options provided? \nChoices:\n(A) Higher magnifications \n(B) Lower magnifications \n(C) Blends \n(D) None of the above." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.03125, "loss": 4.474981784820557 }, { "id": "identity_10266", "image": "Super-CLEVR/images/superCLEVR_new_001554.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a shiny bus left of the blue rubber bicycle; is it the same size as the blue matte motorbike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 27.703125, "loss": 4.834641456604004 }, { "id": "identity_302795", "image": "Super-CLEVR/images/superCLEVR_new_001565.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there anything else that has the same shape as this blue rubber object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 14.9140625, "loss": 4.91159200668335 }, { "id": "identity_119718", "image": "A-OKVQA/images/336360.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many red lanterns are visible in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 18.75, "loss": 4.7471466064453125 }, { "id": "identity_330697", "image": "PMC-VQA/images/PMC8133422_pone.0251788.g001_445198.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique used for the knee articulation of the individual in the caption?\nChoices:\n(A) CT-MRI\n(B) PET-MRI\n(C) PET-CT\n(D) UHF-MRI" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.953125, "loss": 4.0262770652771 }, { "id": "identity_111268", "image": "VQA-AS/images/9267.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cups are visible on the table?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.3125, "loss": 4.410576343536377 }, { "id": "identity_277763", "image": "FigureQA/images/69164.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly describes the smoothness of Light Gold? \n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.9375, "loss": 4.112937927246094 }, { "id": "identity_316931", "image": "A-OKVQA/images/539041.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of counter is depicted?\nChoices:\n(A) coin\n(B) bathroom\n(C) kitchen\n(D) store" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.59375, "loss": 4.700962543487549 }, { "id": "identity_327229", "image": "PMC-VQA/images/PMC4830041_Fig1_20947.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the large mass located?\nChoices:\n(A) In a pelvic area.\n(B) In a left ventricle.\n(C) In a left atrium.\n(D) In a right atrium." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.40625, "loss": 4.178899765014648 }, { "id": "identity_226557", "image": "DVQA/images/bar_train_00186512.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold for each item in each store, and which store sold the most units of the item that sold the most units overall?" }, { "from": "gpt", "value": "The answer is essay store" } ], "clip_score": 34.5, "loss": 4.21218729019165 }, { "id": "identity_67669", "image": "AI2D/images/3207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stamens are visible in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 34.15625, "loss": 4.445116996765137 }, { "id": "identity_192045", "image": "PMC-VQA/images/PMC2100056_F1_15102.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the largest lesion in centimeters?" }, { "from": "gpt", "value": "The answer is Size in cm" } ], "clip_score": 22.109375, "loss": 4.105029582977295 }, { "id": "identity_324340", "image": "ScienceQA/images/12224.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option below is a bird?\nChoices:\n(A) California toad\n(B) keel-billed toucan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.1875, "loss": 4.028695583343506 }, { "id": "identity_127905", "image": "A-OKVQA/images/60599.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles are visible in the background?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 13.421875, "loss": 4.776198863983154 }, { "id": "identity_109777", "image": "TextVQA/images/1003153971ee6833.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main object in the image?" }, { "from": "gpt", "value": "The answer is A coin" } ], "clip_score": 27.859375, "loss": 4.107921123504639 }, { "id": "identity_280503", "image": "PMC-VQA/images/PMC9376875_f2_372531.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is represented by the green dots in the graph? Is it:\n(A) Trained items\n(B) Untrained items\n(C) Known items\n(D) Unknown items" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.078125, "loss": 4.9344916343688965 }, { "id": "identity_224896", "image": "ScienceQA/images/18601.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which colony is located directly south of the colony shown in dark green, and what is its capital city?" }, { "from": "gpt", "value": "The answer is Boston" } ], "clip_score": 30.5625, "loss": 4.316256999969482 }, { "id": "identity_196800", "image": "PMC-VQA/images/PMC7906016_F3_374088.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the possible medical condition indicated by the red arrow?" }, { "from": "gpt", "value": "The answer is Possible ischemic stroke" } ], "clip_score": 31.46875, "loss": 4.228180408477783 }, { "id": "identity_99008", "image": "IconQA/images/35286.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the faces of the cube?" }, { "from": "gpt", "value": "The answer is Square" } ], "clip_score": 31.421875, "loss": 4.240052700042725 }, { "id": "identity_112912", "image": "TextVQA/images/b96a64fc4a0ba145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the company advertised on the billboard to the left of the Bud sign?" }, { "from": "gpt", "value": "The answer is Fleet" } ], "clip_score": 20.078125, "loss": 4.180490493774414 }, { "id": "identity_15241", "image": "TextVQA/images/7ebc2a7ca5753975.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the first name mentioned on the photograph?" }, { "from": "gpt", "value": "The answer is susj" } ], "clip_score": 21.65625, "loss": 5.067807674407959 }, { "id": "identity_30147", "image": "A-OKVQA/images/9946.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why can't we see the man as clearly as we should be able to?\nChoices:\n(A) distortion effect\n(B) blur\n(C) watermark\n(D) picture overexposed" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.3046875, "loss": 4.2041144371032715 }, { "id": "identity_32884", "image": "Super-CLEVR/images/superCLEVR_new_000602.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other objects of the same shape as the large brown object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 12.8359375, "loss": 5.150420665740967 }, { "id": "identity_299465", "image": "IconQA/images/69171.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) rotate\n(B) shift\n(C) invert" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.6875, "loss": 4.929882526397705 }, { "id": "identity_202342", "image": "VQA2.0/images/438300.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 16.828125, "loss": 4.183148384094238 }, { "id": "identity_270842", "image": "FigureQA/images/65858.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly describes the smoothness of chocolate: (A) no or (B) yes?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.125, "loss": 4.183611869812012 }, { "id": "identity_34348", "image": "ScienceQA/images/5319.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During this time, thermal energy was transferred from () to ().\nChoices:\n(A) the surroundings . . . each aquarium\n(B) each aquarium . . . the surroundings" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.6875, "loss": 4.014650344848633 }, { "id": "identity_22153", "image": "TextVQA/images/0193991401f83e64.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what language has been used on the bus?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 24.375, "loss": 4.353629112243652 }, { "id": "identity_7292", "image": "TQA/images/abc_question_images/parts_worm_17321.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part represents the dorsal blood vessel?\nChoices:\n(A) L\n(B) W\n(C) P\n(D) Y" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.484375, "loss": 4.1977643966674805 }, { "id": "identity_326296", "image": "PMC-VQA/images/PMC7447704_Fig15_240131.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of image was used to capture this enhancement?\nChoices:\n(A) T2-weighted image\n(B) CT scan\n(C) T1-weighted image\n(D) X-ray" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.375, "loss": 4.383127212524414 }, { "id": "identity_266939", "image": "PMC-VQA/images/PMC4071974_fig4_301445.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was successful in detecting a field inhomogeneity artifact in the frontal region?\nOptions:\n(A) SWI\n(B) BS-MRV\n(C) CT\n(D) MRI" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.3125, "loss": 4.673685550689697 }, { "id": "identity_187721", "image": "A-OKVQA/images/572095.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tent in the background?" }, { "from": "gpt", "value": "The answer is Red and white" } ], "clip_score": 18.15625, "loss": 4.002906799316406 }, { "id": "identity_155391", "image": "AI2D/images/4469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different foot arch types are illustrated in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 33.9375, "loss": 4.607508659362793 }, { "id": "identity_288006", "image": "A-OKVQA/images/158787.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What items are the two women in the foreground holding?\nChoices:\n(A) pumpkins\n(B) purses\n(C) eggs\n(D) cats" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.59375, "loss": 4.403630256652832 }, { "id": "identity_27304", "image": "DocVQA/images/nykh0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To whom is this letter addressed?" }, { "from": "gpt", "value": "The answer is Miss Bev Rosen" } ], "clip_score": 29.015625, "loss": 4.861562728881836 }, { "id": "identity_8610", "image": "FigureQA/images/81817.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cornflower greater than Peru?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.109375, "loss": 4.012724876403809 }, { "id": "identity_335533", "image": "Super-CLEVR/images/superCLEVR_new_001880.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the green thing that is made of the same material as the brown articulated bus?" }, { "from": "gpt", "value": "The answer is suv" } ], "clip_score": 22.875, "loss": 4.072935581207275 }, { "id": "identity_204866", "image": "TextVQA/images/6af70be36bbe7cca.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity are the characters in costumes engaged in?" }, { "from": "gpt", "value": "The answer is Racing" } ], "clip_score": 18.125, "loss": 4.596560955047607 }, { "id": "identity_110421", "image": "TextVQA/images/320ae96161796489.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the majority of the crowd's clothing?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 20.3125, "loss": 4.324243068695068 }, { "id": "identity_33272", "image": "PMC-VQA/images/PMC8124336_ijms-22-04818-f003_441955.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the black asterisk in the image? \nChoices:\n(A) Granules of xenogeneic BSM \n(B) Connective tissue \n(C) Newly formed bone \n(D) M2 macrophages. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.8125, "loss": 4.906363487243652 }, { "id": "identity_284340", "image": "FigureQA/images/5207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Aqua the median with a high value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.4375, "loss": 4.652329444885254 }, { "id": "identity_129268", "image": "TextVQA/images/03cd4987c51f2db9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last word in the message at the bottom of the image?" }, { "from": "gpt", "value": "The answer is Love" } ], "clip_score": 20.8125, "loss": 4.200262546539307 }, { "id": "identity_217695", "image": "A-OKVQA/images/508985.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lanes of traffic are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.84375, "loss": 4.226461410522461 }, { "id": "identity_265170", "image": "FigureQA/images/87128.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Steel Blue the color with the largest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.890625, "loss": 4.028367519378662 }, { "id": "identity_195572", "image": "Super-CLEVR/images/superCLEVR_new_001589.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object located furthest to the right in the image?" }, { "from": "gpt", "value": "The answer is Cyan" } ], "clip_score": 20.453125, "loss": 4.278049945831299 }, { "id": "identity_326521", "image": "TabMWP/images/19315.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which day were the fewest cookies baked for the bake sale?\nChoices:\n(A) Monday\n(B) Tuesday\n(C) Wednesday\n(D) Thursday" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 36.6875, "loss": 4.041463375091553 }, { "id": "identity_3670", "image": "A-OKVQA/images/50431.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are there horses in front of the carriage?\nChoices:\n(A) to pull\n(B) to pet\n(C) to ride\n(D) to eat" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.078125, "loss": 4.107663154602051 }, { "id": "identity_325738", "image": "A-OKVQA/images/148668.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What feature makes this bike noticeable at night?\nChoices:\n(A) blinkers\n(B) handlebar\n(C) horn\n(D) bike chain" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.125, "loss": 4.321984767913818 }, { "id": "identity_301734", "image": "A-OKVQA/images/462037.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the steering wheels control?\nChoices:\n(A) car\n(B) barbie house\n(C) video game\n(D) toy truck" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.671875, "loss": 4.203026294708252 }, { "id": "identity_5588", "image": "PMC-VQA/images/PMC7644528_Fig1_292069.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the black arrows in the image signify? \nChoices:\n(A) Disruption of basement membrane \n(B) Stroma \n(C) Lumina \n(D) Basement membrane integrity " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.59375, "loss": 4.993314266204834 }, { "id": "identity_290015", "image": "A-OKVQA/images/29189.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of displaying books on tables in this location?\nChoices:\n(A) giving away\n(B) person's hoarding\n(C) meeting people\n(D) for sale" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.640625, "loss": 4.580343246459961 }, { "id": "identity_238776", "image": "PMC-VQA/images/PMC2707627_pone-0006314-g006_41342.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key differences in the imaging techniques used to visualize remission, and how do these differences help in identifying the correct technique used in the given image?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 30.125, "loss": 4.315617084503174 }, { "id": "identity_326550", "image": "A-OKVQA/images/330726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the function of the metal object in front of the building?\nChoices:\n(A) toll booth\n(B) recycling\n(C) art display\n(D) food stand" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.2578125, "loss": 4.703729629516602 }, { "id": "identity_292347", "image": "PMC-VQA/images/PMC8266210_f1_3952.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to produce image B out of the following options: MRI, CT, PET, or SPECT?\nChoices:\n(A) MRI \n(B) CT \n(C) PET \n(D) SPECT" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.40625, "loss": 4.016545295715332 }, { "id": "identity_294979", "image": "DVQA/images/bar_train_00126110.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group has the highest total value when all values are added together?" }, { "from": "gpt", "value": "The answer is hen" } ], "clip_score": 20.703125, "loss": 4.440451622009277 }, { "id": "identity_51485", "image": "DVQA/images/bar_train_00026489.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item has the highest total units sold across both stores?" }, { "from": "gpt", "value": "The answer is while" } ], "clip_score": 31.984375, "loss": 4.893553256988525 }, { "id": "identity_5317", "image": "AI2D/images/3993.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the center-most part of the diagram?\nChoices:\n(A) spicules\n(B) core\n(C) none of the above\n(D) sunspot" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.53125, "loss": 4.513625144958496 }, { "id": "identity_318254", "image": "PMC-VQA/images/PMC5309216_F1_127713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the T1 weighted imaging indicate?\nChoices:\n(A) Corpus Callosum atrophy (arrow)\n(B) Cerebellar Vermis atrophy (arrow)\n(C) Brainstem Lesion (arrow)\n(D) Frontal Lobe Tumor (arrow)" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.65625, "loss": 4.230559349060059 }, { "id": "identity_326294", "image": "ScienceQA/images/20910.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Biloxi\n(B) Kansas City\n(C) Charleston\n(D) Huntington" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.5, "loss": 4.600796699523926 }, { "id": "identity_20550", "image": "A-OKVQA/images/488747.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is in the bucket?\nChoices:\n(A) food/water\n(B) money\n(C) gas\n(D) free shirts" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.65625, "loss": 4.313320159912109 }, { "id": "identity_290139", "image": "PMC-VQA/images/PMC9414956_microorganisms-10-01640-f003_385876.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which bone is impacted by the disease shown in the image? \nOptions:\n(A) Proximal tibia.\n(B) Distal tibia.\n(C) Proximal fibula.\n(D) Distal fibula." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.8125, "loss": 4.514958381652832 }, { "id": "identity_7050", "image": "IconQA/images/30345.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this shape have a circle as a face?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.609375, "loss": 4.662388324737549 }, { "id": "identity_29161", "image": "ScienceQA/images/7515.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) Grenada\n(B) Saint Vincent and the Grenadines\n(C) Trinidad and Tobago\n(D) Haiti" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.0, "loss": 4.119793891906738 }, { "id": "identity_198147", "image": "PMC-VQA/images/PMC6136216_Fig2_367211.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the hip joint in picture A?" }, { "from": "gpt", "value": "The answer is Lateral orientation" } ], "clip_score": 25.21875, "loss": 4.471930980682373 }, { "id": "identity_332247", "image": "A-OKVQA/images/180488.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What natural force will cause the boarder to return to the ground after traversing over the jump?\nChoices:\n(A) gravity\n(B) inertia\n(C) fission\n(D) kinetic energy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.2890625, "loss": 4.655276298522949 }, { "id": "identity_327998", "image": "VQA-AS/images/5091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the plants on something?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 16.1875, "loss": 4.263724327087402 }, { "id": "identity_301997", "image": "A-OKVQA/images/421978.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of product do advertisers use these horses to promote?\nChoices:\n(A) horse food\n(B) beer\n(C) pizza\n(D) muffins" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.59375, "loss": 4.031153202056885 }, { "id": "identity_13505", "image": "FigureQA/images/98656.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Turquoise have the minimum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.0625, "loss": 4.151167869567871 }, { "id": "identity_266115", "image": "ScienceQA/images/4962.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic best describes this object?\nChoices:\n(A) fuzzy\n(B) slippery" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.6015625, "loss": 4.293454647064209 }, { "id": "identity_398", "image": "VQA-AS/images/8397.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the curtains?" }, { "from": "gpt", "value": "The answer is white" } ], "clip_score": 22.546875, "loss": 4.192735195159912 }, { "id": "identity_311278", "image": "DVQA/images/bar_train_00181569.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the orchid color represent?" }, { "from": "gpt", "value": "The answer is deal" } ], "clip_score": 23.328125, "loss": 4.687403678894043 }, { "id": "identity_21947", "image": "DocVQA/images/zznp0227_111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: who is the agricultural Economists" }, { "from": "gpt", "value": "The answer is fred schmedt" } ], "clip_score": 20.375, "loss": 4.202309608459473 }, { "id": "identity_286231", "image": "A-OKVQA/images/561251.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the indents in the sand? Was it due to foot traffic, acid rain, artillery fire, or hail?\nChoices:\n(A) foot traffic\n(B) acid rain\n(C) artillery fire\n(D) hail" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.796875, "loss": 4.444724082946777 }, { "id": "identity_154957", "image": "AI2D/images/453.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many processes are depicted in this water cycle diagram?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 33.34375, "loss": 4.081587791442871 }, { "id": "identity_25935", "image": "PMC-VQA/images/PMC9187638_Fig2_308785.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of imaging techniques were used in the analysis?\nChoices:\n(A) MRI and CT \n(B) X-ray and CT \n(C) MRI and MRA \n(D) PET and MRA " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.390625, "loss": 4.5243048667907715 }, { "id": "identity_270699", "image": "PMC-VQA/images/PMC6873490_Fig1_72667.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which lung lobe is the enhanced irregular nodule located?\nChoices:\n(A) right upper lobe\n(B) left upper lobe\n(C) right lower lobe\n(D) left lower lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.484375, "loss": 4.187531471252441 }, { "id": "identity_317516", "image": "AI2D/images/2567.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Below is a diagram of Earth's orbit around the Sun. Which stage is indicated at letter I?\nChoices:\n(A) vernal equinox\n(B) summer solstice\n(C) autumnal equinox\n(D) winter solstice" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.96875, "loss": 4.367641925811768 }, { "id": "identity_188472", "image": "PMC-VQA/images/PMC9388805_F1_375737.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image in relation to the human body?" }, { "from": "gpt", "value": "The answer is Coronal" } ], "clip_score": 25.78125, "loss": 4.754683017730713 }, { "id": "identity_215933", "image": "A-OKVQA/images/385405.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many hanging bird cages can be seen in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.59375, "loss": 4.46755313873291 }, { "id": "identity_54671", "image": "DVQA/images/bar_train_00188433.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label on the y-axis has a higher value for the green bar than the blue bar?" }, { "from": "gpt", "value": "The answer is pair" } ], "clip_score": 32.71875, "loss": 4.64522123336792 }, { "id": "identity_288371", "image": "PMC-VQA/images/PMC4018937_F2_287976.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the perspective shown in the image?\nChoices:\n(A) Anterior\n(B) Lateral\n(C) Posterior\n(D) Dorsal" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.546875, "loss": 4.723467826843262 }, { "id": "identity_34087", "image": "PMC-VQA/images/PMC6759579_F2_41287.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of staining is represented in the left panel?\nChoices:\n(A) Hematoxylin and eosin (H&E) \n(B) Immunofluorescence \n(C) Immunohistochemistry \n(D) Giemsa stain " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.734375, "loss": 4.210915565490723 }, { "id": "identity_277731", "image": "Super-CLEVR/images/superCLEVR_new_001689.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the motorbike located to the left of the gray rubber regular bus?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 31.875, "loss": 4.336146354675293 }, { "id": "identity_120535", "image": "TextVQA/images/48c0660014cb1412.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the book spine that is second from the right?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 28.34375, "loss": 4.159979343414307 }, { "id": "identity_67014", "image": "ScienceQA/images/17376.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the background color behind the bird?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 16.96875, "loss": 4.332866668701172 }, { "id": "identity_15848", "image": "ScienceQA/images/14353.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which state is highlighted?\nChoices:\n(A) Illinois\n(B) Wisconsin\n(C) South Dakota\n(D) Minnesota" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.71875, "loss": 4.325615882873535 }, { "id": "identity_71592", "image": "AI2D/images/253.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many primary consumers are depicted in this food web?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 31.109375, "loss": 4.661296367645264 }, { "id": "identity_327218", "image": "PMC-VQA/images/PMC3253626_F12_121450.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in images E, J, and K?\nChoices:\n(A) gastrostyles\n(B) coenosteal texture\n(C) gastropores\n(D) dactylostyles" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.203125, "loss": 5.234766960144043 }, { "id": "identity_73051", "image": "AI2D/images/946.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which bird's beak indicates it might be a bird of prey?" }, { "from": "gpt", "value": "The answer is Bird at label E" } ], "clip_score": 30.921875, "loss": 4.448981285095215 }, { "id": "identity_267863", "image": "A-OKVQA/images/474268.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What object is currently located on the floor among the following options: antelope, surfboard, pumpkin, or egg sandwich?\nChoices:\n(A) antelope\n(B) surfboard\n(C) pumpkin\n(D) egg sandwich" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.5, "loss": 4.167606353759766 }, { "id": "identity_273650", "image": "PMC-VQA/images/PMC8118428_F0001_439517.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the sagittal view, what is the shape of the enlarged lymph nodes?\nChoices:\n(A) Spherical\n(B) Irregular\n(C) Oval\n(D) Round" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.15625, "loss": 4.039991855621338 }, { "id": "identity_302256", "image": "ScienceQA/images/1150.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is this bag made of?\nChoices:\n(A) plastic\n(B) cotton" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.328125, "loss": 4.179595470428467 }, { "id": "identity_47068", "image": "FigureQA/images/57294.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 29.21875, "loss": 4.108860015869141 }, { "id": "identity_201968", "image": "PMC-VQA/images/PMC6462822_f0045_465006.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the brain slice on the far right?" }, { "from": "gpt", "value": "The answer is pM-A" } ], "clip_score": 25.4375, "loss": 5.644827842712402 }, { "id": "identity_276983", "image": "FigureQA/images/84751.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Black or Olive Drab?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.78125, "loss": 4.537609100341797 }, { "id": "identity_131115", "image": "VizWiz/images/VizWiz_train_00015729.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object behind the slippers?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 26.359375, "loss": 4.163269996643066 }, { "id": "identity_134900", "image": "DocVQA/images/lmjf0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the drug mentioned in the pediatric study?" }, { "from": "gpt", "value": "The answer is Actos 508" } ], "clip_score": 17.953125, "loss": 4.202548027038574 }, { "id": "identity_104651", "image": "PMC-VQA/images/PMC4247311_f3-etm-09-01-0120_340158.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of cells are likely being observed in this image?" }, { "from": "gpt", "value": "The answer is Animal cells" } ], "clip_score": 24.6875, "loss": 4.28233003616333 }, { "id": "identity_2477", "image": "A-OKVQA/images/356286.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What sort of shop is this?\nChoices:\n(A) motorcycle sales\n(B) car sales\n(C) used car\n(D) motorcycle repair" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.125, "loss": 4.237801551818848 }, { "id": "identity_22997", "image": "A-OKVQA/images/108617.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of creatures are eating the oranges?\nChoices:\n(A) spiders\n(B) butterflies\n(C) dragonflies\n(D) snails" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.140625, "loss": 4.091480255126953 }, { "id": "identity_62008", "image": "DocVQA/images/tzgv0228_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What month and year did the organization receive a major grant from the Agency for International Development?" }, { "from": "gpt", "value": "The answer is August" } ], "clip_score": 29.078125, "loss": 4.679872035980225 }, { "id": "identity_325722", "image": "PMC-VQA/images/PMC4651706_fig1_445703.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is observed in the diagnostic test?\nChoices:\n(A) Displaced fracture at the distal tibia epiphysis\n(B) Non-displaced fracture at the mid tibia diaphysis\n(C) Focal posterior cortical thickening at the level of the fracture\n(D) Focal anterior cortical thickening at the level of the fracture" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.296875, "loss": 4.269068717956543 }, { "id": "identity_296159", "image": "TabMWP/images/37703.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on Tucker's tracking of the average waiting time at the doctor's office each month, what was the difference in waiting time between February and March according to the table?" }, { "from": "gpt", "value": "The answer is -3" } ], "clip_score": 38.4375, "loss": 4.133815288543701 }, { "id": "identity_308170", "image": "Super-CLEVR/images/superCLEVR_new_000663.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any other thing that has the same material as this red object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 14.515625, "loss": 4.841811180114746 }, { "id": "identity_257791", "image": "A-OKVQA/images/199764.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two items placed on the table under the tent, and what is the man doing with one of them?" }, { "from": "gpt", "value": "The answer is A bag of peanuts and a machine; operating the machine" } ], "clip_score": 24.1875, "loss": 4.0597076416015625 }, { "id": "identity_142036", "image": "DVQA/images/bar_train_00058409.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset has the lowest accuracy for the 'dance' algorithm?" }, { "from": "gpt", "value": "The answer is fool" } ], "clip_score": 33.125, "loss": 4.064523220062256 }, { "id": "identity_154862", "image": "ScienceQA/images/11047.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate ratio of neuron cell bodies to their extensions visible in this image?" }, { "from": "gpt", "value": "The answer is a ratio in the simplest form" } ], "clip_score": 24.484375, "loss": 4.5773234367370605 }, { "id": "identity_249395", "image": "A-OKVQA/images/261316.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the possible reasons for the bike being stored on the bus, and which reason is most likely based on the context of the image?" }, { "from": "gpt", "value": "The answer is storage" } ], "clip_score": 26.640625, "loss": 6.764306545257568 }, { "id": "identity_58931", "image": "DocVQA/images/nxfg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the professional qualification of Joanne E. Finley mentioned in the letter?" }, { "from": "gpt", "value": "The answer is MSPH, MD" } ], "clip_score": 19.828125, "loss": 4.099539279937744 }, { "id": "identity_144288", "image": "DocVQA/images/qhfk0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the topic discussed in the call information?" }, { "from": "gpt", "value": "The answer is Fire-safe cigs" } ], "clip_score": 25.921875, "loss": 5.450860977172852 }, { "id": "identity_328633", "image": "A-OKVQA/images/329258.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What word is the individual in the air most familiar with?\nChoices:\n(A) enzuiguiri\n(B) kickflip\n(C) quark\n(D) ad hoc" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.7578125, "loss": 4.273947715759277 }, { "id": "identity_279861", "image": "AI2D/images/21.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given food chain, what is the direction of energy flow?\nChoices:\n(A) Sun to aquatic plants\n(B) Sun to fish\n(C) fish to plants\n(D) None of these" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.359375, "loss": 4.048110008239746 }, { "id": "identity_283902", "image": "A-OKVQA/images/236282.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity are the women in the lower right engaged in? Is it texting, petting, eating, or photographing?\nChoices:\n(A) texting\n(B) petting\n(C) eating\n(D) photographing" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.390625, "loss": 4.127528667449951 }, { "id": "identity_109624", "image": "PMC-VQA/images/PMC6737997_F2_36449.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image on the right side?" }, { "from": "gpt", "value": "The answer is Axial" } ], "clip_score": 25.703125, "loss": 4.370950222015381 }, { "id": "identity_267532", "image": "AI2D/images/1078.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the outermost layer of a plant cell among the following options: (A) Amylosplast, (B) Cell Membrane, (C) Cell wall, or (D) Cytoplasm?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.09375, "loss": 4.068591594696045 }, { "id": "identity_185158", "image": "TextVQA/images/b47636280fde8aee.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrians are crossing the street?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 27.6875, "loss": 4.906684398651123 }, { "id": "identity_23035", "image": "PMC-VQA/images/PMC7085490_Fig8_139633.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the remaining duplication cyst located? \nChoices:\n(A) Stomach \n(B) Large intestine \n(C) Small intestine \n(D) Liver " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.796875, "loss": 4.545289516448975 }, { "id": "identity_12770", "image": "Super-CLEVR/images/superCLEVR_new_001468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The suv that is the same color as the road bike is what size?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 22.0, "loss": 4.402163028717041 }, { "id": "identity_277403", "image": "FigureQA/images/77869.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly describes the smoothness of Orange? \n(A) no \n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.359375, "loss": 4.123927116394043 }, { "id": "identity_295670", "image": "A-OKVQA/images/9542.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter is hidden behind the sign following PHO? Is it:\n(A) n\n(B) t\n(C) m\n(D) g" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.5625, "loss": 4.417835712432861 }, { "id": "identity_201697", "image": "VizWiz/images/VizWiz_train_00016026.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the background object with a white surface?" }, { "from": "gpt", "value": "The answer is Computer mouse" } ], "clip_score": 18.046875, "loss": 4.0621018409729 }, { "id": "identity_311693", "image": "A-OKVQA/images/411223.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is closest to the individual?\nChoices:\n(A) banana\n(B) barrel\n(C) baby\n(D) tiger" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 8.4609375, "loss": 4.207728385925293 }, { "id": "identity_288558", "image": "DocVQA/images/xhjh0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the current President of First National Bank?" }, { "from": "gpt", "value": "The answer is Jim Stockwell" } ], "clip_score": 23.25, "loss": 4.037367820739746 }, { "id": "identity_327733", "image": "TabMWP/images/11581.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The staff of a newspaper examined the number of copies sold over the past few days. On which day were the fewest copies sold?\nChoices:\n(A) Saturday\n(B) Sunday\n(C) Monday\n(D) Tuesday" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.5, "loss": 4.33465576171875 }, { "id": "identity_302187", "image": "A-OKVQA/images/288229.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of place is this?\nChoices:\n(A) commercial\n(B) rural\n(C) country\n(D) residential" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 8.1171875, "loss": 4.289803504943848 }, { "id": "identity_31007", "image": "A-OKVQA/images/160669.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who is the man crouching behind the catcher?\nChoices:\n(A) umpire\n(B) referee\n(C) coach\n(D) announcer" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.671875, "loss": 4.511693477630615 }, { "id": "identity_104380", "image": "VQA2.0/images/64392.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many potted plants are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 13.53125, "loss": 4.4163665771484375 }, { "id": "identity_280540", "image": "FigureQA/images/66797.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Turquoise the color with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.640625, "loss": 4.017112731933594 }, { "id": "identity_138062", "image": "DocVQA/images/hmvj0223_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size group of the company?" }, { "from": "gpt", "value": "The answer is Decile 4" } ], "clip_score": 14.9765625, "loss": 4.142797946929932 }, { "id": "identity_316238", "image": "AI2D/images/749.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter corresponds to this month?\nChoices:\n(A) C\n(B) A\n(C) E\n(D) D" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.40625, "loss": 4.282346725463867 }, { "id": "identity_298786", "image": "PMC-VQA/images/PMC8843977_f0005_194482.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the RT planning MRI image (A) depict?\nChoices:\n(A) The markers evaluated in the exposed structures\n(B) The entire brain of the patient\n(C) The positioning of the radiation therapy\n(D) The tools utilized for radiation therapy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.34375, "loss": 4.085728645324707 }, { "id": "identity_319035", "image": "DVQA/images/bar_train_00190816.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which one has the highest accuracy?" }, { "from": "gpt", "value": "The answer is crew" } ], "clip_score": 26.578125, "loss": 4.694243431091309 }, { "id": "identity_323035", "image": "PMC-VQA/images/PMC5986059_ccr31539-fig-0001_323804.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the foot is affected by these hyperkeratotic nodules and papules?\nChoices:\n(A) Heel\n(B) Arch\n(C) Toes\n(D) Instep" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.3125, "loss": 4.146402359008789 }, { "id": "identity_333254", "image": "PMC-VQA/images/PMC3290045_F0001_127783.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this sonography image indicate about the pelvis?\nChoices:\n(A) Normal pelvis\n(B) Small cystic lesion\n(C) Large cystic lesion\n(D) Solid lesion." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.265625, "loss": 4.176848411560059 }, { "id": "identity_38279", "image": "ScienceQA/images/8345.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which term matches the picture?\nChoices:\n(A) endotherms\n(B) ectotherms" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.6484375, "loss": 4.498591899871826 }, { "id": "identity_21429", "image": "A-OKVQA/images/369504.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of facility can be found nearby?\nChoices:\n(A) fish pond\n(B) bike trail\n(C) hiking trail\n(D) picnic area" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.953125, "loss": 4.323028087615967 }, { "id": "identity_325155", "image": "IconQA/images/98462.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner more likely to stop on?\nChoices:\n(A) blue\n(B) white" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.96875, "loss": 4.125384330749512 }, { "id": "identity_280090", "image": "ScienceQA/images/16740.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being indicated by the highlighted area?\nChoices:\n(A) Haiti\n(B) Trinidad and Tobago\n(C) Barbados\n(D) Grenada" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.96875, "loss": 4.86269998550415 }, { "id": "identity_294102", "image": "A-OKVQA/images/408624.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the small floats on the sides of the boats here in preventing the boats from doing what?\nChoices:\n(A) getting lost\n(B) soaring\n(C) bumping\n(D) sinking" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.890625, "loss": 4.297286510467529 }, { "id": "identity_32053", "image": "PMC-VQA/images/PMC2410174_fig1_23923.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of images are being compared to show the reduction in tumor volume in patient #5? \nChoices:\n(A) T1W images \n(B) FLAIR images \n(C) DWI images \n(D) T2W-FSE images " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.125, "loss": 4.045084476470947 }, { "id": "identity_106256", "image": "TextVQA/images/8f873b1cb9dce21f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many camera lenses are visible on the phone on the right?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 23.265625, "loss": 4.447635173797607 }, { "id": "identity_223587", "image": "A-OKVQA/images/204232.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the barrier the man is jumping over?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 21.953125, "loss": 4.075291633605957 }, { "id": "identity_319997", "image": "FigureQA/images/24396.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.40625, "loss": 4.124048709869385 }, { "id": "identity_187236", "image": "PMC-VQA/images/PMC8100688_F1_434449.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the red highlighted areas in the brain maps?" }, { "from": "gpt", "value": "The answer is Regions of increased activity or interest" } ], "clip_score": 32.09375, "loss": 4.10610294342041 }, { "id": "identity_334995", "image": "FigureQA/images/40801.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the minimum value for Indian Red?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.9375, "loss": 4.303178787231445 }, { "id": "identity_26189", "image": "AI2D/images/3431.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The ovary is part of the stamen, right?\nChoices:\n(A) No, the ovary is part of the pistil.\n(B) No, the ovary are in women.\n(C) No, the ovary is what doctors poke at.\n(D) Yes, the ovary is part of the stamen." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.203125, "loss": 4.068436145782471 }, { "id": "identity_266987", "image": "DVQA/images/bar_train_00143636.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group of bars, starting from the left, is labeled as the third group?" }, { "from": "gpt", "value": "The answer is chip" } ], "clip_score": 15.84375, "loss": 4.2096357345581055 }, { "id": "identity_336433", "image": "DVQA/images/bar_train_00064101.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second group of bars from the end?" }, { "from": "gpt", "value": "The answer is ladder" } ], "clip_score": 19.171875, "loss": 4.219272136688232 }, { "id": "identity_329041", "image": "PMC-VQA/images/PMC4683868_Fig7_454414.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of section is depicted in the images?\nChoices:\n(A) Coronal section\n(B) Sagittal section\n(C) Transverse section\n(D) Frontal section" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.1484375, "loss": 4.32745361328125 }, { "id": "identity_37994", "image": "PMC-VQA/images/PMC3011101_F0005_82496.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the Enterprise stent located? \nChoices:\n(A) Left posterior cerebral artery \n(B) Basilar artery \n(C) Right posterior cerebral artery \n(D) Left vertebrobasilar junction " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.9375, "loss": 4.0417022705078125 }, { "id": "identity_229764", "image": "ScienceQA/images/695.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the state on the map that has Dover as its capital, and then name the capital of the state directly to its west" }, { "from": "gpt", "value": "The answer is Annapolis" } ], "clip_score": 25.46875, "loss": 4.692362308502197 }, { "id": "identity_108549", "image": "Super-CLEVR/images/superCLEVR_new_001442.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest vehicle in the image?" }, { "from": "gpt", "value": "The answer is The smallest vehicle is gold" } ], "clip_score": 25.90625, "loss": 4.089888095855713 }, { "id": "identity_292860", "image": "PMC-VQA/images/PMC4928431_F4_46777.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which hemisphere does restricted diffusion affect? \nChoices:\n(A) Left hemisphere\n(B) Right hemisphere\n(C) Both hemispheres\n(D) Frontal hemisphere" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.359375, "loss": 4.333935260772705 }, { "id": "identity_290471", "image": "TQA/images/question_images/human_system_nervous_6191.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the brain is situated at the front?\nChoices:\n(A) Lateral sulcus\n(B) Parietal lobe\n(C) Frontal lobe\n(D) Occipital lobe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.09375, "loss": 4.354151725769043 }, { "id": "identity_282322", "image": "PMC-VQA/images/PMC6897007_F9_80014.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which three views are included in the MPR Planes of CT Volume? \n(A) Sagittal, coronal, oblique \n(B) Right to left, posterior to anterior, oblique \n(C) Left to right, anterior to posterior, axial \n(D) Anterior to posterior, sagittal, oblique" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.421875, "loss": 4.184371471405029 }, { "id": "identity_335656", "image": "PMC-VQA/images/PMC7274469_pntd.0008141.g002_184814.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the mass located?\nChoices:\n(A) Left lobe\n(B) Right lobe\n(C) Middle lobe\n(D) Entire lung" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.84375, "loss": 4.848730564117432 }, { "id": "identity_326048", "image": "DVQA/images/bar_train_00185527.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are these bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 21.53125, "loss": 4.177024841308594 }, { "id": "identity_226747", "image": "DVQA/images/bar_train_00120334.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold for each item across both shops, and which item has the highest total sales?" }, { "from": "gpt", "value": "The answer is bird and cap" } ], "clip_score": 37.0625, "loss": 4.142259120941162 }, { "id": "identity_255463", "image": "A-OKVQA/images/561037.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary purpose of the organization featured on the white square sticker on the motorcycle, and how does it relate to the context of the image?" }, { "from": "gpt", "value": "The answer is Provide financial protection and risk management services for vehicles" } ], "clip_score": 20.75, "loss": 4.7489423751831055 }, { "id": "identity_32781", "image": "Super-CLEVR/images/superCLEVR_new_000003.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there anything else that is the same material as the car?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 18.546875, "loss": 5.041713237762451 }, { "id": "identity_300147", "image": "TQA/images/abc_question_images/earth_moon_phases_16010.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter corresponds to the full moon?\nChoices:\n(A) W\n(B) M\n(C) H\n(D) X" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.203125, "loss": 4.093865871429443 }, { "id": "identity_272914", "image": "FigureQA/images/70854.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Tomato or Aqua?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.34375, "loss": 4.741843223571777 }, { "id": "identity_336672", "image": "IconQA/images/64605.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this shape have a circular face?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.15625, "loss": 4.746356964111328 }, { "id": "identity_334767", "image": "DocVQA/images/gjdw0228_10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To whom was the mail addressed?" }, { "from": "gpt", "value": "The answer is dr. ian munro" } ], "clip_score": 25.375, "loss": 4.202646255493164 }, { "id": "identity_281805", "image": "A-OKVQA/images/488747.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the bucket contain?\nChoices:\n(A) food/water\n(B) money\n(C) gas\n(D) free shirts" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.40625, "loss": 4.507630348205566 }, { "id": "identity_34609", "image": "IconQA/images/103983.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a rocket. Which picture is fourth?\nChoices:\n(A) star\n(B) moon\n(C) rocket" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.84375, "loss": 4.09749698638916 }, { "id": "identity_279703", "image": "PMC-VQA/images/PMC3447726_F4_156431.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the main subject of the image?\nOptions:\n(A) Bovine oviducts at Day 1.5\n(B) PGRMC1 localization in the infundibulum\n(C) PGRMC1 localization in the ampulla\n(D) PGRMC1 localization in the isthmus" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.09375, "loss": 4.618589401245117 }, { "id": "identity_292542", "image": "PMC-VQA/images/PMC5787541_F1_268448.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: To which artery is the left internal mammary artery graft connected?\nChoices:\n(A) Right anterior descending coronary artery\n(B) Left anterior descending coronary artery\n(C) Ramus circumflexus\n(D) Aorta" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.625, "loss": 4.101130962371826 }, { "id": "identity_52234", "image": "DVQA/images/bar_train_00130717.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object is the least preferred by people?" }, { "from": "gpt", "value": "The answer is affair" } ], "clip_score": 27.4375, "loss": 4.666046142578125 }, { "id": "identity_61557", "image": "PlotQA/images/87307.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the trend of the financial account from 2002 to 2010?" }, { "from": "gpt", "value": "The answer is Decreased sharply then remained constant" } ], "clip_score": 33.75, "loss": 4.219172477722168 }, { "id": "identity_7998", "image": "PMC-VQA/images/PMC7339413_Fig3_206162.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which anatomical structure is the soft tissue mass located in the patient? \nChoices:\n(A) Right scapula \n(B) Left scapula \n(C) Right clavicle \n(D) Left clavicle " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.421875, "loss": 4.045581817626953 }, { "id": "identity_23042", "image": "VQA2.0/images/30841.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the sunset?" }, { "from": "gpt", "value": "The answer is not yet" } ], "clip_score": 13.9765625, "loss": 4.528474807739258 }, { "id": "identity_109628", "image": "TextVQA/images/9719648770361a75.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the book cover?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 20.15625, "loss": 4.024481773376465 }, { "id": "identity_34546", "image": "Super-CLEVR/images/superCLEVR_new_000727.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The matte minivan that is the same color as the utility bike is what size?" }, { "from": "gpt", "value": "The answer is small" } ], "clip_score": 27.578125, "loss": 4.0145463943481445 }, { "id": "identity_321096", "image": "PMC-VQA/images/PMC8934646_F2_226881.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of views are depicted in the caption?\nChoices:\n(A) Side views\n(B) Top views\n(C) Close-up views\n(D) Full views" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.421875, "loss": 4.2962775230407715 }, { "id": "identity_317270", "image": "VizWiz/images/VizWiz_train_00005316.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you describe this image including any text?" }, { "from": "gpt", "value": "The answer is flyer for matial arts classes" } ], "clip_score": 18.671875, "loss": 4.237438678741455 }, { "id": "identity_204806", "image": "PMC-VQA/images/PMC3470993_F1_160087.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image shown?" }, { "from": "gpt", "value": "The answer is Axial plane" } ], "clip_score": 22.578125, "loss": 4.268191814422607 }, { "id": "identity_15494", "image": "A-OKVQA/images/515612.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What powers the musical instrument shown here?\nChoices:\n(A) solar\n(B) gas\n(C) battery\n(D) oil" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.6796875, "loss": 4.086834907531738 }, { "id": "identity_325386", "image": "ScienceQA/images/16030.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During this period, thermal energy moved from () to ().\nChoices:\n(A) each refrigerator . . . the surroundings\n(B) the surroundings . . . each refrigerator" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.0, "loss": 4.0258660316467285 }, { "id": "identity_303444", "image": "FigureQA/images/89838.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid's value the highest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.5, "loss": 4.025125980377197 }, { "id": "identity_267064", "image": "Super-CLEVR/images/superCLEVR_new_000260.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the cyan minivan located to the left of the green minivan the same size as the large shiny car?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 28.09375, "loss": 5.163581848144531 }, { "id": "identity_26280", "image": "IconQA/images/95035.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) flip\n(B) slide\n(C) turn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.1875, "loss": 4.474035739898682 }, { "id": "identity_295931", "image": "A-OKVQA/images/159979.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the square surrounding the black umbrella area?\nOptions:\n(A) red\n(B) yellow\n(C) blue\n(D) white" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.78125, "loss": 4.226074695587158 }, { "id": "identity_262615", "image": "IconQA/images/98211.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the time shown by filling in the blank with a time word, not a number. It is (_) past six." }, { "from": "gpt", "value": "The answer is quarter" } ], "clip_score": 30.03125, "loss": 4.37644100189209 }, { "id": "identity_218713", "image": "VQA-AS/images/7975.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many living creatures are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.75, "loss": 4.618764877319336 }, { "id": "identity_6553", "image": "AI2D/images/2670.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What comes after the March Equinox?\nChoices:\n(A) January\n(B) September Equinox\n(C) June Solstice\n(D) December Solstice" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 37.53125, "loss": 4.508243560791016 }, { "id": "identity_5334", "image": "ScienceQA/images/12221.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of California?\nChoices:\n(A) Little Rock\n(B) Sacramento\n(C) San Francisco\n(D) Boston" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.140625, "loss": 4.365713119506836 }, { "id": "identity_107527", "image": "VizWiz/images/VizWiz_train_00006743.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of container is shown in the image?" }, { "from": "gpt", "value": "The answer is A spice jar" } ], "clip_score": 22.125, "loss": 4.216409206390381 }, { "id": "identity_311236", "image": "Super-CLEVR/images/superCLEVR_new_000416.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the thing that is on the left side of the vehicle and behind the aircraft?" }, { "from": "gpt", "value": "The answer is rubber" } ], "clip_score": 24.546875, "loss": 4.0323076248168945 }, { "id": "identity_329760", "image": "A-OKVQA/images/5131.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of vessel is associated with the people marching here?\nChoices:\n(A) bikes\n(B) plane\n(C) water craft\n(D) golf cart" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.890625, "loss": 4.37404727935791 }, { "id": "identity_264493", "image": "AI2D/images/1326.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What anatomical structure is located between the lungs?\nChoices:\n(A) none of the above\n(B) alveoli\n(C) trachea\n(D) larynx" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.4375, "loss": 4.583238124847412 }, { "id": "identity_257002", "image": "Super-CLEVR/images/superCLEVR_new_000423.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are the same color as the small rubber biplane, and what are their sizes?" }, { "from": "gpt", "value": "The answer is One object, large" } ], "clip_score": 26.4375, "loss": 5.2037787437438965 }, { "id": "identity_34510", "image": "A-OKVQA/images/550055.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the circular object under the wing?\nChoices:\n(A) aileron\n(B) jet engine\n(C) compartment\n(D) trash can" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.4375, "loss": 4.661427021026611 }, { "id": "identity_17345", "image": "TextVQA/images/04b1e42b4117a914.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what type of cab is the yellow cab?" }, { "from": "gpt", "value": "The answer is union" } ], "clip_score": 25.046875, "loss": 5.5328474044799805 }, { "id": "identity_309730", "image": "PMC-VQA/images/PMC8270928_Fig5_6572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this depict regarding the mineral composition and textures? \nChoices:\n(A) Orthopyroxene with Fe-rich corroded core and Fe-poor euhedral rim \n(B) Olivine with Fe-rich corroded core and Fe-poor euhedral rim \n(C) Clinopyroxene with Fe-rich corroded core and Fe-rich euhedral rim \n(D) Plagioclase with Fe-rich corroded core and Fe-poor euhedral rim." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.203125, "loss": 4.533287048339844 }, { "id": "identity_189996", "image": "A-OKVQA/images/78466.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the tennis player's skirt?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 26.09375, "loss": 4.76602029800415 }, { "id": "identity_154079", "image": "AI2D/images/3788.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the structure labeled D?" }, { "from": "gpt", "value": "The answer is Anchor and absorb" } ], "clip_score": 23.8125, "loss": 4.509198188781738 }, { "id": "identity_215182", "image": "PMC-VQA/images/PMC6536547_Fig8_485626.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the texture of the surface on which the bacteria are located?" }, { "from": "gpt", "value": "The answer is The texture description in a few words" } ], "clip_score": 32.0, "loss": 4.623302936553955 }, { "id": "identity_294643", "image": "IconQA/images/32704.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on? \n(A) white\n(B) green" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.109375, "loss": 4.500319480895996 }, { "id": "identity_278748", "image": "PMC-VQA/images/PMC3539948_F2_177557.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, what is depicted as decreasing?\nChoices:\n(A) Small myelinated fibers,\n(B) Large non-myelinated fibers,\n(C) Large myelinated fibers,\n(D) Small non-myelinated fibers." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.671875, "loss": 4.162045001983643 }, { "id": "identity_37744", "image": "FigureQA/images/96174.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Orange have the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.90625, "loss": 4.307560920715332 }, { "id": "identity_14007", "image": "A-OKVQA/images/328810.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The desire to do what is likely driving the woman to rearrange the magnets?\nChoices:\n(A) clean\n(B) color sort\n(C) form words\n(D) aesthetics" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.0546875, "loss": 4.678473472595215 }, { "id": "identity_332071", "image": "VQA2.0/images/409681.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of hat is this person wearing?" }, { "from": "gpt", "value": "The answer is sun" } ], "clip_score": 15.2578125, "loss": 4.857438087463379 }, { "id": "identity_11297", "image": "AI2D/images/1081.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What encloses the cell?\nChoices:\n(A) chromatin\n(B) plasma membrane\n(C) cytosol\n(D) centriole" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.6875, "loss": 4.327546119689941 }, { "id": "identity_270975", "image": "IconQA/images/59834.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you randomly choose a marble without looking, which color are you more inclined to select? \nChoices:\n(A) pink\n(B) black\n(C) neither; black and pink have equal chances of being selected." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.125, "loss": 4.0720062255859375 }, { "id": "identity_34221", "image": "A-OKVQA/images/4266.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this activity for?\nChoices:\n(A) racing\n(B) photo taking\n(C) practice\n(D) leisure" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.15625, "loss": 4.709573268890381 }, { "id": "identity_4089", "image": "IconQA/images/4682.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The clocks show two things Jackson did Sunday before bed. Which did Jackson do later?\nChoices:\n(A) ate dinner\n(B) took a shower" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.234375, "loss": 4.437298774719238 }, { "id": "identity_125258", "image": "A-OKVQA/images/525567.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are visible in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 24.703125, "loss": 4.001789569854736 }, { "id": "identity_335111", "image": "A-OKVQA/images/522262.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What item would typically be used with these things?\nChoices:\n(A) missile launcher\n(B) windshield wipers\n(C) radar\n(D) chain" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.3515625, "loss": 4.15588903427124 }, { "id": "identity_104346", "image": "Super-CLEVR/images/superCLEVR_new_001163.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is positioned directly in front of the red bicycle?" }, { "from": "gpt", "value": "The answer is Motorcycle" } ], "clip_score": 23.09375, "loss": 4.074142932891846 }, { "id": "identity_335979", "image": "A-OKVQA/images/369221.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of landscape is shown in this image?\nChoices:\n(A) beach\n(B) plain\n(C) sand dune\n(D) desert" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.09375, "loss": 4.370370388031006 }, { "id": "identity_338238", "image": "PMC-VQA/images/PMC5732013_FIG5_253432.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the glioma located?\nChoices:\n(A) Posterior occipital\n(B) Anterior occipital\n(C) Temporal\n(D) Parietal" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.5625, "loss": 4.476932525634766 }, { "id": "identity_111785", "image": "VQA-AS/images/14736.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of window design is present in the room?" }, { "from": "gpt", "value": "The answer is A round arch window with divided panes" } ], "clip_score": 28.0, "loss": 4.030159950256348 }, { "id": "identity_208931", "image": "VQA2.0/images/539299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are visible in the background?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 14.859375, "loss": 4.420365333557129 }, { "id": "identity_307775", "image": "PMC-VQA/images/PMC5035136_f0025_76894.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being shown in these scatterplots on the right?\nChoices:\n(A) PPI correlation\n(B) Contrast of feature\n(C) MDN connectivity\n(D) Performance measurement" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.78125, "loss": 4.398189544677734 }, { "id": "identity_104652", "image": "PMC-VQA/images/PMC4247311_f3-etm-09-01-0120_340158.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label at the bottom of the image indicating?" }, { "from": "gpt", "value": "The answer is The sample is treated with siRNA_scr" } ], "clip_score": 34.34375, "loss": 4.348461151123047 }, { "id": "identity_266600", "image": "ScienceQA/images/12221.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of California?\n(A) Little Rock\n(B) Sacramento\n(C) San Francisco\n(D) Boston" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.796875, "loss": 4.280159950256348 }, { "id": "identity_214669", "image": "TextVQA/images/2d407690e9cbd2df.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different colors are present on the umbrellas?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 30.21875, "loss": 4.053874969482422 }, { "id": "identity_2169", "image": "A-OKVQA/images/578334.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this location?\nChoices:\n(A) starting line\n(B) park\n(C) riverside\n(D) finishing line" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.75, "loss": 4.2517476081848145 }, { "id": "identity_221889", "image": "VQA2.0/images/357572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person on the campaign sign?" }, { "from": "gpt", "value": "The answer is Dee Hunter" } ], "clip_score": 27.3125, "loss": 4.232920169830322 }, { "id": "identity_119923", "image": "VQA-AS/images/4290.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.25, "loss": 4.228411674499512 }, { "id": "identity_313878", "image": "ScienceQA/images/15015.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is shown?\nChoices:\n(A) South America\n(B) Antarctica\n(C) Europe\n(D) Africa" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.671875, "loss": 4.952884197235107 }, { "id": "identity_333753", "image": "PMC-VQA/images/PMC4514952_Fig2_409253.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where were the abnormalities found in the brain?\nChoices:\n(A) temporal lobe\n(B) frontal lobe\n(C) parietal lobe\n(D) occipital lobe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.84375, "loss": 4.26009464263916 }, { "id": "identity_131984", "image": "A-OKVQA/images/166586.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals can be seen wearing hats in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 17.46875, "loss": 4.309001445770264 }, { "id": "identity_316759", "image": "VizWiz/images/VizWiz_train_00000062.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of this?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 14.5859375, "loss": 4.025042533874512 }, { "id": "identity_273555", "image": "FigureQA/images/23729.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following statements correctly compares Peru and Purple in terms of being less than each other? \n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.21875, "loss": 4.5201921463012695 }, { "id": "identity_262104", "image": "FigureQA/images/83308.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Magenta the curve with the largest area underneath it?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.25, "loss": 4.292775630950928 }, { "id": "identity_12918", "image": "A-OKVQA/images/318820.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which section of the supermarket is this man standing?\nChoices:\n(A) bakery\n(B) produce\n(C) checkout\n(D) meat" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.140625, "loss": 4.342892169952393 }, { "id": "identity_112576", "image": "PMC-VQA/images/PMC4947321_Fig1_52425.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiological sign called that is seen in the image?" }, { "from": "gpt", "value": "The answer is Target sign" } ], "clip_score": 24.421875, "loss": 4.965170860290527 }, { "id": "identity_325806", "image": "A-OKVQA/images/296876.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these animals gathering in the middle of?\nChoices:\n(A) parking lot\n(B) lake\n(C) field\n(D) forest" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.4375, "loss": 4.074868679046631 }, { "id": "identity_215960", "image": "VizWiz/images/VizWiz_train_00007638.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many light reflections can be seen on the countertop?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.640625, "loss": 4.271437644958496 }, { "id": "identity_295362", "image": "A-OKVQA/images/456756.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what characteristic is this animal recognized?\nChoices:\n(A) spinning webs\n(B) long neck\n(C) gills\n(D) armored shell" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.34375, "loss": 4.342948913574219 }, { "id": "identity_4620", "image": "AI2D/images/3229.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where on the diagram do you see the roots?\nChoices:\n(A) (i)\n(B) (ii)\n(C) (iv)\n(D) (iii)" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.3125, "loss": 4.089663505554199 }, { "id": "identity_324424", "image": "VQA2.0/images/554518.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are these tablecloths?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 16.484375, "loss": 4.616003513336182 }, { "id": "identity_262781", "image": "ScienceQA/images/20960.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city serves as the capital of Massachusetts?\nChoices:\n(A) Boston\n(B) Cambridge\n(C) Providence\n(D) Reno" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.875, "loss": 4.137396812438965 }, { "id": "identity_197674", "image": "PMC-VQA/images/PMC6292007_Fig4_411132.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the background in the image?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 21.015625, "loss": 4.532969951629639 }, { "id": "identity_337261", "image": "A-OKVQA/images/204036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of transportation is depicted?\nChoices:\n(A) road\n(B) air\n(C) rail\n(D) water" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.90625, "loss": 4.498350620269775 }, { "id": "identity_313667", "image": "PMC-VQA/images/PMC8045906_acn351333-fig-0002_417680.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What surgical procedure was done before these images were taken?\nChoices:\n(A) Craniotomy \n(B) Thalamotomy \n(C) Pallidotomy \n(D) Angioplasty" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.71875, "loss": 4.2675676345825195 }, { "id": "identity_281198", "image": "DVQA/images/bar_train_00041801.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of algorithms with accuracies exceeding 2?" }, { "from": "gpt", "value": "The answer is seven" } ], "clip_score": 31.15625, "loss": 4.24922513961792 }, { "id": "identity_249118", "image": "ScienceQA/images/1560.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which two properties do these four objects have in common?" }, { "from": "gpt", "value": "The answer is smooth and hard" } ], "clip_score": 25.859375, "loss": 4.419387340545654 }, { "id": "identity_127950", "image": "TextVQA/images/00922ab06cc71238.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of machinery is shown in the image?" }, { "from": "gpt", "value": "The answer is Engine" } ], "clip_score": 22.34375, "loss": 4.020730495452881 }, { "id": "identity_264044", "image": "A-OKVQA/images/514601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the signs hanging from the ceiling?\nChoices:\n(A) advertisements\n(B) identify benches\n(C) guide travelers\n(D) cameras" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.46875, "loss": 4.831548690795898 }, { "id": "identity_27539", "image": "ScienceQA/images/16420.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) New Zealand\n(B) Kiribati\n(C) Nauru\n(D) Vanuatu" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.125, "loss": 4.878602027893066 }, { "id": "identity_51070", "image": "DocVQA/images/jggn0226_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the surgeon's last name?" }, { "from": "gpt", "value": "The answer is Tigges" } ], "clip_score": 16.890625, "loss": 4.819548606872559 }, { "id": "identity_312661", "image": "A-OKVQA/images/252968.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What powers this train?\nChoices:\n(A) steam\n(B) battery\n(C) gas\n(D) electricity" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.359375, "loss": 4.054342746734619 }, { "id": "identity_28468", "image": "A-OKVQA/images/95744.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Encouraging what American Ice cream treat is an obvious choice for these vendors?\nChoices:\n(A) banana split\n(B) chocolate malt\n(C) chocolate sundae\n(D) brownie cake" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.40625, "loss": 4.230530261993408 }, { "id": "identity_36961", "image": "PMC-VQA/images/PMC2994755_pone-0014150-g005_80034.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the activation and correlation maps superimposed on? \nChoices:\n(A) Whole-body images \n(B) High-resolution brain images \n(C) CT scans \n(D) X-rays " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.4375, "loss": 4.307092189788818 }, { "id": "identity_16124", "image": "ScienceQA/images/18416.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) New Jersey\n(B) South Carolina\n(C) North Carolina\n(D) Delaware" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.125, "loss": 4.194612979888916 }, { "id": "identity_295150", "image": "IconQA/images/27960.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the probability of selecting a black marble without looking?\nChoices:\n(A) probable\n(B) certain\n(C) unlikely\n(D) impossible" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.9375, "loss": 4.475793361663818 }, { "id": "identity_323604", "image": "IconQA/images/56313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this symbol?\nChoices:\n(A) flip\n(B) slide\n(C) turn" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.859375, "loss": 4.040869235992432 }, { "id": "identity_28469", "image": "TabMWP/images/37704.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is the function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.984375, "loss": 4.418914318084717 }, { "id": "identity_130801", "image": "VQA2.0/images/34624.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many potted plants are on the top shelf?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 17.796875, "loss": 5.980090618133545 }, { "id": "identity_132875", "image": "VizWiz/images/VizWiz_train_00001074.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheat stalks can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 18.828125, "loss": 4.293581962585449 }, { "id": "identity_319182", "image": "FigureQA/images/85481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the area under the curve for Seafoam the largest?\n\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.984375, "loss": 4.496170997619629 }, { "id": "identity_132752", "image": "Super-CLEVR/images/superCLEVR_new_001053.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object located between the purple motorcycle and the green van?" }, { "from": "gpt", "value": "The answer is White and grey" } ], "clip_score": 31.75, "loss": 5.209064960479736 }, { "id": "identity_297077", "image": "A-OKVQA/images/136185.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location can this bookshelf be found?\nChoices:\n(A) home\n(B) courtroom\n(C) store\n(D) library" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.296875, "loss": 4.342803478240967 }, { "id": "identity_296370", "image": "VQA2.0/images/357641.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is toilet paper required for the toilet?" }, { "from": "gpt", "value": "The answer is unknown" } ], "clip_score": 17.765625, "loss": 4.404399394989014 }, { "id": "identity_35885", "image": "PMC-VQA/images/PMC4459984_pone.0128306.g007_394562.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the white arrow in the photomicrographs represent?\nChoices:\n(A) nucleus \n(B) cytoplasm \n(C) storage vesicles \n(D) extracellular matrix " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.28125, "loss": 4.688125133514404 }, { "id": "identity_286256", "image": "A-OKVQA/images/463533.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the man performing while balancing on one leg?\nChoices:\n(A) singing\n(B) hopping\n(C) pitching\n(D) hiding" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.890625, "loss": 4.34720516204834 }, { "id": "identity_132558", "image": "Super-CLEVR/images/superCLEVR_new_000819.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bicycle located to the left of the green motorbike?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 24.375, "loss": 4.421044826507568 }, { "id": "identity_65408", "image": "ScienceQA/images/13025.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are interacting with the device?" }, { "from": "gpt", "value": "The answer is Two individuals" } ], "clip_score": 22.0, "loss": 4.855055332183838 }, { "id": "identity_322292", "image": "PMC-VQA/images/PMC4581082_Fig2_426468.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What stain was used to obtain this? \nChoices:\n(A) Giemsa stain \n(B) Hematoxylin stain \n(C) Eosin stain \n(D) Gram stain" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.375, "loss": 4.559417724609375 }, { "id": "identity_287577", "image": "PMC-VQA/images/PMC7447704_Fig15_240131.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of image was utilized to capture the rim enhancement: \n(A) T2-weighted image \n(B) CT scan \n(C) T1-weighted image \n(D) X-ray" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.78125, "loss": 4.306573867797852 }, { "id": "identity_25619", "image": "A-OKVQA/images/124609.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the smoke near the horses coming from?\nChoices:\n(A) auto exhaust\n(B) horses kicking\n(C) sticks\n(D) mountains" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.890625, "loss": 4.636317729949951 }, { "id": "identity_25864", "image": "PMC-VQA/images/PMC4638099_Fig1_442499.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the pattern of stripes and dots within the white matter resemble? \nChoices:\n(A) Zebra print \n(B) Leopard spots \n(C) Tiger skin \n(D) Snake scales " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.21875, "loss": 4.688834190368652 }, { "id": "identity_329663", "image": "A-OKVQA/images/399628.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the round platform here do?\nChoices:\n(A) sit still\n(B) turn upsidedown\n(C) nothing\n(D) rotate" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.7265625, "loss": 4.263970375061035 }, { "id": "identity_303081", "image": "FigureQA/images/70905.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Khaki the one with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.640625, "loss": 4.711251735687256 }, { "id": "identity_3495", "image": "PMC-VQA/images/PMC3795359_F2_237149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the section of the brain where the staining is observed? \nChoices:\n(A) Coronal section \n(B) Axial section \n(C) Sagittal section \n(D) Frontal section " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.234375, "loss": 4.023451328277588 }, { "id": "identity_308690", "image": "FigureQA/images/30126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Medium Orchid?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.828125, "loss": 4.03458309173584 }, { "id": "identity_292293", "image": "FigureQA/images/47993.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Saddle Brown the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.625, "loss": 4.525954246520996 }, { "id": "identity_222897", "image": "TextVQA/images/5af917d6bf6979b0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tractors are visible in the background?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.015625, "loss": 4.506606101989746 }, { "id": "identity_260345", "image": "TQA/images/abc_question_images/human_system_eye_16140.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which of the labeled elements is responsible for controlling the amount of light that enters the eye, and what is its relationship with the pupil?" }, { "from": "gpt", "value": "The answer is V" } ], "clip_score": 31.4375, "loss": 4.771002769470215 }, { "id": "identity_336904", "image": "DocVQA/images/gscg0023_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is in charge of chain authorisation?" }, { "from": "gpt", "value": "The answer is pat chapel" } ], "clip_score": 22.203125, "loss": 4.871413230895996 }, { "id": "identity_129147", "image": "A-OKVQA/images/512156.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pieces of bread can be seen on the table?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 20.859375, "loss": 4.333555698394775 }, { "id": "identity_212815", "image": "VQA2.0/images/137748.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrians are visible on the sidewalk?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.515625, "loss": 4.312544822692871 }, { "id": "identity_211721", "image": "VQA2.0/images/174185.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 16.890625, "loss": 4.328519821166992 }, { "id": "identity_299600", "image": "A-OKVQA/images/79654.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the example of people on horses in a line meant to represent?\nChoices:\n(A) rodeo\n(B) street performing\n(C) parade\n(D) crowd control" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.71875, "loss": 4.57541036605835 }, { "id": "identity_194411", "image": "PMC-VQA/images/PMC3834309_pone-0080939-g005_244798.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the marked region labeled as number 2?" }, { "from": "gpt", "value": "The answer is Irregular shape" } ], "clip_score": 16.1875, "loss": 4.2131242752075195 }, { "id": "identity_305770", "image": "Super-CLEVR/images/superCLEVR_new_000183.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there another big green object that is the same shape as the large green metallic thing; what material is it?" }, { "from": "gpt", "value": "The answer is rubber" } ], "clip_score": 19.375, "loss": 5.120058536529541 }, { "id": "identity_62077", "image": "DocVQA/images/ysmp0227_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the program mentioned in the inquiries section?" }, { "from": "gpt", "value": "The answer is INP Program" } ], "clip_score": 12.96875, "loss": 4.636604309082031 }, { "id": "identity_222142", "image": "A-OKVQA/images/494637.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the carriage?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 27.109375, "loss": 4.251513957977295 }, { "id": "identity_189019", "image": "PMC-VQA/images/PMC7071643_Fig3_133736.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of chronic ischemic changes in the brain?" }, { "from": "gpt", "value": "The answer is No clear signs in this DWI sequence" } ], "clip_score": 30.203125, "loss": 4.41915225982666 }, { "id": "identity_291168", "image": "A-OKVQA/images/217989.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color door should you open first if you needed to freeze your vodka?\n(A) brown\n(B) white\n(C) glass\n(D) chrome" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 11.3515625, "loss": 4.075806617736816 }, { "id": "identity_129439", "image": "VQA2.0/images/419664.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stuffed animals are on the couch?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.765625, "loss": 4.969647407531738 }, { "id": "identity_19287", "image": "PMC-VQA/images/PMC4107474_F2_307937.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the kidney is shown in the image? \nChoices:\n(A) renal cortex \n(B) nephron \n(C) renal medulla \n(D) renal pelvis " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.859375, "loss": 4.575236797332764 }, { "id": "identity_36528", "image": "A-OKVQA/images/441379.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the number on outfit worn by the goats?\nChoices:\n(A) group number\n(B) age\n(C) bib number\n(D) height" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.296875, "loss": 4.055386066436768 }, { "id": "identity_143027", "image": "DVQA/images/bar_train_00050305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many groups have \"flavor\" sales less than 2 units?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 25.59375, "loss": 4.37190055847168 }, { "id": "identity_117344", "image": "VQA2.0/images/546285.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the metal basket target?" }, { "from": "gpt", "value": "The answer is Silver and blue" } ], "clip_score": 19.21875, "loss": 4.185003757476807 }, { "id": "identity_30038", "image": "A-OKVQA/images/172924.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which continent is the train?\nChoices:\n(A) north america\n(B) europe\n(C) africa\n(D) asia" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.6875, "loss": 4.127367973327637 }, { "id": "identity_222784", "image": "PMC-VQA/images/PMC2562460_F6_28478.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scale bar at the bottom of the image indicating?" }, { "from": "gpt", "value": "The answer is Reference for size of structures in known units" } ], "clip_score": 21.03125, "loss": 4.599709987640381 }, { "id": "identity_327690", "image": "PMC-VQA/images/PMC8587501_Fig13_110756.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique used for this image?\nChoices:\n(A) Coronal CT \n(B) CE FS T1W image \n(C) PET scan \n(D) Ultrasound scan" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.59375, "loss": 4.391236782073975 }, { "id": "identity_101", "image": "ScienceQA/images/7565.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which bird's beak is also adapted to catch insects?\nChoices:\n(A) European robin\n(B) military macaw" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.203125, "loss": 4.0297675132751465 }, { "id": "identity_314116", "image": "PMC-VQA/images/PMC4079070_F9_302790.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in higher magnification in the marked area of the image?\nChoices:\n(A) The cortical gyri in the occipital cortex\n(B) The cortical gyri in the prefrontal cortex\n(C) The cerebellum\n(D) The thalamus." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.421875, "loss": 4.407853126525879 }, { "id": "identity_274028", "image": "DVQA/images/bar_train_00172397.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In any shop, which item had the lowest number of units sold?" }, { "from": "gpt", "value": "The answer is fall" } ], "clip_score": 28.9375, "loss": 4.013891696929932 }, { "id": "identity_294550", "image": "IconQA/images/92856.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the likelihood of selecting a black marble without looking?\nChoices:\n(A) probable\n(B) certain\n(C) impossible\n(D) unlikely" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.640625, "loss": 4.227686882019043 }, { "id": "identity_233095", "image": "PMC-VQA/images/PMC6026116_Fig4_335593.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the distinguishing features of a combined image (PA and US) shown in (a), and how do these features help in identifying the image type?" }, { "from": "gpt", "value": "The answer is Integration of PA and US features, red areas, grayscale structure" } ], "clip_score": 21.625, "loss": 4.3184990882873535 }, { "id": "identity_323624", "image": "IconQA/images/103209.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, which color are you more likely to get?\nChoices:\n(A) pink\n(B) neither; black and pink are equally likely\n(C) black" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.1875, "loss": 4.5718889236450195 }, { "id": "identity_15377", "image": "ScienceQA/images/8268.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which air temperature was measured within the outlined area shown?\nChoices:\n(A) -2°C\n(B) 5°C\n(C) 12°C" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.46875, "loss": 4.424508571624756 }, { "id": "identity_293178", "image": "A-OKVQA/images/377652.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose are the cables used for?\nChoices:\n(A) rescuing him\n(B) lifting him\n(C) climbing\n(D) holding him" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.234375, "loss": 4.3950958251953125 }, { "id": "identity_78405", "image": "TQA/images/abc_question_images/parts_telescope_18152.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label 'U' signify in the context of this telescope diagram?" }, { "from": "gpt", "value": "The answer is Mount/Stand" } ], "clip_score": 27.203125, "loss": 5.4409003257751465 }, { "id": "identity_25590", "image": "PMC-VQA/images/PMC5303254_Fig3_126212.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color of the panel in the image? \nChoices:\n(A) Blue \n(B) Green \n(C) Yellow \n(D) Red " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.25, "loss": 4.045930862426758 }, { "id": "identity_295890", "image": "PMC-VQA/images/PMC7562835_ccr33014-fig-0001_274877.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the subject matter depicted in this image?\nChoices:\n(A) gastrointestinal system\n(B) respiratory system\n(C) cardiovascular system\n(D) urinary system" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.765625, "loss": 4.932015419006348 }, { "id": "identity_51309", "image": "DocVQA/images/mpbg0023_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of contract does EMRO want a guaranteed price for?" }, { "from": "gpt", "value": "The answer is PL contract" } ], "clip_score": 27.59375, "loss": 4.1278815269470215 }, { "id": "identity_188380", "image": "VQA2.0/images/185666.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the police motorcycles?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 30.21875, "loss": 4.885141372680664 }, { "id": "identity_26169", "image": "PMC-VQA/images/PMC9375826_FIG1_372143.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which side of the brain has ribboning of the gyri in the DWI image? \nChoices:\n(A) left side \n(B) right side \n(C) both sides \n(D) cannot determine " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.6875, "loss": 4.152378082275391 }, { "id": "identity_32107", "image": "A-OKVQA/images/106794.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person in the carriage most likely looking for?\nChoices:\n(A) food\n(B) hay\n(C) predators\n(D) passengers" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.625, "loss": 4.578202247619629 }, { "id": "identity_288904", "image": "PMC-VQA/images/PMC6020456_fig2_333758.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being displayed in the CT image on the right side of the mandible?\nChoices:\n(A) Periosteal reaction\n(B) Radiolucent lesion\n(C) Normal bone structure\n(D) Cavitation" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.265625, "loss": 4.6043381690979 }, { "id": "identity_38411", "image": "TQA/images/question_images/solar_system_4574.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the diagram, identify which is the largest planet.\nChoices:\n(A) Earth\n(B) Saturn\n(C) Jupiter\n(D) Neptune" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.171875, "loss": 4.016416549682617 }, { "id": "identity_3653", "image": "PMC-VQA/images/PMC4661982_Fig7_448463.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of microscopy was used to obtain the image? \nChoices:\n(A) TEM microscopy \n(B) Scanning electron microscopy \n(C) Confocal microscopy \n(D) Fluorescence microscopy " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.4375, "loss": 4.218680381774902 }, { "id": "identity_269690", "image": "FigureQA/images/17407.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Turquoise the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.921875, "loss": 4.630121231079102 }, { "id": "identity_111443", "image": "Super-CLEVR/images/superCLEVR_new_000572.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are depicted in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.375, "loss": 4.491026878356934 }, { "id": "identity_285741", "image": "A-OKVQA/images/267611.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the likely action the dog will take with the frisbee shown in the image?\nChoices:\n(A) what frisbee\n(B) avoid it\n(C) catch it\n(D) nothing" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.40625, "loss": 4.455776691436768 }, { "id": "identity_208100", "image": "A-OKVQA/images/26260.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many raw meat pieces are there on the green plate?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.765625, "loss": 4.32177209854126 }, { "id": "identity_268987", "image": "A-OKVQA/images/115005.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of bicycle is positioned behind the desk among the following options: hybrid, road, fixed gear, or mountain?\nChoices:\n(A) hybrid\n(B) road\n(C) fixed gear\n(D) mountain" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.625, "loss": 4.410924434661865 }, { "id": "identity_14482", "image": "PMC-VQA/images/PMC4161720_Fig1_319755.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of medical imaging was used to diagnose the condition? \nChoices:\n(A) X-Ray \n(B) MRI \n(C) PET-Scan \n(D) CT-Scan " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.171875, "loss": 4.689925193786621 }, { "id": "identity_196425", "image": "TextVQA/images/293f507bd888d00a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any decorative elements on the clock tower?" }, { "from": "gpt", "value": "The answer is Yes, gold accents and patterns" } ], "clip_score": 31.015625, "loss": 4.054244518280029 }, { "id": "identity_139060", "image": "DocVQA/images/rncj0037_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the table at the top of the image?" }, { "from": "gpt", "value": "The answer is RIP-6" } ], "clip_score": 22.96875, "loss": 4.688902854919434 }, { "id": "identity_319025", "image": "FigureQA/images/65013.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.75, "loss": 4.225117206573486 }, { "id": "identity_209822", "image": "A-OKVQA/images/259567.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dogs are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.640625, "loss": 4.359379291534424 }, { "id": "identity_34845", "image": "PMC-VQA/images/PMC4156216_f4-ol-08-04-1713_318275.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the imaging suggest had occurred in the same region? \nChoices:\n(A) Infection \n(B) Hemorrhage \n(C) Abscess \n(D) Tumor relapse " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.171875, "loss": 4.386744022369385 }, { "id": "identity_24249", "image": "PMC-VQA/images/PMC4495713_Fig4_404283.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the section shows stronger staining in the first observation? \nChoices:\n(A) Notochordal sheath \n(B) Lumen \n(C) Outer layer \n(D) Connective tissue " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.75, "loss": 4.16910457611084 }, { "id": "identity_138851", "image": "DVQA/images/bar_train_00065883.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object has the least preference in the \"crime\" category?" }, { "from": "gpt", "value": "The answer is aspect" } ], "clip_score": 35.6875, "loss": 4.077637672424316 }, { "id": "identity_308432", "image": "PMC-VQA/images/PMC8380352_Fig5_38840.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the light blue circles indicate in this image?\nChoices:\n(A) Blood cells\n(B) Iron-positive cells\n(C) Green-colored cells\n(D) Normal cells" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.390625, "loss": 4.585631370544434 }, { "id": "identity_301124", "image": "A-OKVQA/images/128110.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What could happen weather wise in this place?\nChoices:\n(A) rain\n(B) floods\n(C) hail\n(D) snow" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.28125, "loss": 4.27555513381958 }, { "id": "identity_77325", "image": "TQA/images/question_images/human_system_respiratory_6199.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many ribs are shown on one side of the human body in this image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 29.359375, "loss": 4.209089756011963 }, { "id": "identity_209544", "image": "A-OKVQA/images/497610.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the jacket of the person second from the left?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 18.3125, "loss": 4.368427276611328 }, { "id": "identity_228251", "image": "VQA2.0/images/371791.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does the presence of barren trees and the activity in the image suggest the season, and what is the likely season depicted?" }, { "from": "gpt", "value": "The answer is early spring" } ], "clip_score": 16.546875, "loss": 5.201512813568115 }, { "id": "identity_320948", "image": "PMC-VQA/images/PMC7316744_Fig6_199222.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the cyan areas in this represent? \nChoices:\n(A) the coral tissue autofluorescence \n(B) the chlorophyll autofluorescence \n(C) the non-fluorescent coral skeleton underneath the tissue \n(D) the excessive light conditions." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.078125, "loss": 4.326332092285156 }, { "id": "identity_332772", "image": "IconQA/images/90067.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a star. What is the second image?\nChoices:\n(A) star\n(B) moon\n(C) rocket" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.46875, "loss": 4.538023948669434 }, { "id": "identity_8444", "image": "ScienceQA/images/10395.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Maryland?\nChoices:\n(A) Annapolis\n(B) New Haven\n(C) Baltimore\n(D) Providence" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.84375, "loss": 4.178923606872559 }, { "id": "identity_320236", "image": "Super-CLEVR/images/superCLEVR_new_000832.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the rubber thing behind the large blue bicycle?" }, { "from": "gpt", "value": "The answer is double bus" } ], "clip_score": 29.234375, "loss": 5.024932384490967 }, { "id": "identity_29203", "image": "VQA2.0/images/26812.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is in the blue bowl?" }, { "from": "gpt", "value": "The answer is packets" } ], "clip_score": 18.984375, "loss": 5.034989356994629 }, { "id": "identity_263096", "image": "TQA/images/abc_question_images/human_system_digestive_13702.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What letter is used to represent the Spleen?\nChoices:\n(A) T\n(B) E\n(C) S\n(D) V" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.921875, "loss": 4.222866058349609 }, { "id": "identity_107924", "image": "Super-CLEVR/images/superCLEVR_new_000665.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object in the image is the largest in size?" }, { "from": "gpt", "value": "The answer is The object with the brown bags" } ], "clip_score": 15.3359375, "loss": 4.218306064605713 }, { "id": "identity_153946", "image": "ScienceQA/images/3501.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the dominant color of the figurine?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 29.109375, "loss": 4.307494163513184 }, { "id": "identity_27171", "image": "PMC-VQA/images/PMC5503588_F3_188338.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of radiograph is shown in the image? \nChoices:\n(A) Lateral radiograph \n(B) Oblique radiograph \n(C) AP radiograph \n(D) PA radiograph " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.390625, "loss": 4.294846534729004 }, { "id": "identity_287002", "image": "PMC-VQA/images/PMC4651706_fig1_445703.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the result of the diagnostic test?\nChoices:\n(A) Displaced fracture at the distal tibia epiphysis \n(B) Non-displaced fracture at the mid tibia diaphysis \n(C) Focal posterior cortical thickening at the level of the fracture \n(D) Focal anterior cortical thickening at the level of the fracture." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.953125, "loss": 4.388271331787109 }, { "id": "identity_323199", "image": "A-OKVQA/images/140843.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How was this object propelled forward?\nChoices:\n(A) blown on\n(B) kicked\n(C) dribbled\n(D) batted" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.34375, "loss": 4.244424819946289 }, { "id": "identity_125339", "image": "VQA-AS/images/7453.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the object on the ground to the right of the girls?" }, { "from": "gpt", "value": "The answer is A rock" } ], "clip_score": 23.328125, "loss": 4.5506978034973145 }, { "id": "identity_18473", "image": "A-OKVQA/images/7685.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is floating above the rocks?\nChoices:\n(A) bird\n(B) duck\n(C) kite\n(D) newspaper" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.7421875, "loss": 4.414821624755859 }, { "id": "identity_277782", "image": "Geometry3K/images/10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the measure of angle 4?\nChoices:\n(A) 26\n(B) 39\n(C) 55\n(D) 62" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.28125, "loss": 4.300443172454834 }, { "id": "identity_307986", "image": "TQA/images/abc_question_images/earth_eclipses_11671.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the label for the earth in the diagram?\nChoices:\n(A) P\n(B) Y\n(C) S\n(D) U" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.9375, "loss": 4.196835517883301 }, { "id": "identity_253469", "image": "TextVQA/images/534f4ce2ac0805ae.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full name of the company that made the jars and what additional information is provided on the labels?" }, { "from": "gpt", "value": "The answer is Diesel HPR, Propel, Neste Oil" } ], "clip_score": 28.671875, "loss": 4.559976577758789 }, { "id": "identity_110157", "image": "TextVQA/images/946e29d45efa4916.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the cover of the book?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 16.96875, "loss": 4.041903495788574 }, { "id": "identity_292103", "image": "FigureQA/images/14098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Dark Orchid or Orange?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.078125, "loss": 4.606936931610107 }, { "id": "identity_28721", "image": "PMC-VQA/images/PMC5558648_fig2_203640.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What staining method was used to obtain the image? \nChoices:\n(A) Hematoxylin and eosin staining \n(B) Immunohistochemistry staining \n(C) Masson staining \n(D) Periodic acid-Schiff staining " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.3125, "loss": 4.308331489562988 }, { "id": "identity_46271", "image": "DocVQA/images/ylvw0217_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the client mentioned in the report?" }, { "from": "gpt", "value": "The answer is Wyeth" } ], "clip_score": 22.109375, "loss": 4.250484943389893 }, { "id": "identity_262696", "image": "AI2D/images/966.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label corresponds to the Singly-toothed leaf?\nChoices:\n(A) C\n(B) B\n(C) E\n(D) D" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.359375, "loss": 4.151169776916504 }, { "id": "identity_316679", "image": "PMC-VQA/images/PMC3488412_fig1_163711.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do these images depict?\nChoices:\n(A) a pattern of atrophy in a sagittal slice\n(B) a pattern of atrophy in a coronal slice\n(C) a pattern of atrophy in an axial slice\n(D) a pattern of atrophy in a coronal and axial slice combined" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.0625, "loss": 4.2413201332092285 }, { "id": "identity_292934", "image": "PMC-VQA/images/PMC6503986_DEV169342F3_476197.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the green color represent in the images provided?\nChoices:\n(A) Dextran \n(B) Cell nuclei \n(C) Cyst cell membranes \n(D) Mitochondria" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.90625, "loss": 4.666728973388672 }, { "id": "identity_321752", "image": "PMC-VQA/images/PMC3920997_got033-F2_266340.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the orientation of the endoscope in this image?\nChoices:\n(A) lower oesophagus, towards a lateral wall \n(B) upper oesophagus, towards a lateral wall \n(C) middle oesophagus, towards a wall \n(D) upper oesophagus, towards a lateral wall" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.578125, "loss": 4.182819843292236 }, { "id": "identity_212470", "image": "VQA2.0/images/406710.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the canopy above the food stall?" }, { "from": "gpt", "value": "The answer is White with red text" } ], "clip_score": 19.59375, "loss": 4.672133445739746 }, { "id": "identity_68821", "image": "AI2D/images/3492.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many mitochondria are visible in the given plant cell diagram?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 37.0, "loss": 4.5849289894104 }, { "id": "identity_194949", "image": "TextVQA/images/974315dec07b9f44.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main action captured in this image?" }, { "from": "gpt", "value": "The answer is A baseball pitch" } ], "clip_score": 26.34375, "loss": 4.677013397216797 }, { "id": "identity_188334", "image": "A-OKVQA/images/395216.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many skateboarders are in a crouching position?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 27.875, "loss": 4.335227012634277 }, { "id": "identity_271298", "image": "ScienceQA/images/13618.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Illinois?\nChoices:\n(A) Dallas\n(B) Chicago\n(C) Springfield\n(D) Bismarck" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.546875, "loss": 4.251958847045898 }, { "id": "identity_34691", "image": "IconQA/images/44180.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) after nine." }, { "from": "gpt", "value": "The answer is quarter" } ], "clip_score": 26.28125, "loss": 5.158209800720215 }, { "id": "identity_143116", "image": "DocVQA/images/jzhd0227_77.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many of the reviewed projects were approved and funded for an amount less than $100,000?" }, { "from": "gpt", "value": "The answer is Six projects" } ], "clip_score": 18.5625, "loss": 4.5185065269470215 }, { "id": "identity_312626", "image": "TQA/images/question_images/sun_layers_6307.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the outermost structure inside?\nChoices:\n(A) Convection zone\n(B) Radiative zone\n(C) Photosphere\n(D) Prominence" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.40625, "loss": 4.425131797790527 }, { "id": "identity_206653", "image": "TextVQA/images/57205e3db8a9cb53.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bookmarks advertise a website URL?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 14.1484375, "loss": 4.49672269821167 }, { "id": "identity_26700", "image": "A-OKVQA/images/372554.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the green stuff here need?\nChoices:\n(A) water\n(B) salt\n(C) gas\n(D) electricity" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 3.935546875, "loss": 4.579405784606934 }, { "id": "identity_96274", "image": "CLEVR-Math/images/CLEVR_train_060178.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more gray spheres or green spheres?" }, { "from": "gpt", "value": "The answer is Equal number" } ], "clip_score": 28.984375, "loss": 4.041388511657715 }, { "id": "identity_170802", "image": "IconQA/images/97204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more diamonds in the top row or the bottom row?" }, { "from": "gpt", "value": "The answer is The same number" } ], "clip_score": 25.953125, "loss": 4.063349723815918 }, { "id": "identity_274855", "image": "A-OKVQA/images/469728.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for securing the bike to the rail?\nChoices:\n(A) stay upright\n(B) easily spotted\n(C) prevent theft\n(D) to sell" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.640625, "loss": 4.3744282722473145 }, { "id": "identity_303212", "image": "PMC-VQA/images/PMC4391185_Fig3_376517.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What method was used to capture these images?\nChoices:\n(A) X-ray \n(B) Magnetic Resonance Imaging (MRI) \n(C) Computed Tomography (CT) \n(D) Positron Emission Tomography (PET)" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.609375, "loss": 4.203488349914551 }, { "id": "identity_119447", "image": "A-OKVQA/images/264362.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of cheese are visible on the plate?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 26.328125, "loss": 4.594819068908691 }, { "id": "identity_272505", "image": "AI2D/images/4436.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is the name of fin H?\n(A) Anal fin\n(B) Pectoral fin\n(C) Dorsal fin\n(D) Pelvic fin" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.375, "loss": 4.832409381866455 }, { "id": "identity_215523", "image": "PMC-VQA/images/PMC6414139_acm212503-fig-0004_449322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different colors are used to outline structures in panel (b)?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 16.578125, "loss": 4.363932132720947 }, { "id": "identity_311263", "image": "AI2D/images/3244.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter indicates the Cytoplasm?\nChoices:\n(A) A\n(B) J\n(C) D\n(D) H" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.09375, "loss": 4.09060525894165 }, { "id": "identity_330663", "image": "DVQA/images/bar_train_00105570.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second stack of bars from here?" }, { "from": "gpt", "value": "The answer is start" } ], "clip_score": 25.328125, "loss": 4.143326759338379 }, { "id": "identity_2378", "image": "TQA/images/question_images/radioactive_decay_8174.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the function of the upper left superscript beside the element?\nChoices:\n(A) It shows the number of neutrons.\n(B) It shows the number of charge.\n(C) It shows the number of electons.\n(D) It shows the number of protons." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.90625, "loss": 4.896727561950684 }, { "id": "identity_37758", "image": "PMC-VQA/images/PMC7118815_Fig3_144434.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the physical examination finding observed in the study? \nChoices:\n(A) Swollen MTP joints \n(B) Reduced joint mobility \n(C) Joint pain \n(D) None of the above " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.3125, "loss": 4.292112827301025 }, { "id": "identity_7879", "image": "TextVQA/images/5358fc8440bafe4c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what are the letters on the side of the plane?" }, { "from": "gpt", "value": "The answer is aw" } ], "clip_score": 21.875, "loss": 4.917869567871094 }, { "id": "identity_287936", "image": "A-OKVQA/images/373153.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What category does the animal belong to? \nChoices:\n(A) domestic\n(B) aquatic\n(C) wild\n(D) stuffed" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.75, "loss": 4.486581802368164 }, { "id": "identity_318600", "image": "PMC-VQA/images/PMC8672052_FIG5_139657.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most likely diagnosis based on these findings?\nChoices:\n(A) lymphadenitis\n(B) lymphoma\n(C) metastatic disease\n(D) tuberculosis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.578125, "loss": 4.389270782470703 }, { "id": "identity_297819", "image": "IconQA/images/66868.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the likelihood of selecting a black marble without looking?\nChoices:\n(A) probable\n(B) impossible\n(C) certain\n(D) unlikely" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.9375, "loss": 4.307789325714111 }, { "id": "identity_306230", "image": "FigureQA/images/81979.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the area under the curve the largest for Light Slate?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.484375, "loss": 4.349308013916016 }, { "id": "identity_130122", "image": "TextVQA/images/87d7fdf7b45162d8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the featured speaker at the lecture?" }, { "from": "gpt", "value": "The answer is T. Leming" } ], "clip_score": 11.25, "loss": 4.376129627227783 }, { "id": "identity_297055", "image": "ScienceQA/images/13670.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Hawaii?\nChoices:\n(A) Helena\n(B) Raleigh\n(C) Honolulu\n(D) Trenton" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.4375, "loss": 4.175826072692871 }, { "id": "identity_297775", "image": "PMC-VQA/images/PMC4139810_ijms-15-11730-f005_314458.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is the subject of the photograph: human eye, micro-tubes, chymotrypsin, or SEM machine?\nChoices:\n(A) human eye \n(B) micro-tubes \n(C) chymotrypsin \n(D) SEM machine" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.46875, "loss": 4.236203670501709 }, { "id": "identity_274616", "image": "FigureQA/images/10333.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Blue the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.515625, "loss": 4.492938041687012 }, { "id": "identity_325996", "image": "A-OKVQA/images/149467.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which team is the catcher associated with?\nChoices:\n(A) cubs\n(B) yankees\n(C) astros\n(D) mets" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.40625, "loss": 4.149243354797363 }, { "id": "identity_273066", "image": "FigureQA/images/60664.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater in shade, Firebrick or Medium Seafoam?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.1875, "loss": 4.811550140380859 }, { "id": "identity_24005", "image": "PMC-VQA/images/PMC5029079_Fig1_74770.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the thoracic window seen in the images? \nChoices:\n(A) Mediastinal Window \n(B) Pulmonary Window \n(C) Abdominothoracic Window \n(D) Cardiovascular Window " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.875, "loss": 4.046624660491943 }, { "id": "identity_325526", "image": "PMC-VQA/images/PMC5725823_Fig1_251724.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of MR image is depicted in (a)?\nChoices:\n(A) T1-weighted image \n(B) T2-weighted image \n(C) DWI image \n(D) FLAIR image" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.328125, "loss": 4.234623908996582 }, { "id": "identity_301679", "image": "A-OKVQA/images/2823.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of vehicle are these?\nChoices:\n(A) truck\n(B) motorcycle\n(C) helicopter\n(D) bicycle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.125, "loss": 4.208868980407715 }, { "id": "identity_257625", "image": "PMC-VQA/images/PMC4592919_fig9_429543.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key characteristics of a diffusion-weighted MRI image, and how do they differ from T1-weighted and T2-weighted MRI images?" }, { "from": "gpt", "value": "The answer is DWI images show restricted diffusion as bright areas" } ], "clip_score": 31.703125, "loss": 4.198601722717285 }, { "id": "identity_16217", "image": "FigureQA/images/85239.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Light Green have the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.234375, "loss": 4.140020847320557 }, { "id": "identity_156103", "image": "AI2D/images/929.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many organisms in this food chain are classified as possibly being prey for the Sharp-shinned Hawk?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 22.546875, "loss": 4.447928428649902 }, { "id": "identity_240402", "image": "TabMWP/images/34990.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Determine the slope of the function represented by the table and verify if the function is linear by checking if the slope is constant between all points" }, { "from": "gpt", "value": "The answer is linear" } ], "clip_score": 25.140625, "loss": 4.894036769866943 }, { "id": "identity_264264", "image": "A-OKVQA/images/147228.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be seen on the railing?\nChoices:\n(A) skateboarder\n(B) package\n(C) birds\n(D) mannequin" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.46875, "loss": 4.23622989654541 }, { "id": "identity_182445", "image": "A-OKVQA/images/9708.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many living beings are in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 22.015625, "loss": 4.32727575302124 }, { "id": "identity_271372", "image": "FigureQA/images/75193.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Aqua the one with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.5, "loss": 4.109316349029541 }, { "id": "identity_38217", "image": "A-OKVQA/images/90393.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of trees are shown in the foreground?\nChoices:\n(A) evergreen\n(B) conifers\n(C) christmas\n(D) deciduous" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 6.35546875, "loss": 4.159767150878906 }, { "id": "identity_307888", "image": "FigureQA/images/45931.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.8125, "loss": 4.1029863357543945 }, { "id": "identity_4991", "image": "ScienceQA/images/15731.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Georgia\n(B) South Carolina\n(C) North Carolina\n(D) Florida" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.875, "loss": 4.298807144165039 }, { "id": "identity_283478", "image": "A-OKVQA/images/344292.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of company is most likely to utilize this location out of the following options: brewing, dairy, catering, or manufacturing?\nChoices:\n(A) brewing\n(B) dairy\n(C) catering\n(D) manufacturing" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.765625, "loss": 4.47222900390625 }, { "id": "identity_301158", "image": "AI2D/images/1079.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is located in the center of this structure?\nChoices:\n(A) nucleolus\n(B) ribosome\n(C) none of the above\n(D) lysosome" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.015625, "loss": 4.411111354827881 }, { "id": "identity_2213", "image": "ScienceQA/images/10763.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which material is this window made of?\nChoices:\n(A) glass\n(B) cotton" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.734375, "loss": 4.391323089599609 }, { "id": "identity_283832", "image": "PMC-VQA/images/PMC5679770_FIG3_237908.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which structure is shifted to the right in the given options? \n(A) External capsule \n(B) Internal capsule \n(C) Corticospinal tract \n(D) Cingulate gyrus" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.46875, "loss": 4.573663711547852 }, { "id": "identity_309869", "image": "TQA/images/question_images/human_system_eye_6133.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the function of the retina?\nChoices:\n(A) This is where the image of the object normally forms\n(B) It carries the nerve impulses to the brain\n(C) focuses the light on the back of the eye\n(D) protective covering on the outside of the eye" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.03125, "loss": 4.2340407371521 }, { "id": "identity_322872", "image": "PMC-VQA/images/PMC2248189_F3_17874.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the red color indicate in this image?\nChoices:\n(A) EGFP fluorescence \n(B) Organelle marker \n(C) Secondary antibody \n(D) Background noise" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.390625, "loss": 5.341193675994873 }, { "id": "identity_110813", "image": "TextVQA/images/b19f3edfebac5793.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of electronic device is shown in the image?" }, { "from": "gpt", "value": "The answer is Television" } ], "clip_score": 23.390625, "loss": 4.072370529174805 }, { "id": "identity_310609", "image": "A-OKVQA/images/327514.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this person doing here?\nChoices:\n(A) selling\n(B) baking\n(C) multitasking\n(D) drying" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.71875, "loss": 4.6119303703308105 }, { "id": "identity_189795", "image": "A-OKVQA/images/92136.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the sky clear or cloudy?" }, { "from": "gpt", "value": "The answer is Clear" } ], "clip_score": 13.90625, "loss": 4.219422340393066 }, { "id": "identity_307727", "image": "ScienceQA/images/8237.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony depicted?\nChoices:\n(A) Pennsylvania\n(B) Massachusetts\n(C) Rhode Island\n(D) Connecticut" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.484375, "loss": 4.773238182067871 }, { "id": "identity_20963", "image": "ScienceQA/images/13800.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Florida?\nChoices:\n(A) Biloxi\n(B) Chicago\n(C) Tallahassee\n(D) Orlando" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.265625, "loss": 4.197652339935303 }, { "id": "identity_131502", "image": "TextVQA/images/bbc3c318765994c5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What body of water is depicted in the letter \"P\"?" }, { "from": "gpt", "value": "The answer is A river" } ], "clip_score": 11.8671875, "loss": 4.005167484283447 }, { "id": "identity_291492", "image": "A-OKVQA/images/206975.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which mode of transportation is depicted in the image above?\nChoices:\n(A) road\n(B) water\n(C) air\n(D) railway" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.046875, "loss": 4.22190523147583 }, { "id": "identity_322548", "image": "VizWiz/images/VizWiz_train_00011709.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this a desk in an office?" }, { "from": "gpt", "value": "The answer is desk" } ], "clip_score": 32.5, "loss": 4.331843376159668 }, { "id": "identity_288254", "image": "PMC-VQA/images/PMC3227162_F1_117676.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, where is the thrombus located?\nChoices:\n(A) Right atrium\n(B) Right ventricle\n(C) Left atrium\n(D) Left ventricle" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.953125, "loss": 4.37910270690918 }, { "id": "identity_18910", "image": "PMC-VQA/images/PMC8127650_f1_443625.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique was used in the caption? \nChoices:\n(A) Positron Emission Tomography (PET) \n(B) Computed Tomography (CT) \n(C) Magnetic Resonance Imaging (MRI) \n(D) X-ray imaging " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.53125, "loss": 4.393562316894531 }, { "id": "identity_9263", "image": "FigureQA/images/23671.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Black the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.265625, "loss": 4.1622467041015625 }, { "id": "identity_323257", "image": "TextVQA/images/005a2a34b7a0e507.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What word is on this person's shirt?" }, { "from": "gpt", "value": "The answer is turbo" } ], "clip_score": 17.015625, "loss": 4.21521520614624 }, { "id": "identity_71439", "image": "AI2D/images/808.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What role do grasshoppers play in this food chain?" }, { "from": "gpt", "value": "The answer is primary consumer" } ], "clip_score": 29.953125, "loss": 4.464590549468994 }, { "id": "identity_198905", "image": "PMC-VQA/images/PMC5019903_fig6_72310.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many phalanges are visible in the X-ray image of the hand?" }, { "from": "gpt", "value": "The answer is Fourteen" } ], "clip_score": 33.84375, "loss": 4.689783573150635 }, { "id": "identity_26082", "image": "Super-CLEVR/images/superCLEVR_new_001320.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the motorbike that is in front of the large gray metallic object that is to the left of the metal fighter made of?" }, { "from": "gpt", "value": "The answer is rubber" } ], "clip_score": 26.984375, "loss": 4.556046962738037 }, { "id": "identity_184748", "image": "Super-CLEVR/images/superCLEVR_new_001556.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many propellers does the purple airplane have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.109375, "loss": 4.3252272605896 }, { "id": "identity_325590", "image": "A-OKVQA/images/216726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is most likely to be found in this place?\nChoices:\n(A) king cake\n(B) salmon\n(C) horse\n(D) tires" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.5390625, "loss": 4.327284336090088 }, { "id": "identity_105272", "image": "A-OKVQA/images/234929.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the plane's body?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 18.640625, "loss": 4.1855926513671875 }, { "id": "identity_216158", "image": "TextVQA/images/fe6cbda78fb74f87.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the label of the bottle on the top shelf, second from the right?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 26.578125, "loss": 4.039771556854248 }, { "id": "identity_77469", "image": "AI2D/images/1210.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many legs does the insect in the image have?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 30.578125, "loss": 4.167970180511475 }, { "id": "identity_313754", "image": "VQA-AS/images/6329.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this open?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 19.40625, "loss": 4.102529048919678 }, { "id": "identity_269375", "image": "A-OKVQA/images/334868.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What cargo does the truck equipped with a box-topped scissor lift transport?\nChoices:\n(A) pets\n(B) live bait\n(C) food\n(D) captains" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.890625, "loss": 4.180074691772461 }, { "id": "identity_198203", "image": "TextVQA/images/cd0ac1e8d03df907.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many drawers are visible in the kitchen display?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.15625, "loss": 4.454830169677734 }, { "id": "identity_313337", "image": "ScienceQA/images/329.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared characteristic of these two items?\nChoices:\n(A) sticky\n(B) yellow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.765625, "loss": 4.830741882324219 }, { "id": "identity_229477", "image": "VQA2.0/images/265080.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people in the image are wearing a white jersey, and what are their roles?" }, { "from": "gpt", "value": "The answer is One person, the batter" } ], "clip_score": 24.34375, "loss": 4.050322532653809 }, { "id": "identity_38050", "image": "A-OKVQA/images/117838.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these people waiting to do?\nChoices:\n(A) board bus\n(B) see show\n(C) shop sale\n(D) get treatment" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.84375, "loss": 4.086197853088379 }, { "id": "identity_288496", "image": "PMC-VQA/images/PMC9224862_jcm-11-03399-f001_321101.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique utilized in this image?\nChoices:\n(A) MRI\n(B) Ultrasound\n(C) CT scan\n(D) X-ray." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.453125, "loss": 5.012793064117432 }, { "id": "identity_296957", "image": "PMC-VQA/images/PMC3037324_F1_86764.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How does the surrounding area of the tumor appear on MRI imaging?\nChoices:\n(A) Dark signal \n(B) Bright signal \n(C) Low signal \n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.40625, "loss": 4.533193111419678 }, { "id": "identity_275899", "image": "Super-CLEVR/images/superCLEVR_new_001188.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the color of the big matte car in front of the large brown articulated bus the same as the matte object behind the rubber school bus?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 24.609375, "loss": 5.020964622497559 }, { "id": "identity_334453", "image": "ScienceQA/images/1058.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which place is highlighted?\nChoices:\n(A) Cuba\n(B) Jamaica\n(C) Haiti\n(D) Dominica" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.53125, "loss": 4.158640384674072 }, { "id": "identity_264087", "image": "PMC-VQA/images/PMC5991596_brb31010-fig-0003_325532.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the lateral geniculate body depicted in the image? Is it:\n(A) blue\n(B) pink\n(C) green\n(D) red" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.375, "loss": 4.770933151245117 }, { "id": "identity_210220", "image": "VQA2.0/images/357641.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many towel bars are visible in the bathroom?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 26.6875, "loss": 4.549899101257324 }, { "id": "identity_265129", "image": "VizWiz/images/VizWiz_train_00011564.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Good morning. Could you please confirm if this necklace is made of gold or silver? Thank you for your input." }, { "from": "gpt", "value": "The answer is silver white gold" } ], "clip_score": 28.984375, "loss": 4.662417411804199 }, { "id": "identity_215616", "image": "VQA2.0/images/487228.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image excluding the blurred face?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 16.4375, "loss": 4.555308818817139 }, { "id": "identity_296458", "image": "PMC-VQA/images/PMC3744490_pone-0072265-g004_224985.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the yellow color in the coded images? Is it the charcoal volume with the largest volume, smallest volume, average volume, or unknown volume?\nChoices:\n(A) Charcoal volume with the largest volume \n(B) Charcoal volume with the smallest volume \n(C) Charcoal volume with an average volume \n(D) Charcoal volume with unknown volume." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.40625, "loss": 4.087571144104004 }, { "id": "identity_287154", "image": "DocVQA/images/qxbx0227_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the presentation by John S. Angelos?" }, { "from": "gpt", "value": "The answer is the great western continuous saccharate process" } ], "clip_score": 22.046875, "loss": 4.703248023986816 }, { "id": "identity_265157", "image": "AI2D/images/2155.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the typical duration for a larva to hatch from an egg?\nChoices:\n(A) 3 to 14 days\n(B) 2.25 to 23 days\n(C) 8 to 30 hours\n(D) 3 to 10 days" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.671875, "loss": 4.324972152709961 }, { "id": "identity_299645", "image": "ScienceQA/images/5326.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What state of matter does a book belong to: solid, liquid, or gas?\nChoices:\n(A) solid\n(B) liquid\n(C) gas" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.875, "loss": 4.201815128326416 }, { "id": "identity_192976", "image": "PMC-VQA/images/PMC3320613_pgen-1002631-g004_132962.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct green colored spots can be counted in the image?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 17.75, "loss": 4.699260234832764 }, { "id": "identity_280781", "image": "AI2D/images/3340.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is the outermost part of a cell: vacuole, nucleus, cilia, or plasma?\nChoices:\n(A) vacuole\n(B) nucleus\n(C) cilia\n(D) plasma" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.6875, "loss": 4.182306289672852 }, { "id": "identity_37298", "image": "A-OKVQA/images/490739.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The man on the left is probably a member of what type of group?\nChoices:\n(A) politician\n(B) teacher\n(C) farmer\n(D) clergy" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.140625, "loss": 4.2735161781311035 }, { "id": "identity_23656", "image": "PMC-VQA/images/PMC4140120_fig1_314517.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the tissue type shown in the image? \nChoices:\n(A) Kidney tissue \n(B) Brain tissue \n(C) Lung tissue \n(D) Heart tissue " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.8125, "loss": 4.508609771728516 }, { "id": "identity_6827", "image": "ScienceQA/images/4985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these two objects have in common?\nChoices:\n(A) breakable\n(B) soft" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.015625, "loss": 4.162696361541748 }, { "id": "identity_337885", "image": "A-OKVQA/images/244171.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these individuals looking at?\nChoices:\n(A) festival\n(B) fireworks\n(C) firestorm\n(D) bonfire" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.484375, "loss": 4.570377349853516 }, { "id": "identity_320740", "image": "PMC-VQA/images/PMC2737537_F2_44773.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the area with reduced blood flow located?\nChoices:\n(A) Inferior wall of a ventricle\n(B) Anterior wall of a ventricle\n(C) Lateral wall of a ventricle\n(D) Septal wall of a ventricle" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.578125, "loss": 4.738770961761475 }, { "id": "identity_20181", "image": "VQA2.0/images/514457.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the bear wearing on its face?" }, { "from": "gpt", "value": "The answer is furr" } ], "clip_score": 14.859375, "loss": 5.428647994995117 }, { "id": "identity_309139", "image": "FigureQA/images/59489.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Gold the highest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.859375, "loss": 4.132959365844727 }, { "id": "identity_23479", "image": "PMC-VQA/images/PMC6265358_Fig1_403332.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What component of the PET/CT image is being zoomed in?\nChoices:\n(A) CT component \n(B) PET component \n(C) Fusion component \n(D) Segmentation component " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.84375, "loss": 4.220982551574707 }, { "id": "identity_337785", "image": "PMC-VQA/images/PMC5816948_F2_277379.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What skin type is shown in images D-F?\nChoices:\n(A) LL skin\n(B) HSk skin\n(C) Both\n(D) Neither" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.953125, "loss": 4.133461952209473 }, { "id": "identity_261522", "image": "FigureQA/images/73942.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Blue the smoothest option available?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.953125, "loss": 4.174081802368164 }, { "id": "identity_23210", "image": "PMC-VQA/images/PMC6523364_genes-10-00292-f001_481821.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the arrow in the image? \nChoices:\n(A) Spiracle \n(B) Photophore \n(C) Pseudoculus \n(D) Head " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.1875, "loss": 5.067808628082275 }, { "id": "identity_28550", "image": "A-OKVQA/images/42805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who use the big umbrellas?\nChoices:\n(A) hawkers\n(B) shoppers\n(C) residents\n(D) shop owners" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.0625, "loss": 4.446521759033203 }, { "id": "identity_5900", "image": "DVQA/images/bar_train_00198239.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the fourth bar from the left?" }, { "from": "gpt", "value": "The answer is tail" } ], "clip_score": 18.46875, "loss": 4.319721698760986 }, { "id": "identity_298961", "image": "FigureQA/images/33886.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Sienna not the median with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.21875, "loss": 4.000610828399658 }, { "id": "identity_200315", "image": "A-OKVQA/images/517410.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people can be seen on the sailboat?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 24.09375, "loss": 4.804198265075684 }, { "id": "identity_201467", "image": "A-OKVQA/images/104801.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many framed pictures are hanging on the wall?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 19.0, "loss": 4.768945217132568 }, { "id": "identity_21409", "image": "PMC-VQA/images/PMC6770061_ijms-20-04401-f001_43929.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which structure is indicated by the asterisks in the image?\nChoices:\n(A) IMCD \n(B) Thick limbs \n(C) Thin limbs \n(D) Expression of claudin-19 TJ in IMCD" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.203125, "loss": 4.818386077880859 }, { "id": "identity_2099", "image": "Super-CLEVR/images/superCLEVR_new_001280.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any cyan metallic mountain bikes that have the same size as the red shiny motorbike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 26.046875, "loss": 5.300597190856934 }, { "id": "identity_336888", "image": "PMC-VQA/images/PMC5872966_materials-11-00387-f001_294549.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the inset of the image?\nChoices:\n(A) The top view of thin BCN sheets\n(B) The structure of BCN sheets at the atomic level\n(C) The crystallographic orientation of thin BCN sheets\n(D) The size distribution of thin BCN sheets" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.96875, "loss": 4.280089378356934 }, { "id": "identity_20580", "image": "AI2D/images/3434.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the section at the back called?\nChoices:\n(A) head\n(B) thorax\n(C) abdomen\n(D) none of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.21875, "loss": 4.1809306144714355 }, { "id": "identity_24690", "image": "A-OKVQA/images/4535.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What sort of event is being attended by the costumed riders?\nChoices:\n(A) steeple chase\n(B) parade\n(C) renaissance faire\n(D) sideshow" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.203125, "loss": 4.203921794891357 }, { "id": "identity_290039", "image": "PMC-VQA/images/PMC5649724_F4_229370.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the MRI, where is the capsular retraction located?\nChoices:\n(A) Right lobe of the heart\n(B) Left lobe of the liver\n(C) Right lobe of the liver\n(D) Left lobe of the kidney" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.15625, "loss": 4.040946006774902 }, { "id": "identity_191422", "image": "TextVQA/images/0f790eb3dbd26485.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the object depicted at the bottom of the coin?" }, { "from": "gpt", "value": "The answer is Mexican volcanoes" } ], "clip_score": 26.9375, "loss": 4.378914833068848 }, { "id": "identity_211877", "image": "A-OKVQA/images/155167.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the park mentioned on the street sign?" }, { "from": "gpt", "value": "The answer is Central Park" } ], "clip_score": 25.546875, "loss": 4.278860092163086 }, { "id": "identity_21088", "image": "TextVQA/images/36d8f0024be1a1a0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the top left label say?" }, { "from": "gpt", "value": "The answer is adult education" } ], "clip_score": 20.9375, "loss": 4.3154497146606445 }, { "id": "identity_338484", "image": "PMC-VQA/images/PMC5602807_Fig1_216782.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the liver is affected by the tumor?\nChoices:\n(A) right part\n(B) left part\n(C) both parts\n(D) unknown" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.390625, "loss": 4.329449653625488 }, { "id": "identity_189318", "image": "A-OKVQA/images/243820.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cups can be seen on the counter?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 20.640625, "loss": 5.461216926574707 }, { "id": "identity_283052", "image": "A-OKVQA/images/536806.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following sentient beings have the ability to swim? \n(A) bugs\n(B) humans\n(C) goat\n(D) birds" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.34375, "loss": 4.0645952224731445 }, { "id": "identity_188971", "image": "PMC-VQA/images/PMC8064425_FIG3_423587.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arrows are present in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 18.625, "loss": 4.140408992767334 }, { "id": "identity_262480", "image": "A-OKVQA/images/121143.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is located in close proximity to the tower?\nChoices:\n(A) car\n(B) airplane\n(C) princess\n(D) baby" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.171875, "loss": 4.370522499084473 }, { "id": "identity_283281", "image": "A-OKVQA/images/181084.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the focus of the police surveillance among the following options: accident, balloon sale, riot, or parade?\nChoices:\n(A) accident\n(B) balloon sale\n(C) riot\n(D) parade" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.671875, "loss": 4.4158735275268555 }, { "id": "identity_291018", "image": "PMC-VQA/images/PMC8414583_F2_50810.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where can the granule be observed in the previous time frame? \nOptions:\n(A) Top left\n(B) Top right\n(C) Bottom left\n(D) Bottom right" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.8203125, "loss": 4.381875514984131 }, { "id": "identity_205761", "image": "VQA2.0/images/205047.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible street lamps are there in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.765625, "loss": 4.335053443908691 }, { "id": "identity_276248", "image": "ScienceQA/images/5412.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being pointed out in the highlighted area?\nChoices:\n(A) Cuba\n(B) Saint Vincent and the Grenadines\n(C) Grenada\n(D) Trinidad and Tobago" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.265625, "loss": 4.480916500091553 }, { "id": "identity_21646", "image": "TextVQA/images/974315dec07b9f44.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what letters are visible on the wall of the dugout?" }, { "from": "gpt", "value": "The answer is ch" } ], "clip_score": 18.71875, "loss": 4.838171005249023 }, { "id": "identity_130836", "image": "A-OKVQA/images/181453.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels of the bicycle are visible?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 22.125, "loss": 4.427331447601318 }, { "id": "identity_273496", "image": "A-OKVQA/images/100746.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purposes are these types of planes typically utilized?\nChoices:\n(A) crop dusting\n(B) military\n(C) tourism\n(D) commercial travel" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.484375, "loss": 4.101994514465332 }, { "id": "identity_281927", "image": "ScienceQA/images/15367.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Hawaii?\nChoices:\n(A) Dover\n(B) Honolulu\n(C) Hilo\n(D) Cheyenne" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.0625, "loss": 4.348955154418945 }, { "id": "identity_204463", "image": "VQA2.0/images/347505.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the container with a lid, placed on the floor to the left of the fridge?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 23.703125, "loss": 4.42954158782959 }, { "id": "identity_2277", "image": "TextVQA/images/1079d274d2730d15.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what word is written in yellow font on the blue sign?" }, { "from": "gpt", "value": "The answer is nova" } ], "clip_score": 14.8203125, "loss": 4.896676063537598 }, { "id": "identity_315326", "image": "FigureQA/images/97400.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.765625, "loss": 4.1655378341674805 }, { "id": "identity_267628", "image": "A-OKVQA/images/56109.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: To which subway system does this bus transport passengers to a stop?\nChoices:\n(A) Montreal Metro\n(B) Paris Metro\n(C) Berlin U-Bahn\n(D) London Underground" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.78125, "loss": 4.088884353637695 }, { "id": "identity_222833", "image": "A-OKVQA/images/387704.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What specific message is displayed on the signs attached to the animal?" }, { "from": "gpt", "value": "The answer is Show Us Your Earth" } ], "clip_score": 18.015625, "loss": 4.538578033447266 }, { "id": "identity_30271", "image": "PMC-VQA/images/PMC4155767_F1_318086.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was used to reveal the cystic mass at the anterior cervix? \nChoices:\n(A) CT scan \n(B) MRI scan \n(C) X-ray \n(D) Ultrasonography. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.0, "loss": 4.255239009857178 }, { "id": "identity_188857", "image": "TextVQA/images/22db0f943c236f8e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the majority of the helmets?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 12.3125, "loss": 4.078604698181152 }, { "id": "identity_108430", "image": "VQA-AS/images/12795.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows are visible in the room?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 26.140625, "loss": 4.444314479827881 }, { "id": "identity_326407", "image": "ScienceQA/images/948.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Jefferson City\n(B) Indianapolis\n(C) Bismarck\n(D) Springfield" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.796875, "loss": 4.235227108001709 }, { "id": "identity_336312", "image": "A-OKVQA/images/252952.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the individuals seated with the small child?\nChoices:\n(A) restaurant\n(B) museum\n(C) playground\n(D) square" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.484375, "loss": 4.143516540527344 }, { "id": "identity_323339", "image": "TextVQA/images/03b10f45eb118bd5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of camera is shown here?" }, { "from": "gpt", "value": "The answer is canon" } ], "clip_score": 19.5625, "loss": 4.207533836364746 }, { "id": "identity_305927", "image": "AI2D/images/2730.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What stage follows waning gibbous?\nChoices:\n(A) waxing gibbous\n(B) new moon\n(C) last quarter\n(D) first quarter" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.5, "loss": 4.24429178237915 }, { "id": "identity_1792", "image": "PMC-VQA/images/PMC4802877_Fig3_13718.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the radiograph in a? \nChoices:\n(A) Postoperative follow-up \n(B) Diagnosis of hip osteoarthritis \n(C) Lung examination \n(D) Dental check-up " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.015625, "loss": 5.291287899017334 }, { "id": "identity_283625", "image": "PMC-VQA/images/PMC8019922_f13_409503.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the perforating lenticulostriate arteries located in the image provided?\nChoices:\n(A) Left insula \n(B) Intraventricular space \n(C) Right insula \n(D) Cerebellum" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.390625, "loss": 4.383934497833252 }, { "id": "identity_107851", "image": "VQA2.0/images/172543.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing shorts?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.734375, "loss": 4.816287517547607 }, { "id": "identity_333046", "image": "FigureQA/images/38243.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.5859375, "loss": 4.084574222564697 }, { "id": "identity_131973", "image": "VQA2.0/images/204420.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bicycle in the background?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 16.140625, "loss": 4.1657891273498535 }, { "id": "identity_114443", "image": "VQA2.0/images/325532.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people can be seen on the beach?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 15.5625, "loss": 4.168030738830566 }, { "id": "identity_18950", "image": "FigureQA/images/36723.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Peru the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.90625, "loss": 4.104660987854004 }, { "id": "identity_134399", "image": "DVQA/images/bar_train_00038485.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest value for \"movie\"?" }, { "from": "gpt", "value": "The answer is while" } ], "clip_score": 23.375, "loss": 4.2517805099487305 }, { "id": "identity_34396", "image": "PMC-VQA/images/PMC6824487_f1_59614.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the source of the tomographic slices?\nChoices:\n(A) CT scan \n(B) MRI \n(C) Ultrasonography \n(D) X-ray " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.875, "loss": 4.65169095993042 }, { "id": "identity_272223", "image": "FigureQA/images/38538.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Red the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.25, "loss": 4.700438499450684 }, { "id": "identity_125719", "image": "VQA2.0/images/351734.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pieces of fruit can be seen on the table?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 12.1640625, "loss": 4.593066215515137 }, { "id": "identity_29945", "image": "A-OKVQA/images/573882.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the article of clothing at the base of the console used for?\nChoices:\n(A) bulletproofing\n(B) holding weights\n(C) protect eyes\n(D) walking" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.4453125, "loss": 4.011072635650635 }, { "id": "identity_273386", "image": "VQA2.0/images/176265.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the flower petals on the table?" }, { "from": "gpt", "value": "The answer is no flowers" } ], "clip_score": 14.1484375, "loss": 4.027831554412842 }, { "id": "identity_302933", "image": "PMC-VQA/images/PMC3765512_F2_230058.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the notable finding in this sagittal T1-weighted subtracted dynamic image of a breast?\nChoices:\n(A) irregular mass \n(B) homogeneous mass \n(C) ascites \n(D) irregular calcifications" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.875, "loss": 4.388396263122559 }, { "id": "identity_322179", "image": "A-OKVQA/images/128829.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do you put into this item?\nChoices:\n(A) grains\n(B) vegetables\n(C) meat\n(D) money" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 6.70703125, "loss": 4.017449378967285 }, { "id": "identity_20596", "image": "A-OKVQA/images/124210.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What will the person who left this gear do with it?\nChoices:\n(A) catch butterflies\n(B) go fishing\n(C) have picnic\n(D) water ski" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.9296875, "loss": 4.383286476135254 }, { "id": "identity_188300", "image": "Super-CLEVR/images/superCLEVR_new_000074.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 24.640625, "loss": 4.7711896896362305 }, { "id": "identity_301243", "image": "DVQA/images/bar_train_00177063.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset is represented by the steelblue color?" }, { "from": "gpt", "value": "The answer is code" } ], "clip_score": 22.984375, "loss": 5.1427001953125 }, { "id": "identity_118214", "image": "VQA2.0/images/150196.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many street lamps can be seen on the left side of the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 27.59375, "loss": 4.4939069747924805 }, { "id": "identity_218312", "image": "VizWiz/images/VizWiz_train_00011243.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What pattern is visible on the surface beneath the document?" }, { "from": "gpt", "value": "The answer is Green with brown ellipses" } ], "clip_score": 31.140625, "loss": 4.072691440582275 }, { "id": "identity_288809", "image": "PMC-VQA/images/PMC9356014_Fig1_365449.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the third row of the diagram?\nChoices:\n(A) Anterior vascular leakage grading, \n(B) Posterior vascular leakage grading, \n(C) Macular hyperfluorescence grading, \n(D) Leakage domain grading." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.6875, "loss": 4.376317024230957 }, { "id": "identity_324008", "image": "A-OKVQA/images/248767.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of area is depicted in the image?\nChoices:\n(A) shopping\n(B) residential\n(C) business\n(D) tourist" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.1484375, "loss": 4.568543434143066 }, { "id": "identity_316651", "image": "TQA/images/abc_question_images/photosynthesis_11264.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown for the label F?\nChoices:\n(A) CARBON DIOXIDE\n(B) GLUCOSE\n(C) OXYGEN\n(D) WATER" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.65625, "loss": 4.123621940612793 }, { "id": "identity_277278", "image": "Super-CLEVR/images/superCLEVR_new_001107.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the number of small double busses to the right of the big red shiny thing equal to the number of small brown SUVs to the left of the tiny cyan fighter?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 27.265625, "loss": 5.042799949645996 }, { "id": "identity_279471", "image": "PMC-VQA/images/PMC7366709_Fig3_215539.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the white arrow in the ultra-wide field fluorescein angiography?\nChoices:\n(A) Peripheral non-perfusion area \n(B) Pre-retinal neovascular lesions \n(C) Hemorrhages \n(D) Macular degeneration" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.4375, "loss": 4.291141510009766 }, { "id": "identity_331530", "image": "PMC-VQA/images/PMC5579201_Fig6_210197.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the confocal images?\nChoices:\n(A) Neurons and astrocytes \n(B) DA-L-IR cell bodies and projection patterns \n(C) Axons and dendrites \n(D) Blood vessels and capillaries" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.75, "loss": 4.155304908752441 }, { "id": "identity_3717", "image": "AI2D/images/4823.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label represents Glandosum?\nChoices:\n(A) G\n(B) D\n(C) E\n(D) H" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.78125, "loss": 4.362295627593994 }, { "id": "identity_323054", "image": "A-OKVQA/images/106635.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is there a bright circle in the image?\nChoices:\n(A) sun light\n(B) edited in\n(C) bright lamp\n(D) laser light" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 6.41796875, "loss": 4.135558128356934 }, { "id": "identity_29259", "image": "PMC-VQA/images/PMC5387068_F3_151495.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the image depict? \nChoices:\n(A) A slice of a brain with metastasis \n(B) An enhanced green fluorescent protein (eGFP) labeled lesion \n(C) A slice of a brain after treatment with bevacizumab and L1-10AIB \n(D) A slice of a normal brain. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.609375, "loss": 4.036870956420898 }, { "id": "identity_54741", "image": "DVQA/images/bar_train_00000795.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the least number of people preferring it?" }, { "from": "gpt", "value": "The answer is growth" } ], "clip_score": 30.609375, "loss": 4.039067268371582 }, { "id": "identity_60982", "image": "DocVQA/images/rylg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person who signed the letter?" }, { "from": "gpt", "value": "The answer is Helen Duncan" } ], "clip_score": 21.265625, "loss": 4.991008758544922 }, { "id": "identity_207152", "image": "A-OKVQA/images/171785.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many structures are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 12.1640625, "loss": 4.435158729553223 }, { "id": "identity_20252", "image": "DocVQA/images/jzbn0226_25.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what should be able to be modified-expanded or contracted ?" }, { "from": "gpt", "value": "The answer is low priority designations" } ], "clip_score": 20.84375, "loss": 4.6729230880737305 }, { "id": "identity_193519", "image": "A-OKVQA/images/285131.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the theater shown in the image?" }, { "from": "gpt", "value": "The answer is Jones Theatre" } ], "clip_score": 16.03125, "loss": 4.514125347137451 }, { "id": "identity_4511", "image": "AI2D/images/3076.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is represented by A?\nChoices:\n(A) vacuoles\n(B) ribosomes\n(C) chromatin\n(D) nucleolus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.703125, "loss": 4.370913505554199 }, { "id": "identity_337839", "image": "PMC-VQA/images/PMC8880178_tomography-08-00042-f004_208275.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the radiological finding observed on these scans?\nChoices:\n(A) Calcification \n(B) Effusion \n(C) Ground-glass opacities \n(D) Air trapping" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.359375, "loss": 4.782690048217773 }, { "id": "identity_275327", "image": "A-OKVQA/images/524255.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What forms of entertainment do these individuals have available for their amusement?\nChoices:\n(A) opera\n(B) tv\n(C) horseback tricks\n(D) singing" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.609375, "loss": 4.257429122924805 }, { "id": "identity_302702", "image": "FigureQA/images/77370.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Tomato less than this color?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.1875, "loss": 4.356080055236816 }, { "id": "identity_277471", "image": "FigureQA/images/49175.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Gold or Royal Blue? \nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.34375, "loss": 4.654207706451416 }, { "id": "identity_213195", "image": "A-OKVQA/images/393464.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many children are barefoot in the image?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 25.59375, "loss": 4.3750224113464355 }, { "id": "identity_270384", "image": "A-OKVQA/images/260382.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which mode of transportation is depicted in the image? \nOptions:\n(A) car\n(B) plane\n(C) hot air balloon\n(D) bike" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 7.02734375, "loss": 4.6352667808532715 }, { "id": "identity_270882", "image": "IconQA/images/5898.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of squares an even or odd number?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.109375, "loss": 4.273651123046875 }, { "id": "identity_317119", "image": "ScienceQA/images/16367.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Hartford\n(B) Trenton\n(C) Newark\n(D) Jersey City" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.703125, "loss": 4.005433082580566 }, { "id": "identity_108238", "image": "VQA2.0/images/195368.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible light sources are there in the bathroom?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.71875, "loss": 4.622069358825684 }, { "id": "identity_212209", "image": "A-OKVQA/images/178268.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible stirrups are attached to the saddle?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.578125, "loss": 4.437790870666504 }, { "id": "identity_19437", "image": "PMC-VQA/images/PMC7301249_fig2_194538.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the image above? \nChoices:\n(A) Results of a PET scan \n(B) Results of a CT scan \n(C) Results of an MRI diffusion tensor tractography \n(D) Results of an MRI without diffusion tensor tractography. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.515625, "loss": 4.244974613189697 }, { "id": "identity_16053", "image": "PMC-VQA/images/PMC3141654_F11_103036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the white dotted line show? \nChoices:\n(A) The progress of fusion \n(B) The progress of inversion \n(C) The progress of lateral subdivision \n(D) The progress of developing brain " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.953125, "loss": 4.915121078491211 }, { "id": "identity_316484", "image": "ChartQA/images/1030.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest segment in the chart?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 16.78125, "loss": 4.4373860359191895 }, { "id": "identity_267330", "image": "FigureQA/images/91759.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Steel Blue the color with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.390625, "loss": 4.7815141677856445 }, { "id": "identity_1299", "image": "AI2D/images/72.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The Carbon Cycle is being represented in the diagram, What letter is representing Coal and Oil?\nChoices:\n(A) B\n(B) C\n(C) D\n(D) F" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.0625, "loss": 4.1206746101379395 }, { "id": "identity_304326", "image": "FigureQA/images/35417.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.1015625, "loss": 4.580273628234863 }, { "id": "identity_302796", "image": "DVQA/images/bar_train_00112250.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sets of bars are present?" }, { "from": "gpt", "value": "The answer is seven" } ], "clip_score": 18.390625, "loss": 4.185285568237305 }, { "id": "identity_222557", "image": "A-OKVQA/images/38934.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the roof of the church?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 20.75, "loss": 4.226823806762695 }, { "id": "identity_6445", "image": "Super-CLEVR/images/superCLEVR_new_001715.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the small yellow shiny thing have the same shape as the yellow rubber thing?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 14.5390625, "loss": 5.179541110992432 }, { "id": "identity_219503", "image": "A-OKVQA/images/60316.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many zebras are in the image?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 31.734375, "loss": 4.471829414367676 }, { "id": "identity_267562", "image": "PMC-VQA/images/PMC5719812_Fig3_249998.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which technique is utilized to achieve the staining pattern among the following options: \n(A) Confocal microscopy \n(B) Electron microscopy \n(C) Fluorescent microscopy \n(D) Immunofluorescence microscopy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.203125, "loss": 4.331051826477051 }, { "id": "identity_264892", "image": "PMC-VQA/images/PMC4632949_fig02_440920.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which image is displayed in the bottom row of the figure? \nChoices:\n(A) Whole heart staining.\n(B) Cell staining.\n(C) Injured tissue.\n(D) Untreated mice." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.78125, "loss": 4.323486328125 }, { "id": "identity_136406", "image": "DVQA/images/bar_train_00085728.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest overall preference for all objects combined?" }, { "from": "gpt", "value": "The answer is train" } ], "clip_score": 34.6875, "loss": 4.126016139984131 }, { "id": "identity_21054", "image": "PMC-VQA/images/PMC5466204_F1_177296.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the CT scan show? \nChoices:\n(A) Blood vessel dilation on the left inner orbital wall and postnasal bones. \n(B) Bone destruction on the right inner orbital wall and postnasal bones. \n(C) Tissue inflammation on both inner orbital walls. \n(D) Fluid accumulation on the right inner orbital wall and postnasal bones. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.40625, "loss": 4.013871192932129 }, { "id": "identity_329638", "image": "A-OKVQA/images/67116.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these mostly made of?\nChoices:\n(A) wood\n(B) rock\n(C) clay\n(D) silt" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 8.84375, "loss": 4.496996879577637 }, { "id": "identity_129248", "image": "A-OKVQA/images/99423.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tennis balls can be seen on the shelf?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 15.828125, "loss": 4.620765686035156 }, { "id": "identity_32", "image": "VQA2.0/images/172947.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the time on the clock correct?" }, { "from": "gpt", "value": "The answer is maybe" } ], "clip_score": 20.203125, "loss": 4.761678695678711 }, { "id": "identity_329583", "image": "AI2D/images/3648.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the diagram above depicting?\nChoices:\n(A) Insect\n(B) Heart\n(C) Ear\n(D) Animal" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.4375, "loss": 4.4069108963012695 }, { "id": "identity_323124", "image": "FigureQA/images/65339.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the value of Hot Pink the lowest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.59375, "loss": 4.251959323883057 }, { "id": "identity_923", "image": "ScienceQA/images/1358.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Iowa\n(B) Delaware\n(C) New Jersey\n(D) New York" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.890625, "loss": 4.801603317260742 }, { "id": "identity_281550", "image": "A-OKVQA/images/490044.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of event is this?\nOptions:\n(A) rally\n(B) competition\n(C) farmer's market\n(D) concert" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.375, "loss": 4.43068790435791 }, { "id": "identity_38314", "image": "FigureQA/images/15299.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Pale Green the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.40625, "loss": 4.711489677429199 }, { "id": "identity_274764", "image": "FigureQA/images/98656.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Turquoise the color with the smallest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.859375, "loss": 4.1870927810668945 }, { "id": "identity_319427", "image": "A-OKVQA/images/166586.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What food group do these belong to?\nChoices:\n(A) vegetables\n(B) grains\n(C) fruits\n(D) seeds" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.328125, "loss": 4.052422523498535 }, { "id": "identity_328201", "image": "PMC-VQA/images/PMC7399011_Fig1_225332.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of cancer is shown in the image?\nChoices:\n(A) Rectal cancer\n(B) Transverse colon cancer\n(C) Sigmoid colon cancer\n(D) Descending colon cancer" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.71875, "loss": 4.297708511352539 }, { "id": "identity_332311", "image": "ScienceQA/images/1329.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown here?\nChoices:\n(A) New Hampshire\n(B) New York\n(C) New Jersey\n(D) Mississippi" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.5625, "loss": 4.895796298980713 }, { "id": "identity_310964", "image": "ScienceQA/images/9575.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this food web, which organism's matter eventually transfers to the parasol fungus?\nChoices:\n(A) gray fox\n(B) persimmon tree" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.703125, "loss": 4.016104698181152 }, { "id": "identity_303780", "image": "PMC-VQA/images/PMC8131786_ece37408-fig-0007_444854.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the microscopy technique shown in the image?\nChoices:\n(A) SEM \n(B) TEM \n(C) Polarized light microscopy \n(D) Fluorescence microscopy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.15625, "loss": 5.001683235168457 }, { "id": "identity_232609", "image": "TQA/images/question_images/glaciers_6938.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the part of the diagram that is formed by the intersection of three or more cirques, and explain how this feature is related to the surrounding glacial formations" }, { "from": "gpt", "value": "The answer is horn" } ], "clip_score": 35.84375, "loss": 5.41207218170166 }, { "id": "identity_28548", "image": "A-OKVQA/images/206013.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What topping is the blue-green one on the left slice of bread?\nChoices:\n(A) cheese\n(B) peanut butter\n(C) jam\n(D) candy" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.125, "loss": 4.061336040496826 }, { "id": "identity_286090", "image": "ScienceQA/images/3058.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Nebraska?\nChoices:\n(A) Kansas City\n(B) Bismarck\n(C) Lincoln\n(D) Omaha" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.109375, "loss": 4.0797014236450195 }, { "id": "identity_271045", "image": "PMC-VQA/images/PMC7814018_Fig3_344950.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What aspect is represented by the coloring in the image? \nChoices:\n(A) Shape of the shell\n(B) Age of the shell\n(C) Shell thickness\n(D) Shell roughness" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.546875, "loss": 4.732964515686035 }, { "id": "identity_321262", "image": "PMC-VQA/images/PMC7339797_Fig4_206309.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What technique was used to capture this image?\nChoices:\n(A) X-Ray \n(B) CT Scan \n(C) MRI \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.71875, "loss": 4.204878330230713 }, { "id": "identity_293260", "image": "Super-CLEVR/images/superCLEVR_new_001938.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the shapes of the small blue matte object and the large blue shiny object identical?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 16.09375, "loss": 5.369143486022949 }, { "id": "identity_228288", "image": "DocVQA/images/pfgn0226_145.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the reason Gwen Bichard contacted the investigator, and what was the outcome of the conversation?" }, { "from": "gpt", "value": "The answer is Inquiry about IRB application; assured no MOP patients implanted" } ], "clip_score": 22.5625, "loss": 5.337505340576172 }, { "id": "identity_213903", "image": "VQA2.0/images/93332.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles can be seen in the background?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 12.6796875, "loss": 4.436025619506836 }, { "id": "identity_200387", "image": "TextVQA/images/558f6df5aa49aaf9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stacks of paper are there on the desk?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.34375, "loss": 4.039534568786621 }, { "id": "identity_316100", "image": "A-OKVQA/images/38572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the individual in pink ride?\nChoices:\n(A) dolphins\n(B) surf board\n(C) whales\n(D) donkeys" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.4375, "loss": 4.43408203125 }, { "id": "identity_287944", "image": "ScienceQA/images/1180.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being indicated by the highlighted area? \nOptions:\n(A) Cuba\n(B) Haiti\n(C) Jamaica\n(D) Trinidad and Tobago" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.953125, "loss": 5.026003837585449 }, { "id": "identity_312488", "image": "VQA2.0/images/323478.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the ground?" }, { "from": "gpt", "value": "The answer is green" } ], "clip_score": 11.7890625, "loss": 4.112765789031982 }, { "id": "identity_38319", "image": "DVQA/images/bar_train_00028058.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the third group of bars from the bottom?" }, { "from": "gpt", "value": "The answer is fluid" } ], "clip_score": 23.015625, "loss": 4.965086936950684 }, { "id": "identity_201088", "image": "PMC-VQA/images/PMC6597434_FI190452cr-2_686.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vaginas are indicated in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 27.046875, "loss": 4.589704513549805 }, { "id": "identity_269247", "image": "FigureQA/images/25778.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Blue Violet not the low median?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.90625, "loss": 4.002434730529785 }, { "id": "identity_218885", "image": "A-OKVQA/images/172874.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stockings are hung by the fireplace?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.15625, "loss": 4.545729637145996 }, { "id": "identity_72525", "image": "TQA/images/question_images/earth_day_night_96.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which part of the Earth is experiencing day time according to the image?" }, { "from": "gpt", "value": "The answer is The part facing the sunlight" } ], "clip_score": 33.5, "loss": 4.001950263977051 }, { "id": "identity_316998", "image": "TQA/images/question_images/human_system_reproductory_7042.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these inside of?\nChoices:\n(A) Scrotum\n(B) Glans penis\n(C) Urethra\n(D) Bladder" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.25, "loss": 4.134697437286377 }, { "id": "identity_323797", "image": "TQA/images/question_images/aquifers_6531.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the trees do to the water source?\nChoices:\n(A) Use the water and discharge it to the atmosphere\n(B) Take water from the atmosphere and feed it into the aquifer\n(C) Stabilise the aquifer\n(D) Confine the water in the aquifer to a particular site" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.09375, "loss": 4.512988567352295 }, { "id": "identity_326604", "image": "A-OKVQA/images/367608.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of skate trick is being performed?\nChoices:\n(A) flip\n(B) grind\n(C) grab\n(D) manual" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.046875, "loss": 4.147500038146973 }, { "id": "identity_3328", "image": "DocVQA/images/xplf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: who is the Exec. Sec'y. of Health and Hospital Division ?" }, { "from": "gpt", "value": "The answer is (miss) frieda brackebusch" } ], "clip_score": 23.09375, "loss": 4.172358512878418 }, { "id": "identity_22312", "image": "VQA2.0/images/65880.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How can you tell the men are warming up?" }, { "from": "gpt", "value": "The answer is only 1 team" } ], "clip_score": 15.484375, "loss": 4.125940799713135 }, { "id": "identity_336589", "image": "FigureQA/images/47091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Tomato?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.53125, "loss": 4.083392143249512 }, { "id": "identity_295778", "image": "IconQA/images/104906.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) rotate\n(B) shift\n(C) invert" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.734375, "loss": 5.103255271911621 }, { "id": "identity_308724", "image": "Super-CLEVR/images/superCLEVR_new_000303.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there the same number of metallic mountain bikes that are on the left side of the small purple vehicle and metallic trucks?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 23.125, "loss": 5.160686492919922 }, { "id": "identity_17678", "image": "A-OKVQA/images/324999.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this space dedicated to displaying?\nChoices:\n(A) planes only\n(B) art\n(C) vehicles\n(D) ubers" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.296875, "loss": 4.5453290939331055 }, { "id": "identity_31427", "image": "TextVQA/images/534f4ce2ac0805ae.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what company made the jars?" }, { "from": "gpt", "value": "The answer is diesel hpr" } ], "clip_score": 28.4375, "loss": 5.047874927520752 }, { "id": "identity_216314", "image": "A-OKVQA/images/241076.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the cyclist's shirt?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 13.828125, "loss": 4.0821123123168945 }, { "id": "identity_335935", "image": "ScienceQA/images/7966.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Phoenix\n(B) Olympia\n(C) Tucson\n(D) Denver" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.859375, "loss": 4.3783793449401855 }, { "id": "identity_24058", "image": "PMC-VQA/images/PMC4832464_Fig9_21709.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the species of the brain shown in the image? \nChoices:\n(A) O. priapus \n(B) O. mucofloris \n(C) O. cerevisiae \n(D) O. singhara " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.765625, "loss": 4.467790126800537 }, { "id": "identity_9498", "image": "TQA/images/abc_question_images/cycle_water_11481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is taking place at label X?\nChoices:\n(A) TRANSPIRATION\n(B) INFILTRATION\n(C) EVAPORATION\n(D) CONDENSATION" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.640625, "loss": 4.368622779846191 }, { "id": "identity_297188", "image": "TabMWP/images/30816.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.203125, "loss": 4.196707248687744 }, { "id": "identity_108804", "image": "VQA-AS/images/12857.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows are there in the room?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 23.484375, "loss": 4.175618648529053 }, { "id": "identity_288452", "image": "PMC-VQA/images/PMC4851676_Fig4_26401.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was seen inside the tubule lumen?\nChoices:\n(A) Mitochondria \n(B) Vacuole \n(C) Lysosome \n(D) Ribosome" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.265625, "loss": 4.093740463256836 }, { "id": "identity_206611", "image": "TextVQA/images/33723febf95151ae.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are on the team wearing red uniforms?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 18.234375, "loss": 4.427303791046143 }, { "id": "identity_17457", "image": "AI2D/images/1297.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Letter A refers to which stage?\nChoices:\n(A) Stage B\n(B) Stage D\n(C) Stage C\n(D) Stage A" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.34375, "loss": 4.262178897857666 }, { "id": "identity_34685", "image": "AI2D/images/4634.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who are the primary consumers in this food web?\nChoices:\n(A) hawks and owls\n(B) snakes\n(C) rabbits, grasshoppers and crickets\n(D) mice and rats" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.03125, "loss": 4.070068836212158 }, { "id": "identity_25440", "image": "PMC-VQA/images/PMC8152131_Fig3_451772.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which perspective was the CT image taken? \nChoices:\n(A) Sagittal \n(B) Coronal \n(C) Axial \n(D) Horizontal " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.015625, "loss": 4.6381120681762695 }, { "id": "identity_280010", "image": "A-OKVQA/images/186461.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for him extending his arm out?\nChoices:\n(A) hold\n(B) wave\n(C) balance\n(D) break fall" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.203125, "loss": 4.419720649719238 }, { "id": "identity_110951", "image": "TextVQA/images/79049a2bd16e9ac6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many characters are visibly holding newspapers?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.609375, "loss": 4.330466270446777 }, { "id": "identity_110294", "image": "A-OKVQA/images/216581.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are seated on the bench?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.859375, "loss": 4.4496331214904785 }, { "id": "identity_306181", "image": "TextVQA/images/3fa6c2b883eee9da.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the book on this desk?" }, { "from": "gpt", "value": "The answer is semantic web" } ], "clip_score": 21.28125, "loss": 4.589367389678955 }, { "id": "identity_36964", "image": "A-OKVQA/images/174766.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the rider wearing protection?\nChoices:\n(A) elbows\n(B) knees\n(C) chest\n(D) head" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.5546875, "loss": 4.490139961242676 }, { "id": "identity_242440", "image": "VQA2.0/images/348790.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day is it likely to be, considering the lighting conditions and the presence of car headlights?" }, { "from": "gpt", "value": "The answer is Early evening" } ], "clip_score": 28.328125, "loss": 4.301938533782959 }, { "id": "identity_38272", "image": "A-OKVQA/images/11805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of even is being held?\nChoices:\n(A) wedding\n(B) birthday party\n(C) farmers market\n(D) reception" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.71875, "loss": 4.0952911376953125 }, { "id": "identity_286099", "image": "TextVQA/images/00b788d91c0084f5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me the current date on this computer?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 13.8125, "loss": 4.023209571838379 }, { "id": "identity_33342", "image": "PMC-VQA/images/PMC6194678_Fig2_383977.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the loose bodies located in the knee? \nChoices:\n(A) Anterior joint space \n(B) Lateral joint space \n(C) Medial joint space \n(D) Posterior joint space " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.5, "loss": 4.155614376068115 }, { "id": "identity_332335", "image": "IconQA/images/24370.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image shows a train. What is the fourth image?\nChoices:\n(A) bear\n(B) train\n(C) plane" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.90625, "loss": 4.572727680206299 }, { "id": "identity_329587", "image": "PMC-VQA/images/PMC4360923_Fig3_367910.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the image (e) show after some processing? \nChoices:\n(A) Original image \n(B) Binary segmentation of a region \n(C) Histogram equalization of an image \n(D) Image enhancement using a filter" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.2890625, "loss": 4.4933342933654785 }, { "id": "identity_31298", "image": "Super-CLEVR/images/superCLEVR_new_001187.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the rubber fighter the same color as the tiny matte car?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 25.453125, "loss": 5.144433498382568 }, { "id": "identity_17997", "image": "AI2D/images/3324.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: This is the smallest structure in life that can function independantly.\nChoices:\n(A) cell\n(B) organism\n(C) flower\n(D) mouse" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.1875, "loss": 4.201288223266602 }, { "id": "identity_128015", "image": "A-OKVQA/images/570344.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trams can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.796875, "loss": 4.414214134216309 }, { "id": "identity_326304", "image": "FigureQA/images/31731.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gold the highest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.65625, "loss": 4.075408458709717 }, { "id": "identity_336982", "image": "A-OKVQA/images/67966.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is closest to this?\n\nChoices:\n(A) dog\n(B) woman\n(C) cat\n(D) child" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.4921875, "loss": 4.165436744689941 }, { "id": "identity_29389", "image": "A-OKVQA/images/395013.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: This flag is belongs to which country?\nChoices:\n(A) russia\n(B) uk\n(C) nepal\n(D) us" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 8.8125, "loss": 4.057887077331543 }, { "id": "identity_28187", "image": "DVQA/images/bar_train_00130091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first bar from the bottom in each group?" }, { "from": "gpt", "value": "The answer is get" } ], "clip_score": 19.203125, "loss": 4.518932819366455 }, { "id": "identity_64852", "image": "ScienceQA/images/17916.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of leaves is the sloth hanging from?" }, { "from": "gpt", "value": "The answer is Broad, green leaves" } ], "clip_score": 33.125, "loss": 4.080538272857666 }, { "id": "identity_25029", "image": "DocVQA/images/qlhx0227_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where did the article \"Vertebrate Keratinization\" appear?" }, { "from": "gpt", "value": "The answer is primate news" } ], "clip_score": 20.03125, "loss": 4.541255950927734 }, { "id": "identity_296124", "image": "A-OKVQA/images/15827.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which individuals typically operate these kinds of vehicles? \nOptions:\n(A) taxi drivers\n(B) military sergeants\n(C) bus drivers\n(D) motorcyclists" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.484375, "loss": 4.085949897766113 }, { "id": "identity_289395", "image": "VizWiz/images/VizWiz_train_00014374.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 17.640625, "loss": 4.050105571746826 }, { "id": "identity_28775", "image": "A-OKVQA/images/101860.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What MLB team does the man up at bat play for?\nChoices:\n(A) orioles\n(B) mets\n(C) mariners\n(D) braves" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.546875, "loss": 4.081271171569824 }, { "id": "identity_14080", "image": "AI2D/images/3220.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The ovule-bearing lower part of a pistil that ripens into a fruit is called as ?\nChoices:\n(A) filament\n(B) receptacle\n(C) ovary\n(D) petal" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.578125, "loss": 4.11667013168335 }, { "id": "identity_281759", "image": "ScienceQA/images/1436.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Illinois?\nChoices:\n(A) Indianapolis\n(B) Chicago\n(C) Springfield\n(D) Olympia" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.0, "loss": 4.154548168182373 }, { "id": "identity_193814", "image": "PMC-VQA/images/PMC5950188_Fig1_313973.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image?" }, { "from": "gpt", "value": "The answer is Coronal" } ], "clip_score": 17.453125, "loss": 4.164016246795654 }, { "id": "identity_303468", "image": "FigureQA/images/9808.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Dark Gold?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.140625, "loss": 4.008025169372559 }, { "id": "identity_295003", "image": "A-OKVQA/images/418346.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be found on the fence among the following options: statues, hair clips, monkeys, or eggs?\nChoices:\n(A) statues\n(B) hair clips\n(C) monkeys\n(D) egg" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.046875, "loss": 4.047947883605957 }, { "id": "identity_157433", "image": "AI2D/images/3820.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct layers or parts are labeled in the seed diagram?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 31.21875, "loss": 4.076560974121094 }, { "id": "identity_282315", "image": "PMC-VQA/images/PMC2754492_F1_47134.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging plane is depicted in the image provided?\nChoices:\n(A) Sagittal\n(B) Coronal\n(C) Axial\n(D) Oblique" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.734375, "loss": 4.021852970123291 }, { "id": "identity_334430", "image": "VQA2.0/images/204275.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is on the surface?" }, { "from": "gpt", "value": "The answer is shadows" } ], "clip_score": 13.4375, "loss": 4.060742378234863 }, { "id": "identity_19864", "image": "TextVQA/images/c6bcf285912b386e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what kind of station is on the red sign?" }, { "from": "gpt", "value": "The answer is game trito station" } ], "clip_score": 20.765625, "loss": 4.25420618057251 }, { "id": "identity_299620", "image": "A-OKVQA/images/365206.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of traffic is permitted to pass through this street at the current time? Is it limited to foot traffic only, open to all types of vehicles, restricted to buses, or exclusive to motorcycles?\nChoices:\n(A) foot only\n(B) all\n(C) buses\n(D) motorcycles" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.140625, "loss": 4.242337226867676 }, { "id": "identity_263828", "image": "FigureQA/images/64404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly describes the smoothness of Slate? \n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.21875, "loss": 4.327315330505371 }, { "id": "identity_21841", "image": "A-OKVQA/images/45844.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the boy in the foreground doing?\nChoices:\n(A) eating lunch\n(B) playing tennis\n(C) cleaning park\n(D) threatening others" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.84375, "loss": 4.473075866699219 }, { "id": "identity_284672", "image": "PMC-VQA/images/PMC4086335_fig1_304253.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to capture the image in (c) among the following choices: T2 SE, T1 SE, T1 IR, or T2 FRF?\nChoices:\n(A) T2 SE \n(B) T1 SE \n(C) T1 IR \n(D) T2 FRF" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.359375, "loss": 4.059815406799316 }, { "id": "identity_275032", "image": "VizWiz/images/VizWiz_train_00005350.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me the name of this food?" }, { "from": "gpt", "value": "The answer is this meat" } ], "clip_score": 21.265625, "loss": 5.655030727386475 }, { "id": "identity_311431", "image": "TQA/images/abc_question_images/nuclear_energy_17094.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the reactor?\nChoices:\n(A) C\n(B) V\n(C) S\n(D) W" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.421875, "loss": 4.75347900390625 }, { "id": "identity_106031", "image": "Super-CLEVR/images/superCLEVR_new_000306.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object located between the blue bicycle and the golden bicycle?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 25.96875, "loss": 4.557797431945801 }, { "id": "identity_315614", "image": "ScienceQA/images/20355.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is shown in the image?\nChoices:\n(A) South America\n(B) North America\n(C) Asia\n(D) Europe" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.453125, "loss": 4.025728702545166 }, { "id": "identity_338463", "image": "IconQA/images/68509.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the shape shown here?\nChoices:\n(A) cone\n(B) cube\n(C) sphere\n(D) cylinder" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.5625, "loss": 4.61699914932251 }, { "id": "identity_24534", "image": "A-OKVQA/images/292697.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the layout of this picture called?\nChoices:\n(A) photography\n(B) finger paint\n(C) square match\n(D) collage" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.328125, "loss": 4.234726905822754 }, { "id": "identity_1191", "image": "IconQA/images/68073.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a crab. Which picture is third?\nChoices:\n(A) crab\n(B) boat\n(C) bucket" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.40625, "loss": 4.402181625366211 }, { "id": "identity_8147", "image": "PMC-VQA/images/PMC3459703_F1_157660.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the lower panel of the image displaying? \nChoices:\n(A) Areas of brain activation rendered on spatially normalized brains. \n(B) Dynamic facial expressions in the autism spectrum disorders group. \n(C) Static facial expressions in the control group. \n(D) Autism spectrum disorders group. " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.0, "loss": 4.001654624938965 }, { "id": "identity_132242", "image": "TextVQA/images/06a7f1d5b80884dd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many posters can be seen on the door to the right of the shop?" }, { "from": "gpt", "value": "The answer is Four posters" } ], "clip_score": 22.546875, "loss": 4.279736518859863 }, { "id": "identity_307266", "image": "FigureQA/images/82982.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this color have the minimum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.484375, "loss": 4.10444450378418 }, { "id": "identity_3795", "image": "PMC-VQA/images/PMC8131786_ece37408-fig-0007_444854.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the type of microscopy used in the image? \nChoices:\n(A) SEM \n(B) TEM \n(C) Polarized light microscopy \n(D) Fluorescence microscopy " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.421875, "loss": 5.158438205718994 }, { "id": "identity_120338", "image": "VQA2.0/images/47347.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are using umbrellas?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.484375, "loss": 4.3113484382629395 }, { "id": "identity_320936", "image": "PMC-VQA/images/PMC4631946_F15_440502.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of imaging is utilized in this study?\nChoices:\n(A) Magnetic Resonance Imaging (MRI)\n(B) Single Photon Emission Computed Tomography (SPECT)\n(C) Computed Tomography (CT)\n(D) Positron Emission Tomography (PET)" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.734375, "loss": 4.791499137878418 }, { "id": "identity_283041", "image": "A-OKVQA/images/223580.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of food is most likely to be served at this place?\nChoices:\n(A) salads\n(B) filet mignon\n(C) hot dog\n(D) chile" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.3203125, "loss": 4.091405868530273 }, { "id": "identity_320257", "image": "FigureQA/images/88824.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Olive's value the highest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.078125, "loss": 4.776209831237793 }, { "id": "identity_22879", "image": "PMC-VQA/images/PMC2248189_F3_17874.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the red color signify in the image? \nChoices:\n(A) EGFP fluorescence \n(B) Organelle marker \n(C) Secondary antibody \n(D) Background noise " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.9375, "loss": 5.208378791809082 }, { "id": "identity_268459", "image": "ScienceQA/images/10502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Through which ocean does the prime meridian pass?\nChoices:\n(A) the Arctic Ocean\n(B) the Pacific Ocean\n(C) the Indian Ocean" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.25, "loss": 4.390345096588135 }, { "id": "identity_112504", "image": "A-OKVQA/images/491169.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles in the image have visible license plates?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 26.390625, "loss": 4.576531887054443 }, { "id": "identity_21287", "image": "A-OKVQA/images/195606.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people walking in?\nChoices:\n(A) sand\n(B) rain\n(C) snow\n(D) park" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.375, "loss": 4.771514892578125 }, { "id": "identity_316847", "image": "TextVQA/images/329087659dd853b8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is being advertised on the wall?" }, { "from": "gpt", "value": "The answer is guycooklandco" } ], "clip_score": 14.515625, "loss": 5.263750076293945 }, { "id": "identity_189051", "image": "A-OKVQA/images/223276.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible posters are there in the background?" }, { "from": "gpt", "value": "The answer is Two posters" } ], "clip_score": 12.6484375, "loss": 4.208468914031982 }, { "id": "identity_284902", "image": "A-OKVQA/images/274022.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which brand of coffee is the most easily accessible here out of Starbucks, Burger King, McDonald's, and Peet's?\nChoices:\n(A) starbucks\n(B) burger king\n(C) mcdonald's\n(D) peets" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.984375, "loss": 4.015588283538818 }, { "id": "identity_151515", "image": "AI2D/images/1143.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many leaves are attached to the stem?" }, { "from": "gpt", "value": "The answer is Four leaves" } ], "clip_score": 21.734375, "loss": 4.45623779296875 }, { "id": "identity_289383", "image": "Super-CLEVR/images/superCLEVR_new_000507.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the location of the tiny green bicycle in relation to the regular bus and the matte road bike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 18.21875, "loss": 4.492545127868652 }, { "id": "identity_338436", "image": "A-OKVQA/images/5256.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of text sign is depicted?\nChoices:\n(A) brand\n(B) directional\n(C) warning\n(D) regulatory" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 9.296875, "loss": 4.645943641662598 }, { "id": "identity_152970", "image": "ScienceQA/images/2612.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image are fruits?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.25, "loss": 4.55522346496582 }, { "id": "identity_279610", "image": "A-OKVQA/images/305000.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the possible item that the lady in this location could buy from the following options: grains, produce, toys, or meats?\nChoices:\n(A) grains\n(B) produce\n(C) toys\n(D) meats" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.75, "loss": 4.645723819732666 }, { "id": "identity_323454", "image": "ScienceQA/images/17747.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Richmond\n(B) Birmingham\n(C) Arlington\n(D) Norfolk" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.171875, "loss": 4.239511966705322 }, { "id": "identity_331480", "image": "AI2D/images/4506.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: To which category does this leaf belong?\nChoices:\n(A) lacerate\n(B) undulate\n(C) entire\n(D) crenate" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.625, "loss": 4.370288372039795 }, { "id": "identity_4313", "image": "A-OKVQA/images/469435.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people holding?\nChoices:\n(A) cats\n(B) apples\n(C) pens\n(D) pumpkins" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.5078125, "loss": 4.283813953399658 }, { "id": "identity_15550", "image": "Super-CLEVR/images/superCLEVR_new_000056.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any other thing that has the same material as the blue thing?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 16.0625, "loss": 5.1472954750061035 }, { "id": "identity_329315", "image": "PMC-VQA/images/PMC5908617_F1_304248.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What structures are the line drawn parallel to?\nChoices:\n(A) dorsal surfaces \n(B) lateral surfaces \n(C) ventral surfaces \n(D) anterior surfaces" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.734375, "loss": 4.3591203689575195 }, { "id": "identity_287115", "image": "PMC-VQA/images/PMC4602428_F7_432145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which area do the lymph nodes exhibit heightened metabolic activity?\nChoices:\n(A) Left supraclavicular fossa\n(B) Right supraclavicular fossa\n(C) Mediastinum\n(D) Abdomen" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.578125, "loss": 4.229577541351318 }, { "id": "identity_323494", "image": "PMC-VQA/images/PMC9250258_Fig2_329773.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which layer of the retina is shown in this study?\nChoices:\n(A) Inner nuclear layer\n(B) Outer nuclear layer\n(C) Superficial capillary plexus\n(D) Deep capillary plexus" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.09375, "loss": 4.019680976867676 }, { "id": "identity_221305", "image": "Super-CLEVR/images/superCLEVR_new_000223.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest vehicle in the image?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 19.078125, "loss": 4.433975696563721 }, { "id": "identity_332599", "image": "A-OKVQA/images/99539.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most likely drink in the cup on this table?\nChoices:\n(A) orange juice\n(B) coffee\n(C) alcohol\n(D) water" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.6015625, "loss": 4.3878068923950195 }, { "id": "identity_326188", "image": "PMC-VQA/images/PMC3681988_pone-0066137-g022_211303.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the brain is depicted in the micrograph?\nChoices:\n(A) Mid-brain\n(B) Posterior region\n(C) Anterior region\n(D) Unidentifiable region" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.234375, "loss": 4.45451021194458 }, { "id": "identity_127929", "image": "A-OKVQA/images/285096.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the hull of the yellow boat?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 27.09375, "loss": 4.07067346572876 }, { "id": "identity_140902", "image": "DocVQA/images/slmm0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is giving the opening remarks at the conference?" }, { "from": "gpt", "value": "The answer is Ray Seltser" } ], "clip_score": 23.265625, "loss": 5.212395668029785 }, { "id": "identity_120333", "image": "PMC-VQA/images/PMC7435725_materials-13-03400-f006_237018.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are marked with orange arrows?" }, { "from": "gpt", "value": "The answer is Four objects" } ], "clip_score": 23.75, "loss": 4.725465774536133 }, { "id": "identity_104781", "image": "VQA2.0/images/522708.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are standing on the sidewalk?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.53125, "loss": 4.6184401512146 }, { "id": "identity_299828", "image": "ScienceQA/images/9646.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is being indicated by the highlight?\nOptions:\n(A) Asia\n(B) Africa\n(C) Antarctica\n(D) South America" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.8125, "loss": 4.03848934173584 }, { "id": "identity_337292", "image": "A-OKVQA/images/143277.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are the vehicles positioned along the sidewalk?\nChoices:\n(A) to park\n(B) to wash\n(C) car show\n(D) to race" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.09375, "loss": 4.204744815826416 }, { "id": "identity_262183", "image": "AI2D/images/3081.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the outer boundary of the cell?\nChoices:\n(A) Cytoplasm\n(B) Mitochondrion\n(C) Nucleus\n(D) Cell membrane" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.53125, "loss": 4.116807460784912 }, { "id": "identity_18067", "image": "FigureQA/images/90488.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Blue have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.046875, "loss": 4.153090476989746 }, { "id": "identity_196769", "image": "A-OKVQA/images/473590.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of food item is being held by the person on the left?" }, { "from": "gpt", "value": "The answer is A pretzel" } ], "clip_score": 20.78125, "loss": 4.415675163269043 }, { "id": "identity_16440", "image": "TQA/images/abc_question_images/parts_microscope_17198.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter indicates the lenses that adjust viewing?\nChoices:\n(A) J\n(B) M\n(C) A\n(D) K" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.03125, "loss": 4.038559436798096 }, { "id": "identity_299537", "image": "A-OKVQA/images/462026.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of stand is supporting the motorcycle? \n(A) music stand \n(B) display stand \n(C) kick stand \n(D) grand stand" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.203125, "loss": 4.310225009918213 }, { "id": "identity_192998", "image": "A-OKVQA/images/107247.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the bus's advertisement?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 27.125, "loss": 4.445043563842773 }, { "id": "identity_25523", "image": "ScienceQA/images/6360.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the following statement about our solar system true or false?\nOf the four largest planets, three are made mainly of gas.\nChoices:\n(A) true\n(B) false" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.0, "loss": 4.0082855224609375 }, { "id": "identity_322922", "image": "A-OKVQA/images/432704.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who are the people wearing yellow?\nChoices:\n(A) crew\n(B) doctors\n(C) chefs\n(D) students" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.234375, "loss": 4.3046793937683105 }, { "id": "identity_42784", "image": "DocVQA/images/rrvw0217_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the general sentiment about the meeting in Prague according to the note?" }, { "from": "gpt", "value": "The answer is Less and less attended and seems less important" } ], "clip_score": 24.78125, "loss": 4.640893459320068 }, { "id": "identity_8581", "image": "DocVQA/images/khgl0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who wrote the letter?" }, { "from": "gpt", "value": "The answer is CJB" } ], "clip_score": 24.1875, "loss": 4.682770252227783 }, { "id": "identity_266303", "image": "TQA/images/abc_question_images/skin_cross_section_17583.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location can the highest amount of adipose tissue be found?\nChoices:\n(A) L\n(B) P\n(C) K\n(D) M" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.453125, "loss": 4.092408657073975 }, { "id": "identity_32240", "image": "A-OKVQA/images/553116.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the player standing on the base ready to do?\nChoices:\n(A) tackle\n(B) dribble\n(C) swing\n(D) dunk" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.390625, "loss": 4.499595642089844 }, { "id": "identity_313425", "image": "ScienceQA/images/12371.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this text talk about the weather or the climate?\nChoices:\n(A) climate\n(B) weather" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 8.9296875, "loss": 4.4984002113342285 }, { "id": "identity_253066", "image": "A-OKVQA/images/27005.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand name on the barrel and what type of vehicle is carrying it?" }, { "from": "gpt", "value": "The answer is Fortier Crackers, delivery truck" } ], "clip_score": 29.984375, "loss": 4.630213260650635 }, { "id": "identity_51595", "image": "DVQA/images/bar_train_00193079.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the highest accuracy on the string dataset?" }, { "from": "gpt", "value": "The answer is Both bent and sky" } ], "clip_score": 34.40625, "loss": 4.414262771606445 }, { "id": "identity_198053", "image": "PMC-VQA/images/PMC2994755_pone-0014150-g005_80034.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the blue color in the brain image represent?" }, { "from": "gpt", "value": "The answer is It represents regions of brain activation or correlation" } ], "clip_score": 28.921875, "loss": 4.246228218078613 }, { "id": "identity_257518", "image": "A-OKVQA/images/259567.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of objects can you identify in the image, and what are they?" }, { "from": "gpt", "value": "The answer is ten types of objects" } ], "clip_score": 13.5234375, "loss": 4.101373195648193 }, { "id": "identity_308125", "image": "TQA/images/question_images/parts_worm_7313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the structure used for \"sperm storage\"?\nChoices:\n(A) Testes\n(B) Seminal Vesicle\n(C) Vas Deferens\n(D) Copulatory Bursa" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.609375, "loss": 4.221375465393066 }, { "id": "identity_192182", "image": "TextVQA/images/d6e8bc5b8ee5d66e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shorts worn by the person second from the right?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 14.3125, "loss": 4.604444980621338 }, { "id": "identity_282211", "image": "PMC-VQA/images/PMC7316744_Fig6_199222.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is represented by the cyan areas in the image? \nChoices:\n(A) the coral tissue autofluorescence\n(B) the chlorophyll autofluorescence\n(C) the non-fluorescent coral skeleton underneath the tissue\n(D) the excessive light conditions." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.921875, "loss": 4.22043514251709 }, { "id": "identity_329404", "image": "PMC-VQA/images/PMC3897975_F1_260150.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique used here?\nChoices:\n(A) Scanning electron microscopy\n(B) Transmission electron microscopy\n(C) Confocal microscopy\n(D) X-ray diffraction" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.28125, "loss": 4.105154037475586 }, { "id": "identity_272134", "image": "AI2D/images/313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the diagram, which of the following animals feed on grasshoppers: Hawks, Snakes, Frogs, or Other grasshoppers?\nChoices:\n(A) Hawks\n(B) Snakes\n(C) Frogs\n(D) Other grasshoppers" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 37.625, "loss": 4.100670337677002 }, { "id": "identity_23419", "image": "PMC-VQA/images/PMC6945702_Fig1_95195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the liver is affected in this case? \nChoices:\n(A) Right lobe \n(B) Left lobe \n(C) Caudate lobe \n(D) Quadrate lobe " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.578125, "loss": 4.390472888946533 }, { "id": "identity_298742", "image": "PMC-VQA/images/PMC4875637_Fig2_32540.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the yellow color in the image?\nChoices:\n(A) Hypomyelination \n(B) High signal intensity from properly myelinated WM \n(C) Lesions in the white matter \n(D) Artefacts" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.40625, "loss": 4.560118675231934 }, { "id": "identity_17221", "image": "Super-CLEVR/images/superCLEVR_new_001905.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a motorbike of the same color as the big fighter?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 18.34375, "loss": 5.129021167755127 }, { "id": "identity_326148", "image": "A-OKVQA/images/136300.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do these creatures have?\nChoices:\n(A) horns\n(B) quills\n(C) hooves\n(D) gills" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.203125, "loss": 4.139449119567871 }, { "id": "identity_300543", "image": "AI2D/images/2292.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter in the diagram indicates a fully developed organism?\nChoices:\n(A) C\n(B) B\n(C) E\n(D) A" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.859375, "loss": 4.2402143478393555 }, { "id": "identity_332425", "image": "A-OKVQA/images/5018.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who has control when riding in this vehicle adorned with a flag?\nChoices:\n(A) left\n(B) neither\n(C) right\n(D) both" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.53125, "loss": 4.016114234924316 }, { "id": "identity_274318", "image": "TextVQA/images/727356147414700a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the text located above the blue mountain logo?" }, { "from": "gpt", "value": "The answer is the world famous" } ], "clip_score": 27.765625, "loss": 5.58160400390625 }, { "id": "identity_323986", "image": "PMC-VQA/images/PMC8044272_Fig1_417220.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What method was used to detect the foreign body?\nChoices:\n(A) X-ray \n(B) Computed tomography (CT) scan \n(C) Magnetic resonance imaging (MRI) \n(D) Positron emission tomography (PET)" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.8046875, "loss": 4.315239906311035 }, { "id": "identity_32062", "image": "PMC-VQA/images/PMC5709000_F5_246781.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the mass located in the body? \nChoices:\n(A) left uterine adnexa \n(B) pelvis \n(C) right uterine adnexa \n(D) right kidney " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.78125, "loss": 4.513882637023926 }, { "id": "identity_323067", "image": "PMC-VQA/images/PMC6909617_Fig1_83161.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the body is depicted in the image?\nChoices:\n(A) Left leg and foot \n(B) Left arm and hand \n(C) Right leg and foot \n(D) Right arm and hand" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.328125, "loss": 4.183913230895996 }, { "id": "identity_287174", "image": "PMC-VQA/images/PMC7719586_Fig1_315991.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What issue arose following the proximal embolization procedure? \nChoices:\n(A) Hemorrhage\n(B) Aneurysm\n(C) Infarct\n(D) Fistula" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.765625, "loss": 4.0076727867126465 }, { "id": "identity_303100", "image": "DocVQA/images/nypm0020_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the brand?" }, { "from": "gpt", "value": "The answer is eclipse" } ], "clip_score": 19.421875, "loss": 4.51169490814209 }, { "id": "identity_295696", "image": "Super-CLEVR/images/superCLEVR_new_000042.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a small metallic dirtbike located on the left side of the yellow motorbike that is in front of the matte object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 28.875, "loss": 5.210002422332764 }, { "id": "identity_8326", "image": "Super-CLEVR/images/superCLEVR_new_000978.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the object that is left of the big metal bicycle and in front of the tiny yellow bus?" }, { "from": "gpt", "value": "The answer is airliner" } ], "clip_score": 29.984375, "loss": 4.789760589599609 }, { "id": "identity_266195", "image": "PMC-VQA/images/PMC6127925_Fig5_364669.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is the subject of the imaging in the lower panel? \n(A) Single array section \n(B) Carbon nanotube tape substrate \n(C) Magnified region \n(D) Multibeam SEM imaging" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.34375, "loss": 4.47835111618042 }, { "id": "identity_320092", "image": "PMC-VQA/images/PMC7648378_Fig1_293222.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of view is used to show this cross-table view of the acetabular component?\nChoices:\n(A) anteroposterior view \n(B) oblique view \n(C) lateral view \n(D) ultrasound view" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.640625, "loss": 4.479857921600342 }, { "id": "identity_126139", "image": "VQA2.0/images/81128.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.71875, "loss": 4.159628391265869 }, { "id": "identity_298854", "image": "A-OKVQA/images/114414.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action will number 2 take next?\nChoices:\n(A) sit\n(B) return home\n(C) hit someone\n(D) run" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.7265625, "loss": 4.154907703399658 }, { "id": "identity_317577", "image": "FigureQA/images/55037.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the minimum for Saddle Brown?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.65625, "loss": 4.23943567276001 }, { "id": "identity_103641", "image": "A-OKVQA/images/542717.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the vegetable directly to the left of the eggplants?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 22.828125, "loss": 4.380122184753418 }, { "id": "identity_3240", "image": "FigureQA/images/9542.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Lime Green the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.015625, "loss": 4.0572710037231445 }, { "id": "identity_21279", "image": "A-OKVQA/images/289569.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the woman on the purple sign doing?\nChoices:\n(A) dancing\n(B) praying\n(C) eating\n(D) singing" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.28125, "loss": 4.619156837463379 }, { "id": "identity_323774", "image": "Super-CLEVR/images/superCLEVR_new_001308.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any other thing that is the same size as this?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 13.9609375, "loss": 4.674964427947998 }, { "id": "identity_187733", "image": "VQA2.0/images/169322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many meat patties are there on the plate?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 25.546875, "loss": 4.527492523193359 }, { "id": "identity_217101", "image": "A-OKVQA/images/204036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the advertisement on the back of the bus promoting?" }, { "from": "gpt", "value": "The answer is Ruby Mart" } ], "clip_score": 26.8125, "loss": 4.530488014221191 }, { "id": "identity_201520", "image": "PMC-VQA/images/PMC8874131_F2_204782.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct fluorescent colors can be seen in the image on the right?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.171875, "loss": 4.219440460205078 }, { "id": "identity_109530", "image": "TextVQA/images/17aff2e17444be0d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is shown on the road in the image?" }, { "from": "gpt", "value": "The answer is Train" } ], "clip_score": 13.1015625, "loss": 4.2252397537231445 }, { "id": "identity_270756", "image": "Super-CLEVR/images/superCLEVR_new_000057.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does any cruiser have the same color as the tiny utility bike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 23.59375, "loss": 5.147725582122803 }, { "id": "identity_329528", "image": "TextVQA/images/58825d09c218fa20.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last letter in green on the person's scarf?" }, { "from": "gpt", "value": "The answer is es" } ], "clip_score": 25.046875, "loss": 4.213801383972168 }, { "id": "identity_312644", "image": "PMC-VQA/images/PMC9354060_fig3_364481.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What technique is depicted in the lower panels?\nChoices:\n(A) Isolation method\n(B) Polymerase chain reaction\n(C) Electrophoresis\n(D) Gel extraction" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.8125, "loss": 4.824643135070801 }, { "id": "identity_16422", "image": "IconQA/images/94992.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a rocket. Which picture is eighth?\nChoices:\n(A) moon\n(B) rocket\n(C) star" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.421875, "loss": 4.365756034851074 }, { "id": "identity_116273", "image": "A-OKVQA/images/334291.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of stove is present in the kitchen?" }, { "from": "gpt", "value": "The answer is Electric" } ], "clip_score": 23.703125, "loss": 4.013489723205566 }, { "id": "identity_8790", "image": "TextVQA/images/3b428af0e7026726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what soda company is on the side of the cooler?" }, { "from": "gpt", "value": "The answer is beautiful" } ], "clip_score": 26.3125, "loss": 5.701526165008545 }, { "id": "identity_321749", "image": "A-OKVQA/images/465418.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this mesh bin for?\nChoices:\n(A) oil\n(B) garbage\n(C) recycling\n(D) compost" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 8.296875, "loss": 4.437759876251221 }, { "id": "identity_193922", "image": "PMC-VQA/images/PMC5015237_Fig2_71228.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the circle marking the abnormality?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 21.359375, "loss": 4.143915176391602 }, { "id": "identity_188665", "image": "VQA2.0/images/353912.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the house door directly in the center of the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 23.046875, "loss": 4.075204849243164 }, { "id": "identity_319077", "image": "A-OKVQA/images/92974.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the fenced-in area behind the person lead to?\nChoices:\n(A) subway\n(B) park\n(C) jail\n(D) deli" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.7109375, "loss": 4.473988056182861 }, { "id": "identity_269875", "image": "FigureQA/images/81817.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is true: Is Cornflower greater than Peru?\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.4375, "loss": 4.006032466888428 }, { "id": "identity_14462", "image": "ScienceQA/images/10887.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) Kiribati\n(B) Tuvalu\n(C) Tonga\n(D) Fiji" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.0, "loss": 4.099074363708496 }, { "id": "identity_265460", "image": "A-OKVQA/images/340247.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which location is renowned for hosting islands where this particular sport is practiced?\nChoices:\n(A) Siberia\n(B) Hawaii\n(C) Egypt\n(D) Kazakhstan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.6796875, "loss": 4.240707874298096 }, { "id": "identity_317658", "image": "IconQA/images/69253.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image shows a crab. What is the third image?\nChoices:\n(A) crab\n(B) bucket\n(C) boat" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 36.15625, "loss": 4.035345077514648 }, { "id": "identity_19542", "image": "ScienceQA/images/12655.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Idaho?\nChoices:\n(A) Houston\n(B) Pierre\n(C) Boise\n(D) Nampa" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.171875, "loss": 4.321102142333984 }, { "id": "identity_299056", "image": "PMC-VQA/images/PMC6747042_f1_38423.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the black arrows in the image?\nChoices:\n(A) Correct fusions between sepals, \n(B) Imperfect fusions between sepals, \n(C) Healthy growth of sepals, \n(D) Lack of fusion between sepals." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.328125, "loss": 4.218581199645996 }, { "id": "identity_5495", "image": "ScienceQA/images/17745.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this experiment, which were part of a control group?\nChoices:\n(A) the plots with no fires\n(B) the plots with fires" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.5234375, "loss": 4.395600318908691 }, { "id": "identity_315238", "image": "ScienceQA/images/806.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these places is indicated on the map?\nChoices:\n(A) St. Louis\n(B) New Orleans\n(C) Houston\n(D) New York City" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.5, "loss": 4.094486236572266 }, { "id": "identity_278960", "image": "FigureQA/images/90812.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Dark Turquoise or Orange Red?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.453125, "loss": 4.591935157775879 }, { "id": "identity_55794", "image": "DocVQA/images/xfjv0228_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many children does George E. Wilber, Jr. have?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.984375, "loss": 4.134991645812988 }, { "id": "identity_301754", "image": "A-OKVQA/images/197329.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the tallest thing?\nChoices:\n(A) giraffe\n(B) ladder\n(C) tree\n(D) giant man" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.28125, "loss": 4.1082940101623535 }, { "id": "identity_271500", "image": "A-OKVQA/images/577893.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following last names belongs to the athlete? \n(A) garrett\n(B) jackson\n(C) federer\n(D) jones" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.265625, "loss": 4.37762451171875 }, { "id": "identity_52834", "image": "MapQA/images/map_7934.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states fall into the highest percentage category for fans in the USA?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 35.40625, "loss": 4.303457736968994 }, { "id": "identity_276619", "image": "IconQA/images/42717.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on? \nOptions:\n(A) white\n(B) pink" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.78125, "loss": 4.670660018920898 }, { "id": "identity_35199", "image": "A-OKVQA/images/241076.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be purchased in the yellow building?\nChoices:\n(A) skateboards\n(B) bikes\n(C) sneakers\n(D) lottery tickets" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 12.7109375, "loss": 4.097299575805664 }, { "id": "identity_136554", "image": "DocVQA/images/xrvy0000_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What action plans are mentioned for maximizing cigarette industry volume (CIV)?" }, { "from": "gpt", "value": "The answer is Region, Division, Chain, Assignment" } ], "clip_score": 23.96875, "loss": 4.581760883331299 }, { "id": "identity_217156", "image": "VQA2.0/images/104337.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many athletes are wearing a red sleeve on their right arm?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.984375, "loss": 4.452098369598389 }, { "id": "identity_327358", "image": "A-OKVQA/images/549849.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these individuals doing?\nChoices:\n(A) sport\n(B) travel\n(C) debate\n(D) celebration" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.8984375, "loss": 4.249476432800293 }, { "id": "identity_216479", "image": "VQA-AS/images/10395.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the sun setting or rising?" }, { "from": "gpt", "value": "The answer is Setting" } ], "clip_score": 19.1875, "loss": 4.00093936920166 }, { "id": "identity_125466", "image": "VQA2.0/images/166926.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many hanging lamps are there above the kitchen area?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 19.375, "loss": 4.5957441329956055 }, { "id": "identity_199721", "image": "A-OKVQA/images/461805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the motorcycle?" }, { "from": "gpt", "value": "The answer is green" } ], "clip_score": 27.109375, "loss": 4.0520453453063965 }, { "id": "identity_294056", "image": "IconQA/images/86751.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the quantity of ovals an even number or an odd number?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.765625, "loss": 4.395516395568848 }, { "id": "identity_334404", "image": "A-OKVQA/images/146841.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of fruit is depicted?\nChoices:\n(A) banana\n(B) peach\n(C) watermelon\n(D) strawberry" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.046875, "loss": 4.237176418304443 }, { "id": "identity_20923", "image": "AI2D/images/2951.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which machine do you use by raising or moving a weight at one end by pushing down on the other?\nChoices:\n(A) lever\n(B) pulley\n(C) inclined plane\n(D) screw" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.921875, "loss": 4.029975891113281 }, { "id": "identity_326363", "image": "PMC-VQA/images/PMC6062049_pone.0201004.g003_346459.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the body was scanned in the patient?\nChoices:\n(A) Brain\n(B) Thighs\n(C) Arm\n(D) Chest" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.953125, "loss": 4.523731708526611 }, { "id": "identity_785", "image": "A-OKVQA/images/518127.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What else would you probably see in this giant structure with oversized balls?\nChoices:\n(A) soccer match\n(B) golf match\n(C) swimming competition\n(D) theater play" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.921875, "loss": 4.141138553619385 }, { "id": "identity_323283", "image": "DVQA/images/bar_train_00170082.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the smallest total value?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 19.453125, "loss": 4.136513710021973 }, { "id": "identity_151061", "image": "AI2D/images/1766.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many levels of consumers are present in this food web?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 32.46875, "loss": 4.2528839111328125 }, { "id": "identity_301945", "image": "A-OKVQA/images/543084.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What types of leaves do these trees have?\nChoices:\n(A) scale\n(B) needles\n(C) broadleaf\n(D) grass" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 7.4921875, "loss": 4.2513861656188965 }, { "id": "identity_328159", "image": "DVQA/images/bar_train_00130091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first bar from the bottom in each set?" }, { "from": "gpt", "value": "The answer is get" } ], "clip_score": 17.75, "loss": 4.464663028717041 }, { "id": "identity_27206", "image": "PMC-VQA/images/PMC4101959_fig13_306893.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the edema present in the given image? \nChoices:\n(A) Right thalamus \n(B) Left thalamus \n(C) Cerebellum \n(D) Brainstem " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.140625, "loss": 4.262610912322998 }, { "id": "identity_309820", "image": "PMC-VQA/images/PMC7057593_Fig5_129341.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging method was utilized for pre-surgery planning?\nChoices:\n(A) Post-surgery X-ray \n(B) Pre-surgery X-ray \n(C) X-ray at the final follow-up \n(D) Pre-surgery CT with 3D reconstruction" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.421875, "loss": 4.173562049865723 }, { "id": "identity_147005", "image": "DVQA/images/bar_train_00139737.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the lowest value for 'symbol'?" }, { "from": "gpt", "value": "The answer is notice" } ], "clip_score": 21.75, "loss": 4.1175079345703125 }, { "id": "identity_225592", "image": "A-OKVQA/images/461378.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many layers does the wax lighting item have, and what is the total number of flags placed on it?" }, { "from": "gpt", "value": "The answer is Three layers, twelve flags" } ], "clip_score": 20.9375, "loss": 4.014781475067139 }, { "id": "identity_286534", "image": "PMC-VQA/images/PMC7071711_Fig14_133856.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In Patient 2, where are the inflammatory changes located?\nChoices:\n(A) Abdominal walls\n(B) Chest walls\n(C) Pelvic walls\n(D) Back walls" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.796875, "loss": 4.007662773132324 }, { "id": "identity_19005", "image": "A-OKVQA/images/296782.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the skateboarder likely to suffer from?\nChoices:\n(A) fatigue\n(B) dog bite\n(C) pricked feet\n(D) car accident" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.296875, "loss": 4.232244491577148 }, { "id": "identity_316671", "image": "DocVQA/images/rgfl0225_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the Study Site?" }, { "from": "gpt", "value": "The answer is Covance - Dallas" } ], "clip_score": 17.40625, "loss": 5.0294508934021 }, { "id": "identity_1227", "image": "TQA/images/question_images/earth_moon_phases_2553.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the Moon located during the New Moon phase?\nChoices:\n(A) Partially between the Sun and the Earth\n(B) Behind the Sun\n(C) Between the Sun and the Earth\n(D) Behind the Earth" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.625, "loss": 4.280731201171875 }, { "id": "identity_220912", "image": "VQA-AS/images/6242.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pillows are on the sofa?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 26.96875, "loss": 4.586691379547119 }, { "id": "identity_184203", "image": "VQA2.0/images/287590.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many street lamps are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.96875, "loss": 4.461173057556152 }, { "id": "identity_328472", "image": "PMC-VQA/images/PMC4579082_pbio.1002253.g002_425907.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which muscle is depicted in the transverse sections in panel (e,f)?\nChoices:\n(A) Soleus.\n(B) Masseter.\n(C) Biceps brachii.\n(D) Pectoralis minor." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.671875, "loss": 4.392902851104736 }, { "id": "identity_289965", "image": "A-OKVQA/images/80480.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who among the following is likely to hit the ball soon: white pants, cannot tell, either one, or black pants?\nChoices:\n(A) white pants\n(B) cannot tell\n(C) either one\n(D) black pants" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.9296875, "loss": 4.181920051574707 }, { "id": "identity_287808", "image": "FigureQA/images/52520.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Medium Periwinkle or Chocolate?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.265625, "loss": 4.538071155548096 }, { "id": "identity_192236", "image": "PMC-VQA/images/PMC3789311_fig2_235557.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the colors red and blue in the image?" }, { "from": "gpt", "value": "The answer is Blood flow direction relative to the probe" } ], "clip_score": 21.703125, "loss": 4.59384298324585 }, { "id": "identity_129874", "image": "VQA-AS/images/18345.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many clouds are in the sky?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.09375, "loss": 4.248307228088379 }, { "id": "identity_19917", "image": "Super-CLEVR/images/superCLEVR_new_001937.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the small rubber minivan and the small rubber bus have the same color?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 25.34375, "loss": 5.846445560455322 }, { "id": "identity_12370", "image": "ScienceQA/images/6705.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Montana?\nChoices:\n(A) Helena\n(B) Missoula\n(C) Billings\n(D) Olympia" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.71875, "loss": 4.066165924072266 }, { "id": "identity_23524", "image": "IconQA/images/16562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of flowers even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.5625, "loss": 4.0894455909729 }, { "id": "identity_243961", "image": "AI2D/images/4570.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the picture above, identify and describe the differences between the total, partial, and annular eclipses, and explain which type of eclipse allows for the observation of the solar corona" }, { "from": "gpt", "value": "The answer is Total eclipse" } ], "clip_score": 35.5, "loss": 4.184086799621582 }, { "id": "identity_70151", "image": "ScienceQA/images/13385.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many legs does the bathtub have?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 28.09375, "loss": 4.389684677124023 }, { "id": "identity_322364", "image": "TQA/images/question_images/earth_moon_phases_2554.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A few days after the new moon phase, what do you call the phase when the Moon will be visible again and lasts until the first quarter?\nChoices:\n(A) First Quarter\n(B) Waxing Gibbous\n(C) Waning Crescent\n(D) Waxing Crescent" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 36.71875, "loss": 4.287179470062256 }, { "id": "identity_266937", "image": "A-OKVQA/images/397658.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the girl holding her hand to her ear?\nChoices:\n(A) posing\n(B) soothing pain\n(C) using phone\n(D) she's sleepy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.671875, "loss": 4.380455017089844 }, { "id": "identity_313563", "image": "ScienceQA/images/19314.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these organisms is a tertiary consumer in this food web?\nChoices:\n(A) grizzly bear\n(B) snowy owl\n(C) parasitic jaeger" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.578125, "loss": 4.2795538902282715 }, { "id": "identity_325395", "image": "PMC-VQA/images/PMC6598616_F2_1086.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the arrow pointing to in the image provided?\nChoices:\n(A) internal table of a frontal bone \n(B) external table of a parietal bone \n(C) external table of a frontal bone \n(D) internal table of a parietal bone" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.171875, "loss": 4.079823017120361 }, { "id": "identity_268558", "image": "TQA/images/abc_question_images/parts_worm_17321.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the part that corresponds to the dorsal blood vessel:\n(A) L\n(B) W\n(C) P\n(D) Y" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.234375, "loss": 4.123562812805176 }, { "id": "identity_276533", "image": "TabMWP/images/4510.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.234375, "loss": 4.53567361831665 }, { "id": "identity_329393", "image": "A-OKVQA/images/143952.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the main thing being held as they walk by looking at the No 5 train?\nChoices:\n(A) lunch and\n(B) beer can\n(C) puppy\n(D) bicycle" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 12.9921875, "loss": 4.068818092346191 }, { "id": "identity_66633", "image": "AI2D/images/284.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many producers are depicted in this food web?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 32.28125, "loss": 4.541082859039307 }, { "id": "identity_317981", "image": "VizWiz/images/VizWiz_train_00001865.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is shown in this image?" }, { "from": "gpt", "value": "The answer is 4ycjfw8l" } ], "clip_score": 21.578125, "loss": 4.981757164001465 }, { "id": "identity_117226", "image": "PMC-VQA/images/PMC6566351_sensors-19-02361-f005_493505.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can the image be used to diagnose a stroke?" }, { "from": "gpt", "value": "The answer is Yes, with professional interpretation" } ], "clip_score": 33.09375, "loss": 4.039386749267578 }, { "id": "identity_265826", "image": "DocVQA/images/rlvm0227_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the anticipated result as indicated in the analysis?" }, { "from": "gpt", "value": "The answer is report" } ], "clip_score": 22.28125, "loss": 5.23443603515625 }, { "id": "identity_40407", "image": "DocVQA/images/lgcn0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the result of the transmission?" }, { "from": "gpt", "value": "The answer is OK" } ], "clip_score": 19.390625, "loss": 4.144530296325684 }, { "id": "identity_308964", "image": "AI2D/images/373.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label best describes the mass of rock intruded between layers of other rock, causing uplift?\nChoices:\n(A) Q\n(B) O\n(C) L\n(D) B" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.046875, "loss": 4.043952941894531 }, { "id": "identity_27248", "image": "TabMWP/images/28581.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is the function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.875, "loss": 4.54069185256958 }, { "id": "identity_128580", "image": "VQA2.0/images/159601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flags are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.5, "loss": 4.206610679626465 }, { "id": "identity_30910", "image": "PMC-VQA/images/PMC6848183_Fig4_65914.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the object stained by eosin in this image? \nChoices:\n(A) Neurons \n(B) Glia \n(C) Blood vessels \n(D) None of the above " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.421875, "loss": 4.240813255310059 }, { "id": "identity_288607", "image": "A-OKVQA/images/200033.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What instrument is the man in the yellow shirt playing? Is it:\n(A) drums\n(B) guitar\n(C) buckets\n(D) harp" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.0703125, "loss": 4.436410427093506 }, { "id": "identity_311956", "image": "PMC-VQA/images/PMC8264432_f3_3540.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What region is analyzed between two methods?\nChoices:\n(A) PTV region\n(B) OAR region\n(C) Skin\n(D) Bone" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.734375, "loss": 4.155296325683594 }, { "id": "identity_32840", "image": "TQA/images/question_images/faults_1756.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of faulting compresses rather than extends?\nChoices:\n(A) Strike-slip faulting\n(B) Hanging wall faulting\n(C) Normal faulting\n(D) Reverse and thrust faulting" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 36.59375, "loss": 4.0551838874816895 }, { "id": "identity_15742", "image": "PMC-VQA/images/PMC8049293_pone.0250177.g006_418737.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which knee(s) showed increased bone loss after treatment with P. gingivalis?\nChoices:\n(A) Left knee only \n(B) Right knee only \n(C) Both knees \n(D) No knee showed increased bone loss. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.28125, "loss": 4.039714813232422 }, { "id": "identity_28484", "image": "A-OKVQA/images/30973.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where should the heavier person sleep?\nChoices:\n(A) another room\n(B) floor\n(C) bottom\n(D) top" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.484375, "loss": 4.3944854736328125 }, { "id": "identity_200885", "image": "PMC-VQA/images/PMC5602807_Fig1_216782.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can we infer the phase of the CT scan based on the image?" }, { "from": "gpt", "value": "The answer is Portal venous phase" } ], "clip_score": 31.0, "loss": 4.22344970703125 }, { "id": "identity_245541", "image": "VQA-AS/images/7149.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pillows are on the couch where the cat is sleeping?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 33.4375, "loss": 4.394116401672363 }, { "id": "identity_272387", "image": "ScienceQA/images/1663.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following correctly identifies the direction of thermal energy transfer during this time: from each bottle to the surroundings or from the surroundings to each bottle?\nChoices:\n(A) each bottle . . . the surroundings\n(B) the surroundings . . . each bottle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.703125, "loss": 4.022486686706543 }, { "id": "identity_117078", "image": "A-OKVQA/images/360747.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly in front of the truck?" }, { "from": "gpt", "value": "The answer is A horse" } ], "clip_score": 22.890625, "loss": 5.002538681030273 }, { "id": "identity_306949", "image": "PMC-VQA/images/PMC6611056_tca13066-fig-0001_4492.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging study used to visualize this?\nChoices:\n(A) Magnetic resonance imaging (MRI).\n(B) Computed tomography (CT) scan.\n(C) X-ray.\n(D) Positron emission tomography (PET) scan." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.90625, "loss": 4.277949333190918 }, { "id": "identity_202259", "image": "A-OKVQA/images/272957.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many street lamps can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.046875, "loss": 4.24237060546875 }, { "id": "identity_280086", "image": "PMC-VQA/images/PMC8034399_F4_412878.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the radiographic presentation of the lesion depicted in the CT images?\nOptions:\n(A) Lytic\n(B) Well-circumscribed\n(C) Multilocular\n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.25, "loss": 5.133823394775391 }, { "id": "identity_12007", "image": "PMC-VQA/images/PMC4158140_fig1_318878.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is seen in the computed tomography?\nChoices:\n(A) Tonsillitis \n(B) Sinusitis \n(C) Bronchitis \n(D) Pneumonia " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.96875, "loss": 4.212582588195801 }, { "id": "identity_278523", "image": "PMC-VQA/images/PMC8117624_Fig2_439345.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of images are displayed in the top row? \n(A) Brain tumor images \n(B) X-ray images \n(C) Normal MRI images \n(D) CT scan images" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.3125, "loss": 4.334856986999512 }, { "id": "identity_279041", "image": "TabMWP/images/34990.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nOptions:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.15625, "loss": 4.126959323883057 }, { "id": "identity_1268", "image": "FigureQA/images/46212.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dim Gray greater than Chartreuse?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.578125, "loss": 4.101086616516113 }, { "id": "identity_24692", "image": "PMC-VQA/images/PMC5126097_fig5_99899.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the arrowheads in the merged image indicate?\nChoices:\n(A) Overlap between CD3 and CD146 expressions \n(B) Overlap between renin and CD3 expressions \n(C) Overlap between renin and CD146 expressions \n(D) Overlap between CD3 and renin expressions " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.71875, "loss": 4.112910270690918 }, { "id": "identity_289951", "image": "A-OKVQA/images/25996.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which movement causes the child to smile? \nOptions:\n(A) nodding\n(B) earthquake\n(C) rocking\n(D) sliding" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.4609375, "loss": 4.021161079406738 }, { "id": "identity_337678", "image": "A-OKVQA/images/191925.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is this game being played?\nChoices:\n(A) afternoon\n(B) noon\n(C) night\n(D) morning" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 8.8828125, "loss": 4.118880271911621 }, { "id": "identity_216416", "image": "VQA2.0/images/66459.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many street lamps are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.328125, "loss": 4.670688629150391 }, { "id": "identity_186750", "image": "PMC-VQA/images/PMC4631848_fig3_440391.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the condition of the kidneys in the given scenario?" }, { "from": "gpt", "value": "The answer is Normal condition" } ], "clip_score": 23.671875, "loss": 5.015460014343262 }, { "id": "identity_197237", "image": "A-OKVQA/images/294866.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.359375, "loss": 4.310441493988037 }, { "id": "identity_148891", "image": "DocVQA/images/tffn0226_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the result of the TRUEGLIDE technology as mentioned in the text?" }, { "from": "gpt", "value": "The answer is More fluid range of natural motion" } ], "clip_score": 26.296875, "loss": 4.515745639801025 }, { "id": "identity_20814", "image": "A-OKVQA/images/397665.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What devices would the word at the top be associated with?\nChoices:\n(A) microwaves\n(B) kettle\n(C) cell phones\n(D) microphones" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.4140625, "loss": 4.819386959075928 }, { "id": "identity_190242", "image": "VQA2.0/images/149921.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the umbrella in the background?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 14.0703125, "loss": 4.252655506134033 }, { "id": "identity_325283", "image": "A-OKVQA/images/248250.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is probably causing a bad smell in the market area?\nChoices:\n(A) limburger cheese\n(B) cow\n(C) manure\n(D) dead fish" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.1015625, "loss": 4.077380657196045 }, { "id": "identity_330345", "image": "PMC-VQA/images/PMC4033054_F4_291575.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What brain imaging method was used to create this image?\nChoices:\n(A) Positron Emission Tomography (PET)\n(B) Electroencephalography (EEG)\n(C) Magnetoencephalography (MEG)\n(D) Magnetic Resonance Imaging (MRI)" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.84375, "loss": 4.00498104095459 }, { "id": "identity_286606", "image": "PMC-VQA/images/PMC7448131_fig2-2050313X20952996_240209.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is the abscess situated?\nChoices:\n(A) Right axilla\n(B) Left axilla\n(C) Abdomen\n(D) Leg" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.140625, "loss": 4.817722320556641 }, { "id": "identity_207623", "image": "PMC-VQA/images/PMC4877350_Fig14_33157.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image?" }, { "from": "gpt", "value": "The answer is Axial" } ], "clip_score": 22.0, "loss": 4.285701751708984 }, { "id": "identity_44647", "image": "DocVQA/images/trkm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the Associate Dean for Research and Graduate Study?" }, { "from": "gpt", "value": "The answer is Theodore R. Vallance" } ], "clip_score": 19.1875, "loss": 4.210721015930176 }, { "id": "identity_279253", "image": "Super-CLEVR/images/superCLEVR_new_000376.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the shiny motorbike on the right the same size as the metallic motorbike on the left of the large green bicycle?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 34.65625, "loss": 4.940618515014648 }, { "id": "identity_36551", "image": "PMC-VQA/images/PMC7532002_fig2_265128.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the rectangle in image A? \nChoices:\n(A) Brainstem \n(B) Spinal cord \n(C) Fiber tract \n(D) Cerebellum " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.453125, "loss": 4.331416606903076 }, { "id": "identity_115218", "image": "TextVQA/images/c76f0ac2816f7c69.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many candles are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 10.7109375, "loss": 4.198884010314941 }, { "id": "identity_282076", "image": "IconQA/images/64136.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner more likely to land on? \nOptions:\n(A) white\n(B) blue" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.296875, "loss": 4.169847011566162 }, { "id": "identity_123021", "image": "A-OKVQA/images/17409.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the liquid in the glass?" }, { "from": "gpt", "value": "The answer is Pale yellow or golden" } ], "clip_score": 18.53125, "loss": 4.518496036529541 }, { "id": "identity_294576", "image": "ScienceQA/images/6493.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic is shared by both of these objects?\nChoices:\n(A) hard\n(B) bendable" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.109375, "loss": 4.145336627960205 }, { "id": "identity_274179", "image": "PMC-VQA/images/PMC6713910_F6_30138.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to differentiate between various cell types in the brain?\nChoices:\n(A) X-ray imaging\n(B) MRI\n(C) Double-labeling immunofluorescence\n(D) CT scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.984375, "loss": 4.106487274169922 }, { "id": "identity_219811", "image": "A-OKVQA/images/431954.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible badges or patches are there on the person's costume?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 21.34375, "loss": 4.633940696716309 }, { "id": "identity_67480", "image": "ScienceQA/images/5665.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries are included in the highlighted region?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 22.9375, "loss": 5.671566963195801 }, { "id": "identity_32515", "image": "PMC-VQA/images/PMC7133459_f10_145402.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What structure is shown in the zoom-in view in (b)? \nChoices:\n(A) cellular material \n(B) palisade fabric \n(C) interspace smaller filamentous structures \n(D) unaltered interior sinter samples " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.125, "loss": 4.120907306671143 }, { "id": "identity_294939", "image": "PMC-VQA/images/PMC7379153_F7_219107.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the upper panel?\nChoices:\n(A) Activity for untrustworthy faces in the sighted fields\n(B) Activity for trustworthy faces in the blind fields\n(C) Activity for trustworthy faces in the sighted and blind fields\n(D) Activity for neutral faces in the sighted and blind fields." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.78125, "loss": 4.179862976074219 }, { "id": "identity_27275", "image": "PMC-VQA/images/PMC7294455_F20_192412.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which structures are labeled as D and E? \nChoices:\n(A) SolC and SolM \n(B) SolV and SolVL \n(C) AP and TS \n(D) SolC and SolVL " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.078125, "loss": 4.135035514831543 }, { "id": "identity_286010", "image": "AI2D/images/4483.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the image that represents the word \"erose\":\n(A) N\n(B) L\n(C) D\n(D) Y" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.8671875, "loss": 4.326625347137451 }, { "id": "identity_120804", "image": "VQA-AS/images/3897.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many plates are there on the floor?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 18.875, "loss": 4.290015697479248 }, { "id": "identity_290202", "image": "IconQA/images/36146.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture is the fourth in the sequence, given that the first picture is a sub? Options are fish, sub, and turtle.\nChoices:\n(A) fish\n(B) sub\n(C) turtle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.75, "loss": 4.407756805419922 }, { "id": "identity_328917", "image": "A-OKVQA/images/508985.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this place?\nChoices:\n(A) forest\n(B) underground\n(C) city\n(D) sky" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.546875, "loss": 4.232203483581543 }, { "id": "identity_104077", "image": "TextVQA/images/f5a779491168b7e5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the text on the orange circle?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 20.59375, "loss": 4.256063461303711 }, { "id": "identity_29835", "image": "A-OKVQA/images/183913.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What governing body uses the flag closest to the camera?\nChoices:\n(A) caricom\n(B) belgium\n(C) european union\n(D) united states" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.0078125, "loss": 4.639279842376709 }, { "id": "identity_314075", "image": "PMC-VQA/images/PMC8140552_FIG6_447381.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the abnormality seen in this? \nChoices:\n(A) Lung nodule \n(B) Pneumothorax \n(C) Pleural effusion \n(D) Pulmonary embolism" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.203125, "loss": 4.553728103637695 }, { "id": "identity_27928", "image": "A-OKVQA/images/163091.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of transportation is shown?\nChoices:\n(A) air\n(B) water\n(C) land\n(D) rail" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.5625, "loss": 4.085981845855713 }, { "id": "identity_334057", "image": "IconQA/images/82489.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this symbol?\nChoices:\n(A) turn\n(B) slide\n(C) flip" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.359375, "loss": 4.363152980804443 }, { "id": "identity_228525", "image": "Super-CLEVR/images/superCLEVR_new_000238.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the object that is to the left of the yellow object, which is to the right of the small red metallic object in front of the small bicycle behind the tiny brown matte object?" }, { "from": "gpt", "value": "The answer is small" } ], "clip_score": 26.375, "loss": 4.065791130065918 }, { "id": "identity_39047", "image": "DVQA/images/bar_train_00168561.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store has the highest number of units sold for the 'face' category?" }, { "from": "gpt", "value": "The answer is face" } ], "clip_score": 27.953125, "loss": 4.338900566101074 }, { "id": "identity_13366", "image": "ScienceQA/images/329.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these two objects have in common?\nChoices:\n(A) sticky\n(B) yellow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.65625, "loss": 4.760688781738281 }, { "id": "identity_318977", "image": "ScienceQA/images/18526.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these organisms is a producer in this food web?\nChoices:\n(A) bilberry\n(B) earthworm\n(C) snowy owl" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.890625, "loss": 4.309682846069336 }, { "id": "identity_29447", "image": "PMC-VQA/images/PMC6298256_f1_412562.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organs appear brighter after the injection of the contrast agent? \nChoices:\n(A) Kidneys and bladder \n(B) Stomach and intestines \n(C) Spleen and liver \n(D) Brain and spinal cord " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.765625, "loss": 4.874564170837402 }, { "id": "identity_217061", "image": "A-OKVQA/images/448983.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many horse-drawn carriages are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.609375, "loss": 4.4749274253845215 }, { "id": "identity_255857", "image": "VizWiz/images/VizWiz_train_00009568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the design elements and visible features, what might be the historical or cultural significance of this object, and what clues suggest its possible use?" }, { "from": "gpt", "value": "The answer is Traditional Japanese ice shaver with cultural significance" } ], "clip_score": 20.125, "loss": 4.231578350067139 }, { "id": "identity_5156", "image": "AI2D/images/4718.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which leaf shape has the appearance of a circle or sphere?\nChoices:\n(A) rounded\n(B) cordate narrow\n(C) ovate\n(D) broad elliptic" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.078125, "loss": 4.018546104431152 }, { "id": "identity_270621", "image": "FigureQA/images/70294.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Sienna or Teal?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.40625, "loss": 4.02664852142334 }, { "id": "identity_124903", "image": "PMC-VQA/images/PMC8624097_medicina-57-01276-f001_124169.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the condition of the blood vessels in the retina?" }, { "from": "gpt", "value": "The answer is Normal, no apparent abnormalities" } ], "clip_score": 34.15625, "loss": 4.0885910987854 }, { "id": "identity_326776", "image": "A-OKVQA/images/143576.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is underneath the pot being stirred?\nChoices:\n(A) flame\n(B) cheese\n(C) ice\n(D) animals" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.6875, "loss": 4.271319389343262 }, { "id": "identity_5895", "image": "AI2D/images/202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Base your answers on the diagram below, which shows a partial food web. Which organism is a source of energy to mouse?\nChoices:\n(A) a fox\n(B) an eagle\n(C) an owl\n(D) the grass" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.390625, "loss": 4.241804599761963 }, { "id": "identity_13368", "image": "FigureQA/images/46080.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Red the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.125, "loss": 4.251755714416504 }, { "id": "identity_109283", "image": "Super-CLEVR/images/superCLEVR_new_001715.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are there in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 20.515625, "loss": 4.351008415222168 }, { "id": "identity_265596", "image": "ScienceQA/images/8639.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repelling effect on each other?\nChoices:\n(A) repel\n(B) attract" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.5625, "loss": 4.077389717102051 }, { "id": "identity_15575", "image": "ScienceQA/images/18626.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which material is this scarf made of?\nChoices:\n(A) wool\n(B) asphalt" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.53125, "loss": 4.31975793838501 }, { "id": "identity_73221", "image": "ScienceQA/images/12975.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What can you infer about the word \"antebellum\" based on the meanings of \"ante\" and \"bellum\"?" }, { "from": "gpt", "value": "The answer is Before the war" } ], "clip_score": 24.125, "loss": 4.441280364990234 }, { "id": "identity_29038", "image": "PMC-VQA/images/PMC6039044_pone.0200473.g002_340231.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of images are the coronal reformations of CT in figure B? \nChoices:\n(A) gray-scale CT \n(B) color-coded DECT \n(C) MRI \n(D) ultrasound " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.125, "loss": 4.036006450653076 }, { "id": "identity_24296", "image": "A-OKVQA/images/252188.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What venue is this likely to be?\nChoices:\n(A) art gallery\n(B) restaurant\n(C) hotel\n(D) department store" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.328125, "loss": 4.4641008377075195 }, { "id": "identity_127412", "image": "Super-CLEVR/images/superCLEVR_new_000519.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is positioned between the airplane and the green truck?" }, { "from": "gpt", "value": "The answer is A school bus" } ], "clip_score": 25.71875, "loss": 4.075174808502197 }, { "id": "identity_296700", "image": "A-OKVQA/images/123535.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of sheep is closest to the rear of the blue trailer? \n(A) ewe\n(B) lamb\n(C) ram\n(D) wether" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.15625, "loss": 4.28430700302124 }, { "id": "identity_299440", "image": "ScienceQA/images/20897.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the correct definition for the word \"antebellum\":\n(A) after the long peace\n(B) before the feast\n(C) after the election\n(D) before the war" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.046875, "loss": 4.387585639953613 }, { "id": "identity_3676", "image": "ScienceQA/images/18657.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Minnesota?\nChoices:\n(A) Jefferson City\n(B) Richmond\n(C) Columbus\n(D) Saint Paul" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.3125, "loss": 4.037432670593262 }, { "id": "identity_266427", "image": "Super-CLEVR/images/superCLEVR_new_000241.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the shapes of the large shiny object and the brown shiny object identical?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 19.9375, "loss": 5.292150497436523 }, { "id": "identity_135297", "image": "DocVQA/images/fqlm0018_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last event on the agenda before the luncheon concludes?" }, { "from": "gpt", "value": "The answer is Jeff Manual deputizes RJR's United Way Solicitors" } ], "clip_score": 30.515625, "loss": 4.719388008117676 }, { "id": "identity_3479", "image": "AI2D/images/649.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: what is the layer in the middle of the diagram?\nChoices:\n(A) outer core\n(B) crust\n(C) inner core\n(D) mantle" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.421875, "loss": 4.019320487976074 }, { "id": "identity_332263", "image": "PMC-VQA/images/PMC9417040_pone.0272639.g013_386901.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the area of the body depicted in the image?\nChoices:\n(A) Thoracic area\n(B) Head area\n(C) Abdominal area\n(D) Limbs area" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.515625, "loss": 4.190482139587402 }, { "id": "identity_274644", "image": "AI2D/images/778.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter corresponds to midnight in the given options: A, B, C, or D?\nChoices:\n(A) D\n(B) B\n(C) A\n(D) E" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.84375, "loss": 4.428776741027832 }, { "id": "identity_74255", "image": "TQA/images/abc_question_images/parts_skeleton_17270.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What bone is indicated by label 'T' in the human skeleton diagram?" }, { "from": "gpt", "value": "The answer is First rib" } ], "clip_score": 32.25, "loss": 4.099215030670166 }, { "id": "identity_299766", "image": "DocVQA/images/skww0217_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which program spent $195,000?" }, { "from": "gpt", "value": "The answer is Honoraria" } ], "clip_score": 30.84375, "loss": 4.920709133148193 }, { "id": "identity_184852", "image": "A-OKVQA/images/440534.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing helmets in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 17.765625, "loss": 4.790258884429932 }, { "id": "identity_291062", "image": "A-OKVQA/images/59281.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the people wearing bright yellow hats?\nChoices:\n(A) cosplay\n(B) fashion\n(C) visibility\n(D) punishment" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.953125, "loss": 4.225250720977783 }, { "id": "identity_279007", "image": "DVQA/images/bar_train_00048336.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group of bars, starting from the bottom, is labeled as the eighth?" }, { "from": "gpt", "value": "The answer is set" } ], "clip_score": 17.0625, "loss": 4.203981399536133 }, { "id": "identity_259720", "image": "DocVQA/images/llbx0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who will be responsible for coordinating all areas related to the transfer of the G-H sales office to Denver, and what specific responsibilities will they have?" }, { "from": "gpt", "value": "The answer is Mary Kennedy; office layout, furniture, communications, etc" } ], "clip_score": 20.859375, "loss": 4.281366348266602 }, { "id": "identity_336459", "image": "A-OKVQA/images/311471.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which way will the pink car go?\nChoices:\n(A) turn right\n(B) turn left\n(C) back up\n(D) go straight" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.5546875, "loss": 4.4174628257751465 }, { "id": "identity_297820", "image": "PMC-VQA/images/PMC6774298_jgre21178-fig-0003_45044.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What feature in the image is covered by etched units? \nChoices:\n(A) The open-basin paleolake \n(B) Inverted channels \n(C) The outlet \n(D) The inlet" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.546875, "loss": 4.60561990737915 }, { "id": "identity_25066", "image": "A-OKVQA/images/306716.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what city is this woman boarding the bus?\nChoices:\n(A) kyoto\n(B) osaka\n(C) kobe\n(D) tokyo" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.75, "loss": 4.064326763153076 }, { "id": "identity_289784", "image": "PMC-VQA/images/PMC5570274_pone.0183446.g004_206609.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For the cardiac function analysis, which axis is being utilized?\nChoices:\n(A) Vertical long axis\n(B) Short axis\n(C) Transverse plane\n(D) Horizontal long axis" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.421875, "loss": 4.7853264808654785 }, { "id": "identity_44418", "image": "DocVQA/images/lqdg0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode used for the transmission at 3:18PM?" }, { "from": "gpt", "value": "The answer is EC" } ], "clip_score": 22.796875, "loss": 4.685847282409668 }, { "id": "identity_112352", "image": "VQA2.0/images/329245.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many carrots are visible in the soup?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.71875, "loss": 4.272759437561035 }, { "id": "identity_291233", "image": "PMC-VQA/images/PMC6945762_Fig2_95209.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, which color represents the lateral wall?\nChoices:\n(A) Green\n(B) Red\n(C) Blue\n(D) Orange." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.875, "loss": 4.083451747894287 }, { "id": "identity_131390", "image": "VizWiz/images/VizWiz_train_00000697.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many glasses can be seen in the background?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.859375, "loss": 4.552462100982666 }, { "id": "identity_253480", "image": "IconQA/images/2155.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Calculate the total duration each friend spent walking if Tara walked for 45 minutes, Sarah walked for 30 minutes, and Rosa walked for 1 hour. Who spent the most time walking?" }, { "from": "gpt", "value": "The answer is Rosa" } ], "clip_score": 30.140625, "loss": 4.080955982208252 }, { "id": "identity_290536", "image": "AI2D/images/559.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organism is depicted in the diagram provided? Is it:\n(A) fish\n(B) Plant\n(C) buffalo\n(D) steak" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.96875, "loss": 4.20481538772583 }, { "id": "identity_28460", "image": "A-OKVQA/images/300578.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the woman on the right holding in her hand?\nChoices:\n(A) squash\n(B) potatoes\n(C) cabbage\n(D) watermelon" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.796875, "loss": 4.285680294036865 }, { "id": "identity_45243", "image": "FigureQA/images/84828.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the lowest value at the starting point on the x-axis?" }, { "from": "gpt", "value": "The answer is Dark Green" } ], "clip_score": 29.515625, "loss": 4.1340413093566895 }, { "id": "identity_66706", "image": "ScienceQA/images/18733.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of the bison in this food chain?" }, { "from": "gpt", "value": "The answer is primary consumer" } ], "clip_score": 39.875, "loss": 4.227425575256348 }, { "id": "identity_279532", "image": "Super-CLEVR/images/superCLEVR_new_001811.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any big metallic objects located to the right of the small object in front of the brown object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 16.15625, "loss": 5.772571563720703 }, { "id": "identity_6465", "image": "IconQA/images/88239.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What shape is this?\nChoices:\n(A) cube\n(B) cone\n(C) sphere" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.578125, "loss": 4.026115894317627 }, { "id": "identity_9575", "image": "DVQA/images/bar_train_00063352.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the sixth stack of bars from the bottom?" }, { "from": "gpt", "value": "The answer is fuel" } ], "clip_score": 28.453125, "loss": 4.209348201751709 }, { "id": "identity_295726", "image": "PMC-VQA/images/PMC4313935_pone.0116986.g008_354510.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the meaning of the arrows shown in the image?\nChoices:\n(A) Highlighting contrast difference between the images,\n(B) Showing intensity difference between the images,\n(C) Indicating regions with greater structure similarity,\n(D) Representing gradient direction difference between the images" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.09375, "loss": 4.21211051940918 }, { "id": "identity_289292", "image": "A-OKVQA/images/178505.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location around the world would you most likely find a place similar to where these people are currently located? \nChoices:\n(A) Siberia\n(B) Mexico\n(C) Iceland\n(D) Antarctica" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 3.44921875, "loss": 4.4398698806762695 }, { "id": "identity_272595", "image": "DocVQA/images/gtjw0081_43.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the current status of the SPEC?" }, { "from": "gpt", "value": "The answer is Retired" } ], "clip_score": 23.234375, "loss": 4.318362712860107 }, { "id": "identity_308635", "image": "FigureQA/images/70540.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.6328125, "loss": 4.5650315284729 }, { "id": "identity_323783", "image": "A-OKVQA/images/388179.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the people wearing orange vests?\nChoices:\n(A) camouflage\n(B) visibility\n(C) fashion\n(D) costume" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.1953125, "loss": 4.892716407775879 }, { "id": "identity_22245", "image": "A-OKVQA/images/440534.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the two people both directly above and below the snowboarder doing?\nChoices:\n(A) clapping\n(B) cheering\n(C) filming\n(D) spectating" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.875, "loss": 4.448182106018066 }, { "id": "identity_328312", "image": "PMC-VQA/images/PMC7552380_Fig2_270748.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the Perls staining show in this image?\nChoices:\n(A) Presence of hepatocytes \n(B) Necrosis of hepatocytes \n(C) Ductular proliferation \n(D) Portal spaces" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.0625, "loss": 4.494414806365967 }, { "id": "identity_330940", "image": "PMC-VQA/images/PMC4330230_Fig8_358741.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the arrows indicate in this image?\nChoices:\n(A) Nerves\n(B) Bones\n(C) Veins\n(D) Arteries" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.375, "loss": 4.649548530578613 }, { "id": "identity_328554", "image": "A-OKVQA/images/409143.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the animals in the image give birth to?\nChoices:\n(A) calves\n(B) lambs\n(C) kids\n(D) children" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.03125, "loss": 4.031038761138916 }, { "id": "identity_315069", "image": "FigureQA/images/77551.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the area under the Purple curve the smallest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.078125, "loss": 4.0688700675964355 }, { "id": "identity_33642", "image": "PMC-VQA/images/PMC7270322_Fig2_183898.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the medical imaging technique used in this case? \nChoices:\n(A) MRI \n(B) CT scan \n(C) X-ray \n(D) Ultrasound " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.546875, "loss": 4.075065612792969 }, { "id": "identity_281516", "image": "FigureQA/images/88824.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Olive the one with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.171875, "loss": 4.657325744628906 }, { "id": "identity_220238", "image": "A-OKVQA/images/149679.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles can be seen parked in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.34375, "loss": 4.130461692810059 }, { "id": "identity_287327", "image": "DVQA/images/bar_train_00185527.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the bars run horizontally?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 19.453125, "loss": 4.332900047302246 }, { "id": "identity_117973", "image": "VQA2.0/images/268533.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vases can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.171875, "loss": 4.816152095794678 }, { "id": "identity_184571", "image": "A-OKVQA/images/73445.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are riding in the carriage?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.71875, "loss": 4.1648335456848145 }, { "id": "identity_213179", "image": "A-OKVQA/images/519850.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the dog's collar?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 21.3125, "loss": 4.0658650398254395 }, { "id": "identity_269232", "image": "PMC-VQA/images/PMC6497285_pone.0216409.g001_474442.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which regions of the brain is hypometabolism more prominent? \nChoices:\n(A) Left cerebellar hemisphere\n(B) Right cerebellar hemisphere\n(C) Frontal and parietotemporal areas in both hemispheres\n(D) Frontal and parietotemporal areas in the right hemisphere" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.96875, "loss": 4.116766452789307 }, { "id": "identity_60985", "image": "DocVQA/images/rylg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does Mr. Fenwick request in the letter?" }, { "from": "gpt", "value": "The answer is A brief statement on research progress" } ], "clip_score": 20.671875, "loss": 4.081735610961914 }, { "id": "identity_286344", "image": "PMC-VQA/images/PMC7285432_Fig4_188948.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being demonstrated by the enhancement of the lesion?\nChoices:\n(A) a cavity\n(B) a tumor\n(C) a blood clot\n(D) a muscle tear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.8125, "loss": 4.15865421295166 }, { "id": "identity_311344", "image": "ScienceQA/images/19612.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the following statement about our solar system accurate or inaccurate?\nThe smallest planet is composed mainly of rock.\nChoices:\n(A) accurate\n(B) inaccurate" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.765625, "loss": 4.001752853393555 }, { "id": "identity_270492", "image": "Super-CLEVR/images/superCLEVR_new_000116.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the dimensions of the red shiny truck equal to those of the cyan airliner?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 25.734375, "loss": 5.1111297607421875 }, { "id": "identity_11352", "image": "VizWiz/images/VizWiz_train_00008371.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does this shirt say?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 25.421875, "loss": 4.231027126312256 }, { "id": "identity_25105", "image": "ScienceQA/images/9698.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these two objects have in common?\nChoices:\n(A) sticky\n(B) rough" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.125, "loss": 4.388020038604736 }, { "id": "identity_297089", "image": "PMC-VQA/images/PMC7537103_Fig1_266648.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which area is the extension of the mass visible in the tomography image?\nChoices:\n(A) Abdomen\n(B) Supraclavicular region\n(C) Axillary region\n(D) Within the lung" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.109375, "loss": 4.89628791809082 }, { "id": "identity_202078", "image": "PMC-VQA/images/PMC5893487_Fig21_299403.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image shown?" }, { "from": "gpt", "value": "The answer is Axial" } ], "clip_score": 22.75, "loss": 4.170499801635742 }, { "id": "identity_130223", "image": "A-OKVQA/images/374685.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the bags inside the cages?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 25.75, "loss": 4.043757438659668 }, { "id": "identity_15723", "image": "PMC-VQA/images/PMC7519446_fig3_261632.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the subject of the study? \nChoices:\n(A) Rats \n(B) Mice \n(C) Rabbits \n(D) Pigs " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.234375, "loss": 5.086003303527832 }, { "id": "identity_289159", "image": "A-OKVQA/images/284014.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Of the following options, which resource do these plants require in minimal amounts? \n(A) sun\n(B) love\n(C) water\n(D) heat" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.625, "loss": 4.449987888336182 }, { "id": "identity_4521", "image": "DocVQA/images/nywf0055_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the CONNECTION ID?" }, { "from": "gpt", "value": "The answer is HUA MEI" } ], "clip_score": 22.96875, "loss": 4.05063009262085 }, { "id": "identity_336268", "image": "A-OKVQA/images/193640.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The person in green has a hairstyle often compared to which animal?\nChoices:\n(A) pig\n(B) horse\n(C) cow\n(D) wolf" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 6.859375, "loss": 4.203856468200684 }, { "id": "identity_1072", "image": "PMC-VQA/images/PMC6333181_acm212498-fig-0001_422859.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What view does the sagittal image show? \nChoices:\n(A) Anterior view \n(B) Posterior view \n(C) Side view \n(D) Bottom view " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.625, "loss": 4.407046318054199 }, { "id": "identity_305547", "image": "Super-CLEVR/images/superCLEVR_new_000716.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the large object behind the red motorbike shaped like the brown metallic object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 16.78125, "loss": 5.1177473068237305 }, { "id": "identity_25858", "image": "PMC-VQA/images/PMC4602428_F7_432145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the location of the lymph nodes with increased metabolic activity? \nChoices:\n(A) Left supraclavicular fossa \n(B) Right supraclavicular fossa \n(C) Mediastinum \n(D) Abdomen " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.140625, "loss": 4.325216293334961 }, { "id": "identity_27899", "image": "VQA-AS/images/6976.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the plant?" }, { "from": "gpt", "value": "The answer is floor" } ], "clip_score": 16.28125, "loss": 4.628358840942383 }, { "id": "identity_23581", "image": "PMC-VQA/images/PMC6372798_fig1_434902.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the lesion located? \nChoices:\n(A) Left parietal lobe \n(B) Right parietal lobe \n(C) Frontal lobe \n(D) Temporal lobe " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.078125, "loss": 4.399376392364502 }, { "id": "identity_30612", "image": "PMC-VQA/images/PMC4247313_f2-etm-09-01-0117_340163.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the abnormal mass located?\nChoices:\n(A) Lower lobe of the lung \n(B) Middle lobe of the lung \n(C) Upper lobe of the lung \n(D) Diaphragm " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.84375, "loss": 4.12285852432251 }, { "id": "identity_275057", "image": "Super-CLEVR/images/superCLEVR_new_000357.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the materials used in the construction of the cyan aeroplane and the yellow chopper the same?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 31.1875, "loss": 4.9526896476745605 }, { "id": "identity_22038", "image": "PMC-VQA/images/PMC7960597_Fig1_391687.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the injury shown in the MRI Scan? \nChoices:\n(A) Skull fracture \n(B) Arm fracture \n(C) Leg fracture \n(D) Rib fracture " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.234375, "loss": 4.656937122344971 }, { "id": "identity_302860", "image": "A-OKVQA/images/269249.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What body type does the person with the red trunks have?\nChoices:\n(A) gangly\n(B) willowy\n(C) svelte\n(D) husky" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 6.65234375, "loss": 4.6122331619262695 }, { "id": "identity_310079", "image": "A-OKVQA/images/417983.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this person looking at?\nChoices:\n(A) grass\n(B) trees\n(C) flowers\n(D) elephants" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.78125, "loss": 4.359890460968018 }, { "id": "identity_14421", "image": "PMC-VQA/images/PMC7382072_Fig8_220157.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What staining method was used to visualize the cells? \nChoices:\n(A) Wright-Giemsa stain \n(B) Masson's trichrome stain \n(C) Hematoxylin and Eosin stain \n(D) Toluidine blue stain " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.984375, "loss": 4.376758098602295 }, { "id": "identity_284129", "image": "PMC-VQA/images/PMC4310314_fig2_353794.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to capture the image? \nOptions:\n(A) CT scan\n(B) PET scan\n(C) MRI scan\n(D) X-ray" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.609375, "loss": 4.5159101486206055 }, { "id": "identity_325186", "image": "IconQA/images/96392.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a rocket. Which image is fourth?\nChoices:\n(A) rocket\n(B) star\n(C) moon" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.640625, "loss": 4.12616491317749 }, { "id": "identity_197253", "image": "A-OKVQA/images/234994.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many umpires can be seen in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 24.53125, "loss": 4.290747165679932 }, { "id": "identity_15328", "image": "FigureQA/images/42984.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.109375, "loss": 4.026914119720459 }, { "id": "identity_315579", "image": "Super-CLEVR/images/superCLEVR_new_001056.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other things that are the same shape as this large brown rubber thing?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 14.359375, "loss": 4.945029258728027 }, { "id": "identity_298024", "image": "A-OKVQA/images/560222.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which actor shares the same last name as this street?\nOptions:\n(A) John Goodman\n(B) Christian Slater\n(C) Tom Arnold\n(D) Ben Kingsley" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.734375, "loss": 4.033296585083008 }, { "id": "identity_12228", "image": "ScienceQA/images/3563.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Delaware?\nChoices:\n(A) Cheyenne\n(B) Austin\n(C) Dover\n(D) Georgetown" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.09375, "loss": 4.142449378967285 }, { "id": "identity_291886", "image": "PMC-VQA/images/PMC5371205_fig1_146609.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What findings were observed in the left upper lobe on the axial CT scan?\nChoices:\n(A) Calcification and smooth pleural thickening \n(B) Consolidation and regular pleural thickening \n(C) Fibrosis and irregular pleural thickening \n(D) Nodules and paratracheal air cysts" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.09375, "loss": 4.002517223358154 }, { "id": "identity_296164", "image": "Super-CLEVR/images/superCLEVR_new_000091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the small matte object behind the metallic truck have the same shape as the brown object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 24.171875, "loss": 5.331892967224121 }, { "id": "identity_16893", "image": "Super-CLEVR/images/superCLEVR_new_001935.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a rubber car that is behind the small thing that is left of the brown car that is behind the big purple rubber bicycle; what is its color?" }, { "from": "gpt", "value": "The answer is cyan" } ], "clip_score": 29.203125, "loss": 4.101457595825195 }, { "id": "identity_327215", "image": "IconQA/images/97603.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this shape flat or not flat?\nChoices:\n(A) not flat\n(B) flat" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.53125, "loss": 4.0133867263793945 }, { "id": "identity_315468", "image": "PMC-VQA/images/PMC7830087_diagnostics-11-00129-f001_351374.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What body part is depicted in the maximum intensity projection image (B)?\n\nChoices:\n(A) Abdomen\n(B) Brain\n(C) Chest\n(D) Pelvis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.4375, "loss": 4.427905082702637 }, { "id": "identity_322322", "image": "Super-CLEVR/images/superCLEVR_new_000307.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any metallic vehicles of the same color as the tiny metal motorbike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 19.265625, "loss": 5.4316182136535645 }, { "id": "identity_313565", "image": "VQA2.0/images/127298.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the person standing?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 11.515625, "loss": 4.206425189971924 }, { "id": "identity_328190", "image": "ScienceQA/images/1818.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the place shown?\nChoices:\n(A) Michigan\n(B) Rhode Island\n(C) West Virginia\n(D) Connecticut" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.25, "loss": 4.1992597579956055 }, { "id": "identity_30691", "image": "PMC-VQA/images/PMC5876754_sensors-18-00791-f005_295659.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What software was used to generate the 3D mesh model? \nChoices:\n(A) AutoCAD \n(B) Photoshop \n(C) Reality Capture \n(D) SketchUp " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.171875, "loss": 4.331470489501953 }, { "id": "identity_33526", "image": "A-OKVQA/images/256852.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location does this bus run?\nChoices:\n(A) rural\n(B) city\n(C) suburbs\n(D) farm" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.484375, "loss": 4.656205177307129 }, { "id": "identity_106512", "image": "A-OKVQA/images/107960.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many side mirrors does the vehicle have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.796875, "loss": 4.077495574951172 }, { "id": "identity_298643", "image": "ScienceQA/images/20083.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What property does this object possess: hard or soft?\nChoices:\n(A) hard\n(B) soft" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 9.3828125, "loss": 4.247580528259277 }, { "id": "identity_35917", "image": "PMC-VQA/images/PMC4623697_F3_438097.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the scale bar represent? \nChoices:\n(A) Time \n(B) Temperature \n(C) Signal intensity \n(D) Distance " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 7.16015625, "loss": 4.779967308044434 }, { "id": "identity_297452", "image": "TQA/images/question_images/fossils_6895.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is required for fossilization to begin in fossils?\nChoices:\n(A) Oil\n(B) Natural Gas\n(C) Sedimentary rocks\n(D) Coal" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.5, "loss": 4.118905067443848 }, { "id": "identity_3709", "image": "AI2D/images/1388.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: WHICH PART IS TUBE LIKE STRUCTURE?\nChoices:\n(A) E\n(B) C\n(C) F\n(D) D" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.671875, "loss": 4.491499900817871 }, { "id": "identity_328616", "image": "PMC-VQA/images/PMC8906199_acm213507-fig-0002_216654.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the blue and magenta contours indicate in the CT images?\nChoices:\n(A) Organs\n(B) Tumors\n(C) Blood vessels\n(D) Bones" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.1875, "loss": 4.516115665435791 }, { "id": "identity_25638", "image": "Super-CLEVR/images/superCLEVR_new_000534.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The tiny shiny object that is on the right side of the small object that is in front of the tiny gray sedan is what shape?" }, { "from": "gpt", "value": "The answer is sedan" } ], "clip_score": 28.921875, "loss": 5.525174617767334 }, { "id": "identity_317323", "image": "FigureQA/images/75340.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.9140625, "loss": 4.529109001159668 }, { "id": "identity_313747", "image": "A-OKVQA/images/336552.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person in the middle doing?\nChoices:\n(A) posing\n(B) threatening other\n(C) throwing ball\n(D) falling" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.046875, "loss": 4.697165489196777 }, { "id": "identity_284436", "image": "A-OKVQA/images/401917.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What profession is most likely held by the man in the black suit?\nChoices:\n(A) teacher\n(B) model\n(C) news reporter\n(D) taxi driver" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.5625, "loss": 4.421314716339111 }, { "id": "identity_266158", "image": "ScienceQA/images/13901.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of West Virginia?\nChoices:\n(A) Oklahoma City\n(B) Springfield\n(C) Charleston\n(D) Huntington" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.796875, "loss": 4.058656215667725 }, { "id": "identity_31780", "image": "PMC-VQA/images/PMC7215337_ijms-21-02817-f005_166971.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible in the inset at a higher magnification?\nChoices:\n(A) Rudimentary alveolar wall \n(B) Chlamydial inclusions \n(C) Interlobular septum \n(D) Plasma cells" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.296875, "loss": 4.802309989929199 }, { "id": "identity_20990", "image": "Super-CLEVR/images/superCLEVR_new_000371.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the object that is both in front of the large rubber bus and on the left side of the tiny school bus?" }, { "from": "gpt", "value": "The answer is regular bus" } ], "clip_score": 26.53125, "loss": 5.189757347106934 }, { "id": "identity_32835", "image": "PMC-VQA/images/PMC6664500_Fig1_15932.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At what level is the first cranial slide taken? \nChoices:\n(A) L5 \n(B) The common iliac joint \n(C) Above the external iliac node \n(D) Below the internal obturator artery " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.453125, "loss": 4.30806827545166 }, { "id": "identity_28490", "image": "A-OKVQA/images/556073.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the brand name of the oral care product shown here?\nChoices:\n(A) glide\n(B) oral-b\n(C) crest\n(D) colgate" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.34375, "loss": 4.143734931945801 }, { "id": "identity_265041", "image": "VQA2.0/images/570075.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which part of the world was this picture most likely taken?" }, { "from": "gpt", "value": "The answer is us" } ], "clip_score": 17.859375, "loss": 4.497546195983887 }, { "id": "identity_2758", "image": "FigureQA/images/81241.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Light Gold have the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.734375, "loss": 4.589914798736572 }, { "id": "identity_112937", "image": "A-OKVQA/images/567572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct objects can be seen on the glass surface behind the doll?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.484375, "loss": 4.296977996826172 }, { "id": "identity_242447", "image": "FigureQA/images/60396.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the highest value at x = 100, and what is the difference between this value and the value of Dark Green at x = 100?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 25.671875, "loss": 4.50060510635376 }, { "id": "identity_276701", "image": "PMC-VQA/images/PMC2803997_F1_54397.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the osteochondral mass located in the left knee? \n(A) medial, \n(B) lateral, \n(C) anterior, \n(D) posterior" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.390625, "loss": 4.76932954788208 }, { "id": "identity_305587", "image": "ScienceQA/images/7112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Charleston\n(B) Columbia\n(C) Frankfort\n(D) Baton Rouge" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.8125, "loss": 4.322385311126709 }, { "id": "identity_24830", "image": "PMC-VQA/images/PMC5954786_Fig2_314999.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the image with the red arrows?\nChoices:\n(A) A local relapse\n(B) Metastatic retroperitoneal lymph node \n(C) Both A and B\n(D) Normal lymph node" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.265625, "loss": 4.417730331420898 }, { "id": "identity_15956", "image": "ScienceQA/images/15638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) Haiti\n(B) Jamaica\n(C) Saint Kitts and Nevis\n(D) Cuba" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.234375, "loss": 4.556929111480713 }, { "id": "identity_22391", "image": "PMC-VQA/images/PMC6378808_F2_437391.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being segmented in the image? \nChoices:\n(A) White matter \n(B) Cerebrospinal fluid \n(C) Gray matter \n(D) Blood vessels " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.671875, "loss": 4.198784828186035 }, { "id": "identity_250868", "image": "A-OKVQA/images/262985.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the country of origin for each flag on the boats and then determine the continent these countries belong to" }, { "from": "gpt", "value": "The answer is Europe" } ], "clip_score": 16.875, "loss": 4.3355302810668945 }, { "id": "identity_153395", "image": "ScienceQA/images/8085.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the country directly north of the highlighted country?" }, { "from": "gpt", "value": "The answer is Bolivia" } ], "clip_score": 24.734375, "loss": 4.12653923034668 }, { "id": "identity_113420", "image": "TextVQA/images/0bd60dc408159a20.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many electronic devices are visible in the background?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 15.390625, "loss": 4.686262607574463 }, { "id": "identity_281326", "image": "PMC-VQA/images/PMC4101959_fig6_306880.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the patient's trauma based on the given choices?\nChoices:\n(A) Left hemisphere injury \n(B) Acute left hemiplegia \n(C) Acute right hemiplegia \n(D) Occipital lobe damage" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.046875, "loss": 4.132481575012207 }, { "id": "identity_287469", "image": "DocVQA/images/llcv0228_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the document at the top?" }, { "from": "gpt", "value": "The answer is Church Services" } ], "clip_score": 21.375, "loss": 4.123843669891357 }, { "id": "identity_277256", "image": "VizWiz/images/VizWiz_train_00005525.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To which company does this logo belong?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 19.78125, "loss": 4.058640956878662 }, { "id": "identity_265745", "image": "AI2D/images/1161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter corresponds to the label for the mitochondrion? \nChoices:\n(A) J\n(B) F\n(C) G\n(D) B" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.46875, "loss": 4.244109153747559 }, { "id": "identity_231658", "image": "TQA/images/abc_question_images/nuclear_energy_18109.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which of the labels in the diagram represents an element with two protons and two neutrons, and what is the total number of protons and neutrons in the nucleus of this element?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 28.109375, "loss": 4.014157295227051 }, { "id": "identity_11832", "image": "ScienceQA/images/5833.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which specific humidity level was measured within the outlined area shown?\nChoices:\n(A) 13 grams of water vapor per kilogram of air\n(B) 10 grams of water vapor per kilogram of air\n(C) 23 grams of water vapor per kilogram of air" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.90625, "loss": 4.048240661621094 }, { "id": "identity_276722", "image": "DVQA/images/bar_train_00057222.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: For any dataset, which algorithm has the highest accuracy?" }, { "from": "gpt", "value": "The answer is strip" } ], "clip_score": 31.78125, "loss": 4.460013389587402 }, { "id": "identity_321943", "image": "PMC-VQA/images/PMC6237518_f1_396647.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the severity of the condition observed in the ultrasound image?\nChoices:\n(A) Grade I\n(B) Grade II\n(C) Grade III\n(D) Grade IV" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.1875, "loss": 4.327719211578369 }, { "id": "identity_333159", "image": "AI2D/images/3359.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the flower contains the anther and the filament?\nChoices:\n(A) stamen\n(B) petal\n(C) carpel\n(D) sepal" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.96875, "loss": 4.0776872634887695 }, { "id": "identity_182091", "image": "VQA-AS/images/4841.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows are visible in the room?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 22.421875, "loss": 4.3842267990112305 }, { "id": "identity_247589", "image": "AI2D/images/2592.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which point represents the position of the Earth during the Winter Solstice, and what is the position of the Earth during the Summer Solstice?" }, { "from": "gpt", "value": "The answer is F and C" } ], "clip_score": 33.21875, "loss": 4.339169025421143 }, { "id": "identity_310030", "image": "FigureQA/images/91596.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Violet Red the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.375, "loss": 4.752983093261719 }, { "id": "identity_305344", "image": "FigureQA/images/51895.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Light Slate?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.984375, "loss": 4.12860631942749 }, { "id": "identity_288177", "image": "PMC-VQA/images/PMC3772311_F4_231358.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted at the lower part of the diagram?\nChoices:\n(A) Overlapping activation clusters \n(B) Abstract language semantics processing \n(C) Gesture semantics processing \n(D) Concrete language semantics processing" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.046875, "loss": 4.614015579223633 }, { "id": "identity_280380", "image": "VizWiz/images/VizWiz_train_00007442.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the dog?" }, { "from": "gpt", "value": "The answer is tan" } ], "clip_score": 22.375, "loss": 4.298571586608887 }, { "id": "identity_38311", "image": "A-OKVQA/images/153817.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which train is the oldest?\nChoices:\n(A) middle left\n(B) leftmost\n(C) middle right\n(D) rightmost" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.046875, "loss": 4.1575422286987305 }, { "id": "identity_270414", "image": "FigureQA/images/70092.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Firebrick the area under the curve with the maximum value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.359375, "loss": 4.329840660095215 }, { "id": "identity_2104", "image": "PlotQA/images/15769.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label or title of the X-axis ?" }, { "from": "gpt", "value": "The answer is Years" } ], "clip_score": 23.15625, "loss": 4.051581382751465 }, { "id": "identity_15075", "image": "AI2D/images/3643.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the muscle on the outside of the heart?\nChoices:\n(A) mitral valve\n(B) left atrium\n(C) cardiac muscle\n(D) vena cava" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.375, "loss": 4.05980920791626 }, { "id": "identity_35808", "image": "A-OKVQA/images/298933.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which animal does closely resemble the kite flown above the beach by the young child?\nChoices:\n(A) bird\n(B) butterfly\n(C) dragonfly\n(D) dragon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.984375, "loss": 4.265270233154297 }, { "id": "identity_145440", "image": "MapQA/images/map_2946.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states fall into the lowest percentage range for detectives?" }, { "from": "gpt", "value": "The answer is an integer number" } ], "clip_score": 33.9375, "loss": 4.02170467376709 }, { "id": "identity_335495", "image": "PMC-VQA/images/PMC3212535_pone-0027190-g002_115008.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is clearly shown in the image?\nChoices:\n(A) Trabeculae \n(B) Crossribs \n(C) Microribs \n(D) Upper laminae" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.78125, "loss": 4.696020126342773 }, { "id": "identity_306478", "image": "VizWiz/images/VizWiz_train_00012404.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this an image of?" }, { "from": "gpt", "value": "The answer is paper" } ], "clip_score": 18.671875, "loss": 4.8498077392578125 }, { "id": "identity_290278", "image": "A-OKVQA/images/358100.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of vehicle is the taxi cab in terms of style? Is it a sedan, truck, SUV, or compact?\nChoices:\n(A) sedan\n(B) truck\n(C) suv\n(D) compact" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.203125, "loss": 4.1628618240356445 }, { "id": "identity_263653", "image": "PMC-VQA/images/PMC4558990_Fig4_420416.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the appearance of the composite fundus photograph of the left eye? \nChoices:\n(A) Blue background\n(B) Sunset-glow fundus with pigmentary changes\n(C) White background with vascular changes\n(D) Orange background with peripapillary atrophy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.859375, "loss": 4.3705315589904785 }, { "id": "identity_119175", "image": "VQA2.0/images/158853.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 13.7890625, "loss": 4.273488521575928 }, { "id": "identity_186574", "image": "VQA-AS/images/17446.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a grill present in the image?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 18.15625, "loss": 4.015657901763916 }, { "id": "identity_76205", "image": "TQA/images/question_images/cycle_water_1501.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the process that collects water back into the body of water?" }, { "from": "gpt", "value": "The answer is Collection" } ], "clip_score": 30.703125, "loss": 4.437495231628418 }, { "id": "identity_18876", "image": "VizWiz/images/VizWiz_train_00018240.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you describe this? " }, { "from": "gpt", "value": "The answer is ipad mini white around screen" } ], "clip_score": 31.234375, "loss": 4.494384288787842 }, { "id": "identity_285837", "image": "A-OKVQA/images/508896.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of trick is the man performing on the half pipe? Is it a:\n(A) manual\n(B) flip trick\n(C) lip trick\n(D) hand spin" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.546875, "loss": 4.473902225494385 }, { "id": "identity_71031", "image": "ScienceQA/images/3464.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of rock texture is displayed in the image?" }, { "from": "gpt", "value": "The answer is Clastic texture" } ], "clip_score": 27.203125, "loss": 4.086913108825684 }, { "id": "identity_37278", "image": "A-OKVQA/images/28589.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What usually goes inside of the item with the lid?\nChoices:\n(A) human waste\n(B) towels\n(C) cows\n(D) mcdonald's" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.5078125, "loss": 4.176728248596191 }, { "id": "identity_265490", "image": "Super-CLEVR/images/superCLEVR_new_000567.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the green object that is equivalent in size to the blue matte motorbike?" }, { "from": "gpt", "value": "The answer is road bike" } ], "clip_score": 26.40625, "loss": 4.146130561828613 }, { "id": "identity_28955", "image": "PMC-VQA/images/PMC7332069_pone.0234881.g001_204196.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which side of the patient is shown on the right-hand side of the image? \nChoices:\n(A) right side \n(B) left side \n(C) front side \n(D) back side " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.546875, "loss": 4.488181114196777 }, { "id": "identity_296220", "image": "PMC-VQA/images/PMC7283707_tca13498-fig-0001_188201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible in the CT image of the lung cancer patient? \nChoices:\n(A) Focal GGO \n(B) Consolidation \n(C) Diffuse GGO \n(D) No abnormalities detected" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.765625, "loss": 4.510369300842285 }, { "id": "identity_314743", "image": "ScienceQA/images/1126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does pollen assist a plant in doing?\nChoices:\n(A) grow bigger\n(B) make seeds\n(C) grow new leaves" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.953125, "loss": 4.146522521972656 }, { "id": "identity_294431", "image": "A-OKVQA/images/64251.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action does the train need to take before the man on the left can pass?\nChoices:\n(A) stop\n(B) pass by\n(C) reverse\n(D) levitate" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.2265625, "loss": 4.446590900421143 }, { "id": "identity_285133", "image": "DocVQA/images/xngv0228_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ideal location for The Nutrition Foundations meeting?" }, { "from": "gpt", "value": "The answer is europe" } ], "clip_score": 18.265625, "loss": 6.003003120422363 }, { "id": "identity_337891", "image": "A-OKVQA/images/372804.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is the person wearing the conical hat from?\nChoices:\n(A) vietnam\n(B) thailand\n(C) japan\n(D) china" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.53125, "loss": 4.405776023864746 }, { "id": "identity_148841", "image": "DocVQA/images/fhff0055_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is responsible for the introduction segment of the meeting?" }, { "from": "gpt", "value": "The answer is Not specified" } ], "clip_score": 22.828125, "loss": 4.448252201080322 }, { "id": "identity_2824", "image": "PMC-VQA/images/PMC4336564_fig05_360838.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the white dashed arrows indicate in the image? \nChoices:\n(A) Particle internalisation \n(B) Particle agglomeration \n(C) Cell nuclei staining \n(D) Particles outside from cells " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.453125, "loss": 4.54638671875 }, { "id": "identity_35598", "image": "A-OKVQA/images/262119.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is in the air?\nChoices:\n(A) bird\n(B) car\n(C) airplane\n(D) man" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.8984375, "loss": 4.096329689025879 }, { "id": "identity_24154", "image": "IconQA/images/77593.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you select a marble without looking, how likely is it that you will pick a black one?\nChoices:\n(A) impossible\n(B) probable\n(C) unlikely\n(D) certain" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.203125, "loss": 4.1222615242004395 }, { "id": "identity_199697", "image": "A-OKVQA/images/129566.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.359375, "loss": 4.574375629425049 }, { "id": "identity_18809", "image": "IconQA/images/101955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a sun. Which picture is fourth?\nChoices:\n(A) barn\n(B) tractor\n(C) sun" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.6875, "loss": 4.16845703125 }, { "id": "identity_335554", "image": "A-OKVQA/images/447082.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the condition of the skateboard next to the person with blue shoes?\nChoices:\n(A) folded\n(B) bent\n(C) broken\n(D) shredded" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.03125, "loss": 4.344271659851074 }, { "id": "identity_296500", "image": "DocVQA/images/llcv0228_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activities are scheduled from 9:00 to 9:15 as outlined in the agenda?" }, { "from": "gpt", "value": "The answer is Introduction" } ], "clip_score": 23.578125, "loss": 4.581633567810059 }, { "id": "identity_262030", "image": "FigureQA/images/51952.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Crimson the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.5, "loss": 4.030437469482422 }, { "id": "identity_295280", "image": "A-OKVQA/images/414588.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the specific breed of the animal listed in the table:\n(A) scottish fold\n(B) american lamancha\n(C) german shepherd\n(D) holland lop" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.6875, "loss": 4.259422302246094 }, { "id": "identity_282434", "image": "Super-CLEVR/images/superCLEVR_new_001098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do small purple items outnumber big blue rubber road bikes?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 27.890625, "loss": 4.984654426574707 }, { "id": "identity_5027", "image": "PMC-VQA/images/PMC7848076_F6_356972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of echocardiography was used? \nChoices:\n(A) Transesophageal echocardiography \n(B) M-mode echocardiography \n(C) Doppler echocardiography \n(D) 3D echocardiography " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.140625, "loss": 4.354702949523926 }, { "id": "identity_212690", "image": "TextVQA/images/0d0cdf59c4561f03.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the slogan written on the banner in the top right corner?" }, { "from": "gpt", "value": "The answer is The World is a Guest with Friends" } ], "clip_score": 20.25, "loss": 4.054507255554199 }, { "id": "identity_10423", "image": "Super-CLEVR/images/superCLEVR_new_001999.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there the same number of green metal objects to the right of the tiny green car and big cyan matte wagons?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 28.53125, "loss": 5.141232013702393 }, { "id": "identity_185633", "image": "PMC-VQA/images/PMC4243382_Fig2_338936.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiological sign called that is seen in the image?" }, { "from": "gpt", "value": "The answer is Stellate sign" } ], "clip_score": 27.40625, "loss": 4.184215545654297 }, { "id": "identity_336170", "image": "A-OKVQA/images/7819.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What makes the pavement appear so shiny?\nChoices:\n(A) water\n(B) glass\n(C) ice\n(D) plastic" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.578125, "loss": 4.636114597320557 }, { "id": "identity_306433", "image": "FigureQA/images/62370.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this have the minimum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.203125, "loss": 4.000036716461182 }, { "id": "identity_46724", "image": "FigureQA/images/10114.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 37.15625, "loss": 4.037381172180176 }, { "id": "identity_11137", "image": "ScienceQA/images/13010.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) Saint Kitts and Nevis\n(B) Antigua and Barbuda\n(C) Cuba\n(D) Jamaica" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.453125, "loss": 4.043562889099121 }, { "id": "identity_266076", "image": "TextVQA/images/3704b4d100aaa20c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the lane on the far left currently closed?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 24.625, "loss": 4.049049377441406 }, { "id": "identity_36444", "image": "TQA/images/abc_question_images/earth_moon_phases_11257.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the letter R represent?\nChoices:\n(A) CRESCENT MOON\n(B) QUARTER MOON\n(C) WAXING CRESCENT\n(D) GIBBOUS MOON" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.78125, "loss": 5.203017711639404 }, { "id": "identity_55372", "image": "PlotQA/images/110127.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which country has the highest amount spent by national tourists?" }, { "from": "gpt", "value": "The answer is Guinea" } ], "clip_score": 29.609375, "loss": 4.24674129486084 }, { "id": "identity_198935", "image": "PMC-VQA/images/PMC9187638_Fig2_308785.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the white structure indicated by the arrow?" }, { "from": "gpt", "value": "The answer is Irregular" } ], "clip_score": 19.703125, "loss": 4.241336822509766 }, { "id": "identity_300609", "image": "TQA/images/question_images/seasons_2633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What follows the winter solstice?\nChoices:\n(A) autumnal equinox\n(B) vernal equinox\n(C) summer solstice\n(D) vernal solstice" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.5625, "loss": 4.221799850463867 }, { "id": "identity_266522", "image": "Super-CLEVR/images/superCLEVR_new_001227.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the matte object that is the same color as the small shiny tandem bike?" }, { "from": "gpt", "value": "The answer is minivan" } ], "clip_score": 28.4375, "loss": 4.1649346351623535 }, { "id": "identity_299505", "image": "A-OKVQA/images/441862.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of products does the brand advertised on his shirt manufacture?\nChoices:\n(A) heavy equipment\n(B) clothing\n(C) furniture\n(D) electronics" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 8.75, "loss": 4.700438022613525 }, { "id": "identity_195299", "image": "PMC-VQA/images/PMC4188846_Fig1_325915.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chambers of the heart can be visualized in this echocardiogram?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 29.34375, "loss": 4.321478843688965 }, { "id": "identity_213230", "image": "Super-CLEVR/images/superCLEVR_new_001880.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the small vehicle to the right of the image made of?" }, { "from": "gpt", "value": "The answer is Rubber" } ], "clip_score": 24.90625, "loss": 4.173918724060059 }, { "id": "identity_336792", "image": "TabMWP/images/26869.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is it linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.390625, "loss": 4.317713260650635 }, { "id": "identity_270312", "image": "DVQA/images/bar_train_00004793.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group of bars, starting from the left, is labeled as the second group?" }, { "from": "gpt", "value": "The answer is lumber" } ], "clip_score": 14.421875, "loss": 4.20452356338501 }, { "id": "identity_211990", "image": "Super-CLEVR/images/superCLEVR_new_000721.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the floor in the image?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 19.765625, "loss": 4.117445468902588 }, { "id": "identity_318150", "image": "Super-CLEVR/images/superCLEVR_new_000112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the small object made of the same material as the double bus?" }, { "from": "gpt", "value": "The answer is jet" } ], "clip_score": 20.765625, "loss": 4.06480073928833 }, { "id": "identity_1810", "image": "A-OKVQA/images/19499.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of accident is this?\nChoices:\n(A) upside down\n(B) roll-over\n(C) turn-up\n(D) t-bone" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.5390625, "loss": 4.3539605140686035 }, { "id": "identity_31619", "image": "ScienceQA/images/4412.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which statement describes the Serengeti National Park ecosystem?\nChoices:\n(A) It has warm summers and warm winters.\n(B) It has a small amount of rain." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.109375, "loss": 4.645853519439697 }, { "id": "identity_281175", "image": "TabMWP/images/34195.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.28125, "loss": 4.464644908905029 }, { "id": "identity_60524", "image": "DocVQA/images/zhpc0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the room number of the sender?" }, { "from": "gpt", "value": "The answer is SA-10" } ], "clip_score": 18.46875, "loss": 4.043420314788818 }, { "id": "identity_181991", "image": "PMC-VQA/images/PMC7212339_fig2_166119.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the white star symbol indicate in the image?" }, { "from": "gpt", "value": "The answer is Fluid accumulation or cystic space" } ], "clip_score": 29.75, "loss": 4.023979663848877 }, { "id": "identity_261544", "image": "PMC-VQA/images/PMC4047780_F3_295237.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of radiograph is shown in image (B)?\nChoices:\n(A) Panoramic radiograph\n(B) CBCT\n(C) MRI\n(D) CT scan" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.03125, "loss": 4.451362609863281 }, { "id": "identity_305873", "image": "PMC-VQA/images/PMC6371611_Fig2_434553.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which area is affected by the opacification according to this?\nChoices:\n(A) Left ICA \n(B) Right ICA \n(C) Both ICAs \n(D) Basilar Artery" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.40625, "loss": 4.481335639953613 }, { "id": "identity_306153", "image": "TQA/images/abc_question_images/human_system_respiratory_16193.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the Left lung?\nChoices:\n(A) D\n(B) M\n(C) U\n(D) H" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.109375, "loss": 4.198801040649414 }, { "id": "identity_37617", "image": "PMC-VQA/images/PMC7035667_Fig1_122778.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of imaging was used to visualize synovitis? \nChoices:\n(A) CT \n(B) MRI \n(C) X-Ray \n(D) PET " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.6875, "loss": 4.052586555480957 }, { "id": "identity_283730", "image": "A-OKVQA/images/235876.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the man on the green sign engaged in? \n(A) protesting\n(B) exercising\n(C) photographing\n(D) dancing" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.65625, "loss": 4.139152526855469 }, { "id": "identity_1146", "image": "FigureQA/images/80950.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Deep Pink greater than Black?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.90625, "loss": 4.041203498840332 }, { "id": "identity_325390", "image": "DocVQA/images/xklg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this?" }, { "from": "gpt", "value": "The answer is Res. Ass't" } ], "clip_score": 16.421875, "loss": 4.494173049926758 }, { "id": "identity_116889", "image": "VQA2.0/images/524144.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many clock faces are visible on the tower?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 26.609375, "loss": 4.407138347625732 }, { "id": "identity_14400", "image": "A-OKVQA/images/52066.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What will the man have to grab to board the train?\nChoices:\n(A) jacket\n(B) suitcase\n(C) newspaper\n(D) newspaper" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.828125, "loss": 4.164914131164551 }, { "id": "identity_280421", "image": "Super-CLEVR/images/superCLEVR_new_001572.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the motorbike and the cyan aeroplane of equal size?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 23.671875, "loss": 5.376387596130371 }, { "id": "identity_34600", "image": "A-OKVQA/images/14873.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of area is shown?\nChoices:\n(A) public\n(B) rural\n(C) residential\n(D) private" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 8.7109375, "loss": 4.380185127258301 }, { "id": "identity_17844", "image": "Super-CLEVR/images/superCLEVR_new_001552.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a rubber bicycle right of the tiny green bicycle that is to the left of the tiny gray bicycle; what is its size?" }, { "from": "gpt", "value": "The answer is small" } ], "clip_score": 28.75, "loss": 4.013956069946289 }, { "id": "identity_289288", "image": "IconQA/images/67846.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the probability of selecting a black marble without looking?\nChoices:\n(A) certain\n(B) probable\n(C) impossible\n(D) unlikely" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.375, "loss": 4.13038969039917 }, { "id": "identity_334127", "image": "PMC-VQA/images/PMC4559320_Fig13_420629.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the white arrow pointing to in the image?\nChoices:\n(A) A mitochondria \n(B) A healthy synapse \n(C) An electron dense degenerating synapse \n(D) A pre-synapse" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.96875, "loss": 4.049134254455566 }, { "id": "identity_255710", "image": "Super-CLEVR/images/superCLEVR_new_000942.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image are made of the same material as the dirtbike, and what are their colors?" }, { "from": "gpt", "value": "The answer is One vehicle, cyan" } ], "clip_score": 30.640625, "loss": 4.894655227661133 }, { "id": "identity_7336", "image": "ScienceQA/images/15966.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which animal is also adapted to be camouflaged in a sandy desert?\nChoices:\n(A) blue poison dart frog\n(B) flat-tail horned lizard" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.2265625, "loss": 4.009514808654785 }, { "id": "identity_316295", "image": "TQA/images/abc_question_images/parts_telescope_17291.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label corresponds to the mounting rail?\nChoices:\n(A) U\n(B) E\n(C) T\n(D) W" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.65625, "loss": 4.538894176483154 }, { "id": "identity_270152", "image": "FigureQA/images/86799.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Crimson or Gold?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.4140625, "loss": 4.627316951751709 }, { "id": "identity_320055", "image": "PMC-VQA/images/PMC5650637_F3_230005.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of image is used to display a right hind foot at the level of a proximal third of a navicular bone?\nChoices:\n(A) transverse ultrasound image \n(B) dorsopalmar radiograph \n(C) T1-weighted gradient echo (GRE) image \n(D) CT scan image" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.953125, "loss": 4.540431976318359 }, { "id": "identity_329667", "image": "A-OKVQA/images/479948.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the net in front of the people there for?\nChoices:\n(A) stop ball\n(B) chicken pen\n(C) player captivity\n(D) punishment" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.203125, "loss": 4.918365001678467 }, { "id": "identity_321733", "image": "PMC-VQA/images/PMC7912837_brainsci-11-00179-f003_376679.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the image?\nChoices:\n(A) Edema\n(B) Hemorrhage\n(C) Tissue loss\n(D) Inflammation" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.359375, "loss": 4.339197635650635 }, { "id": "identity_272415", "image": "VQA2.0/images/63306.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the man have on his head?" }, { "from": "gpt", "value": "The answer is gray hat" } ], "clip_score": 14.0546875, "loss": 4.55203104019165 }, { "id": "identity_26095", "image": "PMC-VQA/images/PMC8593536_F1_113266.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the cause of the wide bony space created in the image? \nChoices:\n(A) Ageing \n(B) Cancer \n(C) The injury \n(D) Natural formation " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.03125, "loss": 4.3631792068481445 }, { "id": "identity_333930", "image": "PMC-VQA/images/PMC6925376_FIG1_88680.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which bone is fractured and marked by the yellow arrows in this image? \nChoices:\n(A) Capitate bone \n(B) Trapezium bone \n(C) Triquetral bone \n(D) Hamate bone" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.015625, "loss": 4.392759799957275 }, { "id": "identity_266490", "image": "FigureQA/images/61955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Turquoise the color with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.859375, "loss": 4.0272722244262695 }, { "id": "identity_279047", "image": "AI2D/images/2974.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the diagram above depict?\nChoices:\n(A) Lung\n(B) Leg\n(C) Brain\n(D) Heart" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.515625, "loss": 4.0685954093933105 }, { "id": "identity_72222", "image": "TQA/images/question_images/human_system_nervous_6181.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many main nerves are labeled in the leg area?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.765625, "loss": 4.522767066955566 }, { "id": "identity_296012", "image": "PMC-VQA/images/PMC6020486_fig1_333820.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following abnormalities is observed in the left eye?\n(A) Optic nerve atrophy\n(B) Swelling of the inferior rectus muscle\n(C) Corneal ulcers\n(D) Subretinal hemorrhage" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.0625, "loss": 4.154757976531982 }, { "id": "identity_128554", "image": "VQA2.0/images/308796.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many doors are visible on the vehicle in the image?" }, { "from": "gpt", "value": "The answer is Two doors" } ], "clip_score": 18.5, "loss": 4.358340263366699 }, { "id": "identity_326347", "image": "ScienceQA/images/3262.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic do these items share?\nChoices:\n(A) flexible\n(B) hard\n(C) fragile" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.6796875, "loss": 4.122613430023193 }, { "id": "identity_328441", "image": "TabMWP/images/37704.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is it linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.90625, "loss": 4.279753684997559 }, { "id": "identity_322369", "image": "DocVQA/images/nqvg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the designation of this person?" }, { "from": "gpt", "value": "The answer is Executive Director" } ], "clip_score": 20.53125, "loss": 4.34142541885376 }, { "id": "identity_315658", "image": "AI2D/images/2082.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram of the food web shown, snakes eat what for energy?\nChoices:\n(A) grass\n(B) grasshoppers\n(C) hawks\n(D) fungi" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 36.625, "loss": 4.2620625495910645 }, { "id": "identity_296000", "image": "A-OKVQA/images/482671.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of cuisine is being offered? \nOptions:\n(A) Chinese\n(B) Korean\n(C) Indian\n(D) Japanese" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.203125, "loss": 4.133643627166748 }, { "id": "identity_294583", "image": "PMC-VQA/images/PMC6414139_acm212503-fig-0004_449322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in panel (b) of the image?\nChoices:\n(A) soft tissue in the maxillary sinus\n(B) increase in dose to retina\n(C) surgical flap shrinkage\n(D) left maxillary sinus condition." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.859375, "loss": 4.077760219573975 }, { "id": "identity_111686", "image": "Super-CLEVR/images/superCLEVR_new_001533.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the largest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 18.421875, "loss": 4.310135841369629 }, { "id": "identity_279339", "image": "Super-CLEVR/images/superCLEVR_new_000447.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the blue and brown objects of the same shape?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 14.3984375, "loss": 5.6003289222717285 }, { "id": "identity_272309", "image": "ScienceQA/images/11864.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of North Carolina?\n(A) Raleigh\n(B) Harrisburg\n(C) Charlotte\n(D) Baton Rouge" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.828125, "loss": 4.138144016265869 }, { "id": "identity_337183", "image": "FigureQA/images/2934.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.0078125, "loss": 4.395459175109863 }, { "id": "identity_301938", "image": "FigureQA/images/80821.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Steel Blue's value the lowest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.03125, "loss": 4.169729232788086 }, { "id": "identity_74951", "image": "ScienceQA/images/17187.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the common use for the object labeled \"corn on the cob\" in the image?" }, { "from": "gpt", "value": "The answer is Food" } ], "clip_score": 17.984375, "loss": 4.042778491973877 }, { "id": "identity_299332", "image": "PMC-VQA/images/PMC8864031_Fig8_199970.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the image depict?\nChoices:\n(A) Clusters of amorphous silicon \n(B) Clusters of amorphous diamond \n(C) Clusters of amorphous copper \n(D) Clusters of amorphous iron" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.140625, "loss": 4.345375061035156 }, { "id": "identity_13850", "image": "A-OKVQA/images/425542.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who played this sport?\nChoices:\n(A) tom brady\n(B) jim those\n(C) maria sharapova\n(D) bo jackson" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.6875, "loss": 4.128902435302734 }, { "id": "identity_322669", "image": "A-OKVQA/images/243820.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is to the right of this?\nChoices:\n(A) cat\n(B) cardboard box\n(C) scissors\n(D) roach" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.578125, "loss": 4.19127082824707 }, { "id": "identity_26529", "image": "A-OKVQA/images/90103.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of vehicles are prohibited from going beyond the pole?\nChoices:\n(A) taxi\n(B) bikes\n(C) van\n(D) bus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.390625, "loss": 4.245439052581787 }, { "id": "identity_182225", "image": "A-OKVQA/images/60814.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many axles does the boat trailer have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 26.046875, "loss": 4.356797218322754 }, { "id": "identity_209131", "image": "PMC-VQA/images/PMC5906535_F6_303486.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the DNA staining in this image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 25.203125, "loss": 4.073288917541504 }, { "id": "identity_30911", "image": "TQA/images/question_images/tides_128.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The Neap Tide is in what orientation relative to the position of the Earth and Sun?\nChoices:\n(A) Perpendicular\n(B) Parallel\n(C) Reverse\n(D) Diagonal" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.09375, "loss": 4.528561115264893 }, { "id": "identity_19818", "image": "FigureQA/images/40266.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Orchid the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.953125, "loss": 4.21583366394043 }, { "id": "identity_244824", "image": "ScienceQA/images/18774.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the aquatic animal that is a cartilaginous fish and is known for its predatory behavior" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.78125, "loss": 4.056702613830566 }, { "id": "identity_337311", "image": "ScienceQA/images/7901.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Manchester\n(B) Albany\n(C) Tulsa\n(D) Concord" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.59375, "loss": 4.998361587524414 }, { "id": "identity_141460", "image": "MapQA/images/map_6966.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a value greater than 5.3M?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 24.578125, "loss": 4.622791767120361 }, { "id": "identity_303959", "image": "FigureQA/images/56209.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Khaki?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.71875, "loss": 4.08229398727417 }, { "id": "identity_184175", "image": "TextVQA/images/292038ec773d1a10.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are visible in the background behind the electronic billboard?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 13.6796875, "loss": 4.318329811096191 }, { "id": "identity_268561", "image": "A-OKVQA/images/196899.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what type of beverage are the smaller glasses intended? \nChoices:\n(A) milk\n(B) wine\n(C) tea\n(D) coffee" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 9.921875, "loss": 4.107181549072266 }, { "id": "identity_13704", "image": "Super-CLEVR/images/superCLEVR_new_000770.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the shape of the tiny metal object right of the matte car the same as the small matte object that is on the right side of the green thing?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 25.71875, "loss": 5.35245418548584 }, { "id": "identity_280329", "image": "FigureQA/images/62466.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Olive the one with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.75, "loss": 4.521633148193359 }, { "id": "identity_324792", "image": "PMC-VQA/images/PMC7353225_nanomaterials-10-01086-f020_211543.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of images were used as upper and lower insets in these images?\nChoices:\n(A) SEM images \n(B) HRTEM images and SAED patterns \n(C) AFM images \n(D) CT images" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.78125, "loss": 4.386988639831543 }, { "id": "identity_277189", "image": "A-OKVQA/images/52153.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What item is located on the cabinet?\nChoices:\n(A) television\n(B) apple pie\n(C) cat\n(D) baby" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.46875, "loss": 4.108639240264893 }, { "id": "identity_337890", "image": "PMC-VQA/images/PMC7317310_fig1_199546.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of stain was used to view the lung sections?\nChoices:\n(A) Fluorescent stain \n(B) HE stain \n(C) Iodine stain \n(D) Gram stain" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.1875, "loss": 4.777656555175781 }, { "id": "identity_309092", "image": "AI2D/images/3607.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is directly below this?\n\nChoices:\n(A) Nasal Cavity\n(B) Alveoli\n(C) Trachea\n(D) Diaphragm" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.25, "loss": 4.942188739776611 }, { "id": "identity_24982", "image": "A-OKVQA/images/172057.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What event is being held here?\nChoices:\n(A) wedding\n(B) tractor pull\n(C) expo\n(D) jail break" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.515625, "loss": 4.0346550941467285 }, { "id": "identity_296137", "image": "A-OKVQA/images/507087.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which music group among Cream, NSYNC, Backstreet Boys, and Spice Girls would be able to utilize all of these boards without the need to share?\nChoices:\n(A) cream\n(B) nsync\n(C) backstreet boys\n(D) spice girls" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.1015625, "loss": 4.024636745452881 }, { "id": "identity_292339", "image": "A-OKVQA/images/275997.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the plastic bag hanging on the door handle? Is it being used to collect baseball cards, food, laundry, or garbage?\nChoices:\n(A) baseball cards\n(B) food\n(C) laundry\n(D) garbage" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.296875, "loss": 4.103072643280029 }, { "id": "identity_320968", "image": "PMC-VQA/images/PMC3874542_F5_254590.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which column shows the changes in connectivity of the L dlPFC?\nChoices:\n(A) Left \n(B) Right \n(C) Middle \n(D) Upper" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.234375, "loss": 4.644777297973633 }, { "id": "identity_16961", "image": "FigureQA/images/64448.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Tomato have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.421875, "loss": 4.479647159576416 }, { "id": "identity_107795", "image": "TextVQA/images/02c60b3d3d688cbe.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many fire escape ladders are visible on the buildings?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 17.15625, "loss": 4.58404541015625 }, { "id": "identity_268836", "image": "TQA/images/abc_question_images/human_system_eye_16126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the clear, protective covering on the outside of the eye represented by? \nChoices:\n(A) K\n(B) T\n(C) C\n(D) P" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.46875, "loss": 4.19121789932251 }, { "id": "identity_263654", "image": "FigureQA/images/78767.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Cyan or Medium Orchid?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.171875, "loss": 4.677433013916016 }, { "id": "identity_284905", "image": "PMC-VQA/images/PMC5572699_Fig10_207363.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which stage of the embryo is depicted in the image provided? Is it:\n(A) Zygote stage\n(B) Early blastocyst stage\n(C) Late blastocyst stage\n(D) Fetus stage" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.0, "loss": 4.9152655601501465 }, { "id": "identity_22116", "image": "PMC-VQA/images/PMC4998391_F4_66474.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the lesion located in the left breast of the patient? \nChoices:\n(A) Left upper outer quadrant \n(B) Left lower outer quadrant \n(C) Right upper outer quadrant \n(D) Right lower outer quadrant " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.578125, "loss": 4.107863903045654 }, { "id": "identity_302769", "image": "PMC-VQA/images/PMC7788390_F1_336143.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the blue lines in the image represent?\nChoices:\n(A) Wind direction\n(B) Longitude\n(C) Streams\n(D) Currents" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.8125, "loss": 4.47808837890625 }, { "id": "identity_334958", "image": "PMC-VQA/images/PMC7346301_FIG1_208850.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the enhancing mass located in the MRI?\nChoices:\n(A) Left frontal lobe\n(B) Right frontal lobe\n(C) Left lateral ventricle\n(D) Right lateral ventricle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.65625, "loss": 4.512439727783203 }, { "id": "identity_299232", "image": "A-OKVQA/images/21926.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the type of area depicted in the image:\n(A) coastal\n(B) beach\n(C) urban\n(D) rural" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 8.84375, "loss": 4.769618988037109 }, { "id": "identity_32694", "image": "A-OKVQA/images/454457.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is the roof made of?\nChoices:\n(A) metal\n(B) vinyl\n(C) brick\n(D) wood" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.078125, "loss": 4.3193559646606445 }, { "id": "identity_323439", "image": "A-OKVQA/images/516239.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the likely destination of the individuals on the truck?\nChoices:\n(A) dance\n(B) work\n(C) shopping\n(D) party" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.75, "loss": 4.738711357116699 }, { "id": "identity_312624", "image": "DVQA/images/bar_train_00149197.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the palegoldenrod color represent?" }, { "from": "gpt", "value": "The answer is design" } ], "clip_score": 22.265625, "loss": 4.107415199279785 }, { "id": "identity_112206", "image": "PMC-VQA/images/PMC5977127_jcm-07-00088-f003_321235.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the hematoma located in the right or left side of the body?" }, { "from": "gpt", "value": "The answer is Right side" } ], "clip_score": 26.5625, "loss": 4.036357402801514 }, { "id": "identity_262927", "image": "FigureQA/images/59403.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Blue Violet the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.59375, "loss": 4.490159511566162 }, { "id": "identity_219740", "image": "TextVQA/images/026b958c8a6930b4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the company on the window above the shop?" }, { "from": "gpt", "value": "The answer is Spherion" } ], "clip_score": 18.453125, "loss": 4.254387378692627 }, { "id": "identity_25577", "image": "FigureQA/images/98804.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Purple have the minimum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.75, "loss": 4.252765655517578 }, { "id": "identity_117740", "image": "VQA2.0/images/238982.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles can be seen in the background?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.015625, "loss": 4.698328971862793 }, { "id": "identity_6466", "image": "FigureQA/images/1895.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Red greater than Medium Aqua?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.328125, "loss": 4.093144416809082 }, { "id": "identity_300136", "image": "PMC-VQA/images/PMC7692147_jcm-09-03411-f002_305983.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of X-ray is shown in the image?\nChoices:\n(A) Lateral X-ray\n(B) Anteroposterior X-ray\n(C) CT scan\n(D) MRI scan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.75, "loss": 4.560146331787109 }, { "id": "identity_246081", "image": "Super-CLEVR/images/superCLEVR_new_001966.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image are the same shape as the big yellow matte object, and what are their relative positions to the big regular bus?" }, { "from": "gpt", "value": "The answer is Two cars; one left, one right" } ], "clip_score": 27.84375, "loss": 4.513045787811279 }, { "id": "identity_25288", "image": "A-OKVQA/images/129108.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of town is this crosswalk?\nChoices:\n(A) china town\n(B) downtown\n(C) polish town\n(D) italian town" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.25, "loss": 4.634045124053955 }, { "id": "identity_248234", "image": "ChartQA/images/multi_col_40559.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the percentage of stay-at-home mothers between 1989 and 2012, and how does this compare to the difference in the percentage of stay-at-home fathers between the same years?" }, { "from": "gpt", "value": "The answer is Both differences are 6%" } ], "clip_score": 26.875, "loss": 4.001002788543701 }, { "id": "identity_282829", "image": "A-OKVQA/images/575027.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What function does the green building on the left side of the skate park serve?\nChoices:\n(A) parking area\n(B) grocery store\n(C) gas station\n(D) convenience store" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.859375, "loss": 4.4931321144104 }, { "id": "identity_247525", "image": "A-OKVQA/images/22118.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the possible reasons people might be using umbrellas and laying on blankets at the beach, and which reason is most likely based on the image?" }, { "from": "gpt", "value": "The answer is To protect from the sun and to tan" } ], "clip_score": 31.046875, "loss": 4.153205394744873 }, { "id": "identity_16252", "image": "A-OKVQA/images/306420.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the scarf called being worn by the women?\nChoices:\n(A) neckies\n(B) dupatta\n(C) hijab\n(D) dickies" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.515625, "loss": 4.731703758239746 }, { "id": "identity_7658", "image": "FigureQA/images/84449.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Olive greater than Indigo?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.71875, "loss": 4.032905578613281 }, { "id": "identity_187949", "image": "VQA2.0/images/429974.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the boat that is closest to the camera on the left?" }, { "from": "gpt", "value": "The answer is NORMA" } ], "clip_score": 25.1875, "loss": 4.027304649353027 }, { "id": "identity_308347", "image": "AI2D/images/3991.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which structure is responsible for secretions?\nChoices:\n(A) Mitochondria\n(B) Cell wall\n(C) Golgi apparatus\n(D) Vacuoles" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.71875, "loss": 4.061864852905273 }, { "id": "identity_233014", "image": "A-OKVQA/images/194744.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are three visual cues in the image that indicate the pizza is being eaten in a food court, and how do these cues collectively support this conclusion?" }, { "from": "gpt", "value": "The answer is Background architecture, seating arrangement, lighting and ambiance" } ], "clip_score": 25.53125, "loss": 4.075625419616699 }, { "id": "identity_23039", "image": "ScienceQA/images/20389.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the question that Haley's experiment can best answer.\nChoices:\n(A) Does the humidity level where tomato seeds are planted affect the number of tomato seedlings that grow?\n(B) Do more tomato seedlings grow when they are planted in soil with fertilizer compared to soil without fertilizer?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.265625, "loss": 4.056041717529297 }, { "id": "identity_297637", "image": "ScienceQA/images/11208.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city on the map is indicated by the marking? \nChoices:\n(A) Boston\n(B) Houston\n(C) Washington, D.C.\n(D) St. Louis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.96875, "loss": 4.473837852478027 }, { "id": "identity_301520", "image": "AI2D/images/2937.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is the outermost structure?\nChoices:\n(A) Membrane\n(B) Outer arm\n(C) Radial link\n(D) Nexin Bridge" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.875, "loss": 4.221871376037598 }, { "id": "identity_221509", "image": "A-OKVQA/images/309412.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many railroad crossing signs are visible?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.25, "loss": 4.140434741973877 }, { "id": "identity_231879", "image": "VQA2.0/images/477259.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of footwear is the man throwing the Frisbee wearing, and what color is it?" }, { "from": "gpt", "value": "The answer is barefoot" } ], "clip_score": 23.5, "loss": 4.152026653289795 }, { "id": "identity_81", "image": "IconQA/images/85961.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of ovals even or odd?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.5, "loss": 4.433650970458984 }, { "id": "identity_324368", "image": "ScienceQA/images/20431.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown here?\nChoices:\n(A) Pennsylvania\n(B) New Jersey\n(C) Rhode Island\n(D) New York" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.625, "loss": 4.728997230529785 }, { "id": "identity_25826", "image": "A-OKVQA/images/448648.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are is the image from?\nChoices:\n(A) underground\n(B) city\n(C) forest\n(D) sky" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.28125, "loss": 4.401334285736084 }, { "id": "identity_210520", "image": "VQA-AS/images/6897.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many living beings are visible in this image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.609375, "loss": 4.359976768493652 }, { "id": "identity_300135", "image": "VQA2.0/images/252779.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where are the stripes?" }, { "from": "gpt", "value": "The answer is on platforms" } ], "clip_score": 18.640625, "loss": 4.60985803604126 }, { "id": "identity_297555", "image": "ScienceQA/images/1022.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being highlighted in the following options: The Bahamas, Dominica, Jamaica, or the Dominican Republic?\nChoices:\n(A) The Bahamas\n(B) Dominica\n(C) Jamaica\n(D) the Dominican Republic" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.703125, "loss": 4.02060604095459 }, { "id": "identity_267043", "image": "TQA/images/abc_question_images/nuclear_energy_17097.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify label A from the following options:\n(A) COOLING BASIN\n(B) HEAT EXCHANGER\n(C) STEAM\n(D) THE CORE" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.65625, "loss": 4.0644731521606445 }, { "id": "identity_311937", "image": "Super-CLEVR/images/superCLEVR_new_000339.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer green cars right of the large yellow motorbike than big rubber choppers that are left of the large shiny bus?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 22.203125, "loss": 5.122924327850342 }, { "id": "identity_21825", "image": "PMC-VQA/images/PMC7755720_FIG1_325144.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What medical procedure is being performed in the image? \nChoices:\n(A) Liver biopsy \n(B) Cardiac catheterization \n(C) Balloon valve inflation \n(D) Colonoscopy " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.59375, "loss": 4.650875091552734 }, { "id": "identity_315546", "image": "TextVQA/images/1ea75b6f56c2a71c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is considered a king of social media?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 19.640625, "loss": 4.255837440490723 }, { "id": "identity_291473", "image": "DocVQA/images/mzfm0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the code for Otology?" }, { "from": "gpt", "value": "The answer is ot" } ], "clip_score": 18.421875, "loss": 4.451993942260742 }, { "id": "identity_188941", "image": "PMC-VQA/images/PMC6517476_F2_479594.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tissue blocks are visible in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 22.078125, "loss": 4.270831108093262 }, { "id": "identity_283602", "image": "PMC-VQA/images/PMC6156007_F2_371926.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At what point was the CT examination displayed in the image captured? \nOptions:\n(A) Prior to the ankle joint dislocation,\n(B) Following the ankle joint dislocation,\n(C) Amidst the ankle joint dislocation,\n(D) None of the options provided." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.765625, "loss": 4.575339317321777 }, { "id": "identity_10718", "image": "A-OKVQA/images/183620.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What's the woman attempting to hit?\nChoices:\n(A) boxes\n(B) speakers\n(C) containers\n(D) televisions" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.46875, "loss": 4.337249279022217 }, { "id": "identity_37423", "image": "PMC-VQA/images/PMC5156805_fig2_105366.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following chambers is separated by the membrane in cor triatriatum? \nChoices:\n(A) Right atrium and right ventricle \n(B) Right atrium and left atrium \n(C) Left atrium and left ventricle \n(D) Left ventricle and right ventricle " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.078125, "loss": 4.04518985748291 }, { "id": "identity_38731", "image": "PMC-VQA/images/PMC4677348_f3_453251.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reconstruction method used in the study? \nChoices:\n(A) MRI \n(B) CT scan \n(C) PET scan \n(D) δ/β imaging " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.96875, "loss": 4.6756768226623535 }, { "id": "identity_266624", "image": "ScienceQA/images/13750.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repulsive force between them?\nChoices:\n(A) repel\n(B) attract" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.546875, "loss": 4.053279876708984 }, { "id": "identity_200165", "image": "Super-CLEVR/images/superCLEVR_new_001658.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the airplane closest to the purple bicycle?" }, { "from": "gpt", "value": "The answer is Cyan" } ], "clip_score": 31.828125, "loss": 4.181432247161865 }, { "id": "identity_146215", "image": "DocVQA/images/nrmg0227_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the remark for Room 329?" }, { "from": "gpt", "value": "The answer is Knoll" } ], "clip_score": 22.765625, "loss": 4.901393413543701 }, { "id": "identity_40700", "image": "DVQA/images/bar_train_00046547.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest combined value in the 'skirt' group?" }, { "from": "gpt", "value": "The answer is thing" } ], "clip_score": 25.6875, "loss": 4.62827730178833 }, { "id": "identity_11273", "image": "PMC-VQA/images/PMC8342271_Fig3_27574.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the consequence of the plasmacytoma seen in the image?\nChoices:\n(A) Plural effusion \n(B) Cardiomyopathy \n(C) Myelopathy \n(D) Osteoporosis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.359375, "loss": 4.1568217277526855 }, { "id": "identity_23922", "image": "ScienceQA/images/14657.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Utah?\nChoices:\n(A) Provo\n(B) Salt Lake City\n(C) Sioux Falls\n(D) Boise" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.25, "loss": 4.24957275390625 }, { "id": "identity_152663", "image": "ScienceQA/images/17925.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries share a border with the highlighted country?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 27.046875, "loss": 4.410182952880859 }, { "id": "identity_58759", "image": "DVQA/images/bar_train_00089909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store sold the least number of 'whisky' units?" }, { "from": "gpt", "value": "The answer is dust" } ], "clip_score": 31.71875, "loss": 4.143820762634277 }, { "id": "identity_48536", "image": "DocVQA/images/qpvg0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the recipient?" }, { "from": "gpt", "value": "The answer is Dr. Damaj" } ], "clip_score": 18.875, "loss": 4.40593147277832 }, { "id": "identity_323747", "image": "TQA/images/question_images/acid_rain_formation_8001.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which chemical is not produced by human activities?\nChoices:\n(A) NOx\n(B) SO2\n(C) VOX\n(D) VOC" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.796875, "loss": 4.130418300628662 }, { "id": "identity_327591", "image": "PMC-VQA/images/PMC9427313_fig2_389351.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the asterisk symbol indicate in this image?\nChoices:\n(A) Left portal branch\n(B) Right portal branch\n(C) Inferior vena cava\n(D) Aorta" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.703125, "loss": 4.695822238922119 }, { "id": "identity_286453", "image": "AI2D/images/65.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What process is represented by stage A in the diagram?\n(A) Decomposition\n(B) Photosynthesis\n(C) Dissolving/Evaporation\n(D) Condensation" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.328125, "loss": 4.334840297698975 }, { "id": "identity_38492", "image": "A-OKVQA/images/342408.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the dog mostly dry?\nChoices:\n(A) indoors\n(B) umbrella\n(C) hot\n(D) tent" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.640625, "loss": 4.6435322761535645 }, { "id": "identity_206426", "image": "A-OKVQA/images/83178.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is predominantly featured in the image?" }, { "from": "gpt", "value": "The answer is Bus" } ], "clip_score": 24.0, "loss": 4.006413459777832 }, { "id": "identity_1088", "image": "TQA/images/abc_question_images/seasons_11042.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents the vernal equinox?\nChoices:\n(A) V\n(B) U\n(C) T\n(D) J" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.0625, "loss": 4.308420658111572 }, { "id": "identity_300103", "image": "TextVQA/images/014fbf8ef4b53fd2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Why are they sitting at the desk?" }, { "from": "gpt", "value": "The answer is bildu" } ], "clip_score": 16.046875, "loss": 5.683621883392334 }, { "id": "identity_212767", "image": "A-OKVQA/images/315432.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the boat located second from the left?" }, { "from": "gpt", "value": "The answer is Verna A II" } ], "clip_score": 16.84375, "loss": 4.760260105133057 }, { "id": "identity_266173", "image": "A-OKVQA/images/104931.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which direction does the black surface under the person wearing a pink jacket go?\nChoices:\n(A) none\n(B) down\n(C) sideways\n(D) up" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 12.4921875, "loss": 4.163632392883301 }, { "id": "identity_324405", "image": "PMC-VQA/images/PMC7397898_vms3273-fig-0002_224754.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the asterisk in this image?\nChoices:\n(A) The left retroperitoneum\n(B) The right circumvaval ureter\n(C) The caudal vena cava\n(D) The mid abdomen" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.046875, "loss": 4.177335739135742 }, { "id": "identity_290923", "image": "A-OKVQA/images/508722.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What aspect of this sport is considered crucial?\nChoices:\n(A) looks\n(B) fun\n(C) security\n(D) fame" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.453125, "loss": 4.632972717285156 }, { "id": "identity_20913", "image": "PMC-VQA/images/PMC6641677_F1_11576.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrow in the image indicate? \nChoices:\n(A) Abnormal structure of pulmonary arteries. \n(B) Flow of blood in the patient's body. \n(C) Right and Left pulmonary arteries. \n(D) Blood clot in main pulmonary artery. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.1875, "loss": 4.729453086853027 }, { "id": "identity_190330", "image": "PMC-VQA/images/PMC5356317_Fig6_141696.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many directional labels are there in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 20.21875, "loss": 4.695998191833496 }, { "id": "identity_229770", "image": "TextVQA/images/8774ea9366a4c286.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different directions are indicated by the one-way signs, and what is the direction of the pedestrian crossing sign?" }, { "from": "gpt", "value": "The answer is One unique direction, left" } ], "clip_score": 33.09375, "loss": 5.130604267120361 }, { "id": "identity_293025", "image": "IconQA/images/63627.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do all the horses have a saddle?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.03125, "loss": 4.267394065856934 }, { "id": "identity_293005", "image": "A-OKVQA/images/182801.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: When the light is red, who has the right of way at the crossing?\nChoices:\n(A) women\n(B) ceos\n(C) pedestrians\n(D) drivers" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.390625, "loss": 4.265566825866699 }, { "id": "identity_295392", "image": "A-OKVQA/images/491981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the means of escape for people in case of a fire?\nChoices:\n(A) emergency door\n(B) open entryway\n(C) trap door\n(D) open window" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.2421875, "loss": 4.42415714263916 }, { "id": "identity_194535", "image": "PMC-VQA/images/PMC4412507_Fig1_381542.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the highlighted area in the image?" }, { "from": "gpt", "value": "The answer is Irregular shape" } ], "clip_score": 21.703125, "loss": 4.613837242126465 }, { "id": "identity_39892", "image": "DVQA/images/bar_train_00112689.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object has the highest preference in the 'dirt' category?" }, { "from": "gpt", "value": "The answer is stop" } ], "clip_score": 30.734375, "loss": 4.172173976898193 }, { "id": "identity_36417", "image": "A-OKVQA/images/437967.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the meat on the dish?\nChoices:\n(A) chicken\n(B) beef\n(C) pork chop\n(D) salmon" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.890625, "loss": 4.116624355316162 }, { "id": "identity_213423", "image": "A-OKVQA/images/160597.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible words are there on the front of the motorcycle?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.875, "loss": 4.466912746429443 }, { "id": "identity_282997", "image": "A-OKVQA/images/113757.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How can this classroom be categorized in terms of its type?\nChoices:\n(A) podium\n(B) amphitheater\n(C) stadium\n(D) enclave" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.0, "loss": 4.671560764312744 }, { "id": "identity_247424", "image": "A-OKVQA/images/578506.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the building with blue rails and describe two features that confirm its function" }, { "from": "gpt", "value": "The answer is Police station with sign and emblem" } ], "clip_score": 30.359375, "loss": 4.370678424835205 }, { "id": "identity_274577", "image": "A-OKVQA/images/71301.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which skier is most likely to collide with the blue wall?\nChoices:\n(A) middle skier\n(B) right skier\n(C) left skier\n(D) upcoming skier" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.5, "loss": 4.127936363220215 }, { "id": "identity_309918", "image": "FigureQA/images/4824.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the maximum value for Dim Gray?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.890625, "loss": 4.463321208953857 }, { "id": "identity_202931", "image": "TextVQA/images/01f805f22565b44c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicles are predominantly visible in the traffic jam?" }, { "from": "gpt", "value": "The answer is Personal cars" } ], "clip_score": 24.5625, "loss": 4.258732795715332 }, { "id": "identity_338346", "image": "TextVQA/images/f0cb340ffd238456.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is one of the names on this?" }, { "from": "gpt", "value": "The answer is siegfried" } ], "clip_score": 12.0234375, "loss": 4.264183044433594 }, { "id": "identity_38676", "image": "AI2D/images/1131.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part does label E indicates?\nChoices:\n(A) Flower\n(B) Fruit\n(C) Leaf\n(D) Stem" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.5625, "loss": 4.346299648284912 }, { "id": "identity_34504", "image": "PMC-VQA/images/PMC4168241_fig1_321153.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being measured in the image? \nChoices:\n(A) Ovarian cyst \n(B) Uterine fibroid \n(C) Endometrial carcinoma \n(D) Cervical polyp " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.625, "loss": 4.508026123046875 }, { "id": "identity_309221", "image": "VQA-AS/images/4512.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the picture on the wall of a person?" }, { "from": "gpt", "value": "The answer is female" } ], "clip_score": 24.53125, "loss": 5.666969299316406 }, { "id": "identity_295279", "image": "VQA2.0/images/540436.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the distance between the food and the camera?" }, { "from": "gpt", "value": "The answer is very" } ], "clip_score": 13.2109375, "loss": 4.779190540313721 }, { "id": "identity_219696", "image": "A-OKVQA/images/289883.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the destination displayed on the bus?" }, { "from": "gpt", "value": "The answer is Burg. Fockema Andreaelaan" } ], "clip_score": 28.8125, "loss": 5.022667407989502 }, { "id": "identity_136870", "image": "PlotQA/images/49906.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the value added by the industry sector to the services sector for Sri Lanka?" }, { "from": "gpt", "value": "The answer is a float number" } ], "clip_score": 24.328125, "loss": 4.307729721069336 }, { "id": "identity_30664", "image": "ScienceQA/images/14700.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Pennsylvania\n(B) Connecticut\n(C) New York\n(D) Massachusetts" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.25, "loss": 4.641887187957764 }, { "id": "identity_129126", "image": "TextVQA/images/9b0e803a144e8bb0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the seats in the stadium?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 15.9609375, "loss": 4.243525505065918 }, { "id": "identity_212959", "image": "TextVQA/images/00f9078d169011a1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many umpires are visible in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 18.84375, "loss": 4.551244258880615 }, { "id": "identity_320880", "image": "A-OKVQA/images/265080.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the home team located?\nChoices:\n(A) kansas\n(B) missouri\n(C) oklahoma\n(D) texas" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.8203125, "loss": 4.255561351776123 }, { "id": "identity_320670", "image": "A-OKVQA/images/102672.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on the animal in the foreground?\nChoices:\n(A) hat\n(B) baby\n(C) saddle\n(D) knight" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.34375, "loss": 4.457551002502441 }, { "id": "identity_6668", "image": "VizWiz/images/VizWiz_train_00007241.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is this railing?" }, { "from": "gpt", "value": "The answer is pipe" } ], "clip_score": 32.0625, "loss": 5.652739524841309 }, { "id": "identity_291964", "image": "IconQA/images/32231.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) slide\n(B) flip\n(C) turn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.671875, "loss": 4.658348083496094 }, { "id": "identity_298157", "image": "A-OKVQA/images/73310.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What object is located at the bottom left corner among the following choices: teacup, giraffe, egg, or baby?\nChoices:\n(A) teacup\n(B) giraffe\n(C) egg\n(D) baby" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.109375, "loss": 4.207566738128662 }, { "id": "identity_309828", "image": "FigureQA/images/34734.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.0234375, "loss": 4.5057902336120605 }, { "id": "identity_191653", "image": "A-OKVQA/images/344292.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many large pots are hanging from the rack above the table?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.484375, "loss": 4.331157684326172 }, { "id": "identity_13053", "image": "TextVQA/images/727356147414700a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is written above the blue mountain logo?" }, { "from": "gpt", "value": "The answer is the world famous" } ], "clip_score": 27.984375, "loss": 5.144681453704834 }, { "id": "identity_281697", "image": "PMC-VQA/images/PMC6872567_Fig6_72289.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the green arrow in the image signify?\nChoices:\n(A) Collagen network \n(B) Aortic wall \n(C) Atherosclerotic plaque \n(D) None of the above." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.90625, "loss": 4.607452392578125 }, { "id": "identity_267235", "image": "AI2D/images/1475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is Pyroclastic composed of?\nChoices:\n(A) none of the above\n(B) rainwater\n(C) rock\n(D) salt water" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.09375, "loss": 4.040139675140381 }, { "id": "identity_50973", "image": "DocVQA/images/fzvm0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the transmission mode used for this report?" }, { "from": "gpt", "value": "The answer is EMHR" } ], "clip_score": 24.390625, "loss": 5.630518436431885 }, { "id": "identity_284207", "image": "A-OKVQA/images/194669.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what way did he manage to get onto the back of the truck?\nChoices:\n(A) climbed on\n(B) fell on\n(C) lives there\n(D) jumped on" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.6875, "loss": 4.071934700012207 }, { "id": "identity_28663", "image": "PMC-VQA/images/PMC7034773_Fig2_122296.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the image showing? \nChoices:\n(A) 2D rendering of tracts on a lateral view \n(B) 3D rendering of representative tracts on a lateral view \n(C) A representative tract on a back view \n(D) 3D rendering of tracts on a back view. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.203125, "loss": 4.066335201263428 }, { "id": "identity_181979", "image": "VQA2.0/images/73951.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shorts the person in the foreground is wearing?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 22.875, "loss": 4.994690418243408 }, { "id": "identity_320402", "image": "ScienceQA/images/1887.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Topeka\n(B) Cincinnati\n(C) Columbus\n(D) Fayetteville" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.703125, "loss": 4.1299662590026855 }, { "id": "identity_265082", "image": "IconQA/images/13415.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) rotate\n(B) shift\n(C) invert" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.90625, "loss": 4.340823173522949 }, { "id": "identity_254762", "image": "ScienceQA/images/8848.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which property matches this object, and what is its primary use in cooking?" }, { "from": "gpt", "value": "The answer is Bumpy, side dish" } ], "clip_score": 29.171875, "loss": 4.110901832580566 }, { "id": "identity_207024", "image": "PMC-VQA/images/PMC7018895_Fig2_117014.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate size of the lesion in relation to the surrounding brain structures?" }, { "from": "gpt", "value": "The answer is Small to medium size" } ], "clip_score": 29.734375, "loss": 4.088269233703613 }, { "id": "identity_259713", "image": "VQA2.0/images/215616.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different colors of shoes is he selling?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 18.828125, "loss": 4.50266170501709 }, { "id": "identity_323683", "image": "A-OKVQA/images/352288.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this structure in the park used for?\nChoices:\n(A) storing kites\n(B) office space\n(C) bathroom\n(D) stage presentations" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.453125, "loss": 4.251708984375 }, { "id": "identity_307734", "image": "ScienceQA/images/6554.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Nashville\n(B) Pierre\n(C) Jefferson City\n(D) Baltimore" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.78125, "loss": 4.366106986999512 }, { "id": "identity_127864", "image": "VQA2.0/images/449840.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 13.953125, "loss": 4.163298606872559 }, { "id": "identity_30387", "image": "TQA/images/question_images/solar_system_4317.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which planet is the hottest one in the solar system?\nChoices:\n(A) Mercury\n(B) Mars\n(C) Pluto\n(D) Venus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.515625, "loss": 4.204694747924805 }, { "id": "identity_25078", "image": "A-OKVQA/images/106978.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of business is this street vendor engaged in?\nChoices:\n(A) selling\n(B) entertainment\n(C) shoe shine\n(D) art" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.328125, "loss": 4.430652618408203 }, { "id": "identity_289827", "image": "A-OKVQA/images/6725.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following items is located close to the laptop: egg, chair, apple, or bacon?\nChoices:\n(A) egg\n(B) chair\n(C) apple\n(D) bacon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.9375, "loss": 4.723889350891113 }, { "id": "identity_284961", "image": "TextVQA/images/353cfc5f107e8805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the greeting written on the store's window?" }, { "from": "gpt", "value": "The answer is hi!!" } ], "clip_score": 19.3125, "loss": 4.302698612213135 }, { "id": "identity_117905", "image": "TextVQA/images/2c9b85d8357eae90.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the \"Tuba of Charm\" card according to its description?" }, { "from": "gpt", "value": "The answer is Gives +3 to Run Away, open one treasure" } ], "clip_score": 20.96875, "loss": 4.061432838439941 }, { "id": "identity_31464", "image": "ScienceQA/images/10301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following could Matt's test show?\nChoices:\n(A) how well the weather station would work when it was windy\n(B) if the weather station would work when the temperature was 50°C" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.046875, "loss": 4.471170902252197 }, { "id": "identity_323555", "image": "A-OKVQA/images/473720.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the individual doing with the food in front of them?\nChoices:\n(A) eating\n(B) selling\n(C) decorating\n(D) cooking" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.796875, "loss": 4.141436576843262 }, { "id": "identity_34176", "image": "A-OKVQA/images/119761.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What sport is this?\nChoices:\n(A) soccer\n(B) baseball\n(C) equestrian\n(D) basketball" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.5390625, "loss": 4.20891809463501 }, { "id": "identity_298505", "image": "FigureQA/images/38285.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Medium Blue or Dark Red?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.6875, "loss": 4.553133964538574 }, { "id": "identity_126074", "image": "VQA2.0/images/119458.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are visible in the background?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 11.109375, "loss": 4.389971733093262 }, { "id": "identity_113244", "image": "Super-CLEVR/images/superCLEVR_new_001049.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bicycle that is the same size as the yellow dirtbike?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 22.171875, "loss": 4.352858543395996 }, { "id": "identity_322588", "image": "A-OKVQA/images/134870.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the M on the signs represent?\nChoices:\n(A) masterchef\n(B) mcdonalds\n(C) mcmurdo\n(D) motor cars" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.015625, "loss": 4.405320644378662 }, { "id": "identity_292473", "image": "AI2D/images/3415.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location can the shell be found?\nChoices:\n(A) A\n(B) E\n(C) C\n(D) D" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.515625, "loss": 4.065415859222412 }, { "id": "identity_141330", "image": "MapQA/images/map_2824.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states fall into the higher percentage category?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 19.921875, "loss": 4.749885082244873 }, { "id": "identity_163099", "image": "GeoQA+/images/4475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the rectangle ABCD, if AB:AD = 2 and E is the midpoint of AB, what is the length of line segment CF when F is the midpoint of EC?" }, { "from": "gpt", "value": "The answer is AD" } ], "clip_score": 32.09375, "loss": 4.251214504241943 }, { "id": "identity_267592", "image": "FigureQA/images/97219.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly describes the smoothness of Burlywood? \n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.765625, "loss": 4.21542501449585 }, { "id": "identity_336414", "image": "A-OKVQA/images/279083.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the person here cause the other person to do?\nChoices:\n(A) quit\n(B) go home\n(C) wait\n(D) swing" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.140625, "loss": 4.069878578186035 }, { "id": "identity_26013", "image": "A-OKVQA/images/212241.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the wires above the train for?\nChoices:\n(A) climbing\n(B) decoration\n(C) protection\n(D) power" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.5, "loss": 4.044293403625488 }, { "id": "identity_158496", "image": "ScienceQA/images/17869.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of jewelry is shown in the image?" }, { "from": "gpt", "value": "The answer is Ring" } ], "clip_score": 28.65625, "loss": 4.341540336608887 }, { "id": "identity_318218", "image": "DVQA/images/bar_train_00193347.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the smallest accuracy summed across these datasets?" }, { "from": "gpt", "value": "The answer is sphere" } ], "clip_score": 32.34375, "loss": 4.640172958374023 }, { "id": "identity_190650", "image": "VQA-AS/images/7393.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bottles are visible on the picnic blanket?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 27.96875, "loss": 4.480382442474365 }, { "id": "identity_302120", "image": "AI2D/images/2218.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by label I?\nChoices:\n(A) decay-c\n(B) decay-b\n(C) grow-a\n(D) grow-b" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.234375, "loss": 4.12233829498291 }, { "id": "identity_311581", "image": "A-OKVQA/images/360595.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who is permitted to share and use this area?\nChoices:\n(A) members only\n(B) anyone\n(C) police only\n(D) wealthy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.4921875, "loss": 4.78162145614624 }, { "id": "identity_16935", "image": "DVQA/images/bar_train_00155442.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the chart contain stacked bars?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 28.0625, "loss": 4.177034854888916 }, { "id": "identity_72681", "image": "AI2D/images/1297.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stages are depicted in the life cycle within the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 33.09375, "loss": 4.470671653747559 }, { "id": "identity_110714", "image": "TextVQA/images/5082475b716d3933.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sub-dials are present on the watch face?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.4375, "loss": 4.536149501800537 }, { "id": "identity_208776", "image": "PMC-VQA/images/PMC7072145_cancers-12-00371-f004_134014.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color in the background of the images?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 18.65625, "loss": 4.044759750366211 }, { "id": "identity_5910", "image": "DocVQA/images/kzbn0226_33.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is Step 1. for conducting the risk evaluation?" }, { "from": "gpt", "value": "The answer is scope/screening" } ], "clip_score": 27.734375, "loss": 4.217597007751465 }, { "id": "identity_280035", "image": "IconQA/images/9458.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the shape of this object:\n(A) cube\n(B) cylinder\n(C) sphere\n(D) cone" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.296875, "loss": 4.41934871673584 }, { "id": "identity_34519", "image": "IconQA/images/60708.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the dotted line a line of symmetry?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.0, "loss": 4.129162788391113 }, { "id": "identity_241096", "image": "Super-CLEVR/images/superCLEVR_new_000251.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Compare the sizes of the brown rubber object and the object to the right of the tiny yellow object. Then, determine if the brown rubber object is larger than the purple bicycle" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 26.4375, "loss": 4.658090114593506 }, { "id": "identity_14339", "image": "TextVQA/images/9336941e26e22e1a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the title of the book?" }, { "from": "gpt", "value": "The answer is weird science" } ], "clip_score": 31.875, "loss": 4.203630447387695 }, { "id": "identity_246432", "image": "PMC-VQA/images/PMC3192142_pone-0026113-g001_111577.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How do the orange dots on the image relate to the anatomical landmarks and functional areas of the brain, and what does this imply about the precision of the mapping technique used?" }, { "from": "gpt", "value": "The answer is High precision in mapping technique around the hand-knob area" } ], "clip_score": 29.390625, "loss": 5.061913013458252 }, { "id": "identity_274039", "image": "A-OKVQA/images/311229.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the arrows on the sign signify? Is it:\n(A) merge left\n(B) one way\n(C) turn right\n(D) go straight" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.1875, "loss": 4.1307783126831055 }, { "id": "identity_276885", "image": "PMC-VQA/images/PMC8127231_Fig1_443553.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the foreign material shown in the image from the following options:\n(A) Fecal material\n(B) Contrast dye\n(C) Air bubbles\n(D) Unspecified material." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.734375, "loss": 4.500939846038818 }, { "id": "identity_33574", "image": "A-OKVQA/images/65488.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the woman wearing the covering around her neck?\nChoices:\n(A) keeping warm\n(B) covering scar\n(C) vanity\n(D) style" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.796875, "loss": 4.564764499664307 }, { "id": "identity_324278", "image": "PMC-VQA/images/PMC8687640_f01_142757.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What could be the diagnosis based on this scan?\nChoices:\n(A) Fracture\n(B) Infection\n(C) Tumor\n(D) Arthritis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.765625, "loss": 4.820620536804199 }, { "id": "identity_216989", "image": "PMC-VQA/images/PMC4139810_ijms-15-11730-f005_314458.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant shape of the objects in the image?" }, { "from": "gpt", "value": "The answer is Irregular" } ], "clip_score": 18.265625, "loss": 4.085855960845947 }, { "id": "identity_32150", "image": "A-OKVQA/images/206397.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a process that is related to these animals?\nChoices:\n(A) nuclear fusion\n(B) shearing\n(C) soaring\n(D) photosynthesis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.015625, "loss": 4.413624286651611 }, { "id": "identity_211932", "image": "A-OKVQA/images/250344.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many candles are visible in the room?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.75, "loss": 4.7365031242370605 }, { "id": "identity_338535", "image": "A-OKVQA/images/408336.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How is this animal decorated?\nChoices:\n(A) white stripes\n(B) black stripes\n(C) all black\n(D) all white" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.640625, "loss": 4.205543518066406 }, { "id": "identity_191850", "image": "A-OKVQA/images/104337.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many athletes can be seen wearing red colored ski boots?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 29.265625, "loss": 4.510558605194092 }, { "id": "identity_21571", "image": "DVQA/images/bar_train_00110349.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of mother in signal smaller than the value of list in feed?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 28.96875, "loss": 4.063207149505615 }, { "id": "identity_298405", "image": "ScienceQA/images/18872.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which region did Lincoln receive the most votes? \nChoices:\n(A) the Northwest\n(B) the South" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.75, "loss": 4.287714004516602 }, { "id": "identity_199877", "image": "VizWiz/images/VizWiz_train_00023586.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many icons are visible on the computer screen?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 28.8125, "loss": 4.1157941818237305 }, { "id": "identity_303455", "image": "TQA/images/abc_question_images/photosynthesis_17481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the carbon dioxide?\nChoices:\n(A) F\n(B) C\n(C) J\n(D) S" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.546875, "loss": 4.572537422180176 }, { "id": "identity_288062", "image": "PMC-VQA/images/PMC6195733_Fig1_384145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique is utilized for image i-l?\nOptions:\n(A) T1 weighted imaging\n(B) T2 weighted imaging\n(C) Diffusion weighted imaging\n(D) Functional magnetic resonance imaging" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.71875, "loss": 4.148775100708008 }, { "id": "identity_1230", "image": "IconQA/images/22054.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner more likely to land?\nChoices:\n(A) white\n(B) yellow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.90625, "loss": 4.639546871185303 }, { "id": "identity_322634", "image": "ScienceQA/images/4898.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Honolulu\n(B) Juneau\n(C) Santa Fe\n(D) Fairbanks" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.234375, "loss": 4.294856071472168 }, { "id": "identity_29977", "image": "IconQA/images/106608.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) flip\n(B) turn\n(C) slide" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.59375, "loss": 4.0221357345581055 }, { "id": "identity_30628", "image": "ScienceQA/images/16373.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these states is farthest south?\nChoices:\n(A) Connecticut\n(B) Alabama\n(C) Pennsylvania\n(D) Kentucky" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.5625, "loss": 4.0653181076049805 }, { "id": "identity_6614", "image": "FigureQA/images/42978.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Green the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.46875, "loss": 4.163300514221191 }, { "id": "identity_243455", "image": "PMC-VQA/images/PMC6086413_pone.0201430.g004_352601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What process was likely used to prepare the specimens in the image, and what are the key indicators that support this conclusion?" }, { "from": "gpt", "value": "The answer is Heating process, indicated by texture and structural changes" } ], "clip_score": 27.421875, "loss": 4.17249870300293 }, { "id": "identity_324845", "image": "IconQA/images/6159.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a sub. Which one is third?\nChoices:\n(A) sub\n(B) fish\n(C) turtle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.015625, "loss": 4.273104667663574 }, { "id": "identity_202482", "image": "A-OKVQA/images/542933.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players can be counted in the dugout of the team wearing maroon?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 26.875, "loss": 4.759866237640381 }, { "id": "identity_296748", "image": "AI2D/images/1267.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which process supplies light energy to the plant?\nChoices:\n(A) B\n(B) A\n(C) D\n(D) C" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.28125, "loss": 4.0481414794921875 }, { "id": "identity_337510", "image": "A-OKVQA/images/234994.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who is a legendary figure for one of these teams?\nChoices:\n(A) michael jordan\n(B) leroy garrett\n(C) todd helton\n(D) tom brady" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.359375, "loss": 4.369969844818115 }, { "id": "identity_77546", "image": "TQA/images/question_images/ocean_zones_7138.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the limit of growth for attached plants as shown in the image?" }, { "from": "gpt", "value": "The answer is Irregular and sometimes rocky bottom" } ], "clip_score": 22.5, "loss": 4.166744709014893 }, { "id": "identity_21345", "image": "PMC-VQA/images/PMC6248249_f1_399487.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of maps are shown in the image? \nChoices:\n(A) Political and climate maps \n(B) Bullseye and treasure maps \n(C) Sea and mountain maps \n(D) Road and street maps. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.6640625, "loss": 4.166156768798828 }, { "id": "identity_188660", "image": "A-OKVQA/images/447179.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are seated in the background?" }, { "from": "gpt", "value": "The answer is Fourteen" } ], "clip_score": 13.421875, "loss": 4.0817952156066895 }, { "id": "identity_115601", "image": "TextVQA/images/ee72c08a127be37f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the majority of the machine's exterior?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 23.890625, "loss": 4.283234119415283 }, { "id": "identity_119759", "image": "TextVQA/images/1481db31b19b9381.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many athletes are listed for Heat 4?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 25.703125, "loss": 4.148714065551758 }, { "id": "identity_18253", "image": "DVQA/images/bar_train_00193347.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the smallest accuracy summed across all the datasets?" }, { "from": "gpt", "value": "The answer is sphere" } ], "clip_score": 31.640625, "loss": 4.628624439239502 }, { "id": "identity_298849", "image": "TextVQA/images/86401c9659fc9f6b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: For what purpose is the ad in the background?" }, { "from": "gpt", "value": "The answer is no text in image" } ], "clip_score": 15.8828125, "loss": 4.344938278198242 }, { "id": "identity_34047", "image": "PMC-VQA/images/PMC6723115_F1_33045.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the white dotted line in the schematic represent? \nChoices:\n(A) The basal surface \n(B) The lateral surface \n(C) The apical surface \n(D) The luminal surface. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.875, "loss": 4.447513580322266 }, { "id": "identity_277923", "image": "FigureQA/images/1738.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Brown the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.53125, "loss": 4.468899726867676 }, { "id": "identity_276930", "image": "TextVQA/images/19703a54731b9656.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the sign above the coffee pot?" }, { "from": "gpt", "value": "The answer is a" } ], "clip_score": 28.34375, "loss": 5.346522331237793 }, { "id": "identity_191391", "image": "PMC-VQA/images/PMC4321082_fig3_356602.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the lesion indicated by the arrow?" }, { "from": "gpt", "value": "The answer is Irregular" } ], "clip_score": 23.234375, "loss": 4.46986722946167 }, { "id": "identity_279123", "image": "A-OKVQA/images/146294.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does Fly Emirates offer to the game? Is it:\n(A) drink\n(B) food\n(C) sponsor\n(D) transportation" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.2109375, "loss": 4.082446098327637 }, { "id": "identity_281558", "image": "PMC-VQA/images/PMC7084207_ijms-21-01887-f002_138920.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the type of inflammation depicted in the image. Is it:\n(A) acute inflammation\n(B) chronic inflammation\n(C) granulomatous inflammation\n(D) fibrotic inflammation" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.546875, "loss": 4.518612861633301 }, { "id": "identity_219581", "image": "VQA2.0/images/377756.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrian crossing signals are visible?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 28.46875, "loss": 4.483267784118652 }, { "id": "identity_297838", "image": "PMC-VQA/images/PMC7262938_tca13421-fig-0001_181563.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the finding in the right upper lobe on the CT scan?\nChoices:\n(A) Emphysema\n(B) Hemorrhage\n(C) Lung cancer\n(D) Infection." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.25, "loss": 4.451108455657959 }, { "id": "identity_302531", "image": "A-OKVQA/images/216325.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of surfboard is the person on the far left holding?\nChoices:\n(A) funboard\n(B) fish\n(C) longboard\n(D) short board" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.296875, "loss": 4.190939426422119 }, { "id": "identity_325538", "image": "PMC-VQA/images/PMC3564814_F1_183850.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrow indicate in the image? \nChoices:\n(A) The upper pole of a kidney \n(B) The lower pole of a kidney \n(C) The middle pole of a kidney \n(D) The middle pole of another kidney" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.203125, "loss": 4.240260601043701 }, { "id": "identity_247403", "image": "IconQA/images/38026.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you select two marbles without looking, how likely is it that you will pick two pink ones?" }, { "from": "gpt", "value": "The answer is certain" } ], "clip_score": 33.78125, "loss": 4.38416051864624 }, { "id": "identity_217441", "image": "PMC-VQA/images/PMC8864031_Fig8_199970.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct areas are marked with yellow arrows in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.78125, "loss": 4.24218225479126 }, { "id": "identity_277864", "image": "TQA/images/question_images/human_system_circulatory_1382.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the consequence of the absence of the right atrium in the circulatory system?\nChoices:\n(A) Nothing happens\n(B) No one receives deoxygenated blood from the body through the vena cava and pumps it into the right ventricle which then sends it to the lungs to be oxygenated\n(C) Left Atrium receives deoxygenated blood from the body through the vena cava and pumps it into the right ventricle which then sends it to the lungs to be oxygenated\n(D) It is still the same" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.859375, "loss": 4.06966495513916 }, { "id": "identity_22791", "image": "TabMWP/images/6453.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is the function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.5625, "loss": 4.0859785079956055 }, { "id": "identity_287891", "image": "PMC-VQA/images/PMC5606599_vetsci-04-00020-f006_217801.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of numbers 1 through 3 in the image provided?\nChoices:\n(A) The position of stenotic PV orifice \n(B) The size of the left coronary artery \n(C) The PV leaflets \n(D) The position of right coronary ostium" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.828125, "loss": 4.508913993835449 }, { "id": "identity_337349", "image": "PMC-VQA/images/PMC5126392_fig4_99947.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which anatomical structure is shown in this image?\nChoices:\n(A) Lumbar vertebra\n(B) Sacrum\n(C) Cervical vertebra\n(D) Thoracic vertebra" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.84375, "loss": 4.442019939422607 }, { "id": "identity_301580", "image": "AI2D/images/1895.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which animal would be affected if there is a decrease in snakes?\nChoices:\n(A) hawks and foxes\n(B) insecta and lizards\n(C) scorpion\n(D) rodents" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.71875, "loss": 4.005197048187256 }, { "id": "identity_290525", "image": "PMC-VQA/images/PMC7448131_fig2-2050313X20952996_240208.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is the abscess situated?\nChoices:\n(A) Right axilla\n(B) Left axilla\n(C) Abdomen\n(D) Chest" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.046875, "loss": 4.391088962554932 }, { "id": "identity_202623", "image": "A-OKVQA/images/199764.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the canopy covering the stall?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 20.25, "loss": 4.011315822601318 }, { "id": "identity_222562", "image": "Super-CLEVR/images/superCLEVR_new_001588.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the airplane located to the left of the purple matte object?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 25.875, "loss": 4.063756942749023 }, { "id": "identity_223612", "image": "PMC-VQA/images/PMC2410174_fig1_23923.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the abnormality observed in the brain in this image?" }, { "from": "gpt", "value": "The answer is Irregular and angular" } ], "clip_score": 29.296875, "loss": 4.608003616333008 }, { "id": "identity_3520", "image": "ScienceQA/images/7258.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Nevada?\nChoices:\n(A) Reno\n(B) Carson City\n(C) Baton Rouge\n(D) Jefferson City" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.46875, "loss": 4.258232116699219 }, { "id": "identity_278927", "image": "VizWiz/images/VizWiz_train_00003952.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you please provide a description of the photo?" }, { "from": "gpt", "value": "The answer is dog" } ], "clip_score": 24.015625, "loss": 5.507837295532227 }, { "id": "identity_148357", "image": "DocVQA/images/rnjf0226_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What do multiple groups not trust standard territory reps for?" }, { "from": "gpt", "value": "The answer is detailed information on PPAR activation" } ], "clip_score": 25.78125, "loss": 4.935393333435059 }, { "id": "identity_333891", "image": "ScienceQA/images/17047.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Newport\n(B) Tallahassee\n(C) Tucson\n(D) Phoenix" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.90625, "loss": 4.00865364074707 }, { "id": "identity_201967", "image": "PMC-VQA/images/PMC6462822_f0045_465006.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many brain slices are shown in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.703125, "loss": 4.327089786529541 }, { "id": "identity_285838", "image": "A-OKVQA/images/181836.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what social cause has the street been closed off for celebration?\nChoices:\n(A) anti-war\n(B) anti-racism\n(C) lgbt rights\n(D) police reform" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.171875, "loss": 4.001356601715088 }, { "id": "identity_199922", "image": "TextVQA/images/0c9cfd577d3a0f1a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many amusement rides can be identified in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.953125, "loss": 4.4656782150268555 }, { "id": "identity_303080", "image": "AI2D/images/1961.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option will disappear if all planktivorous fish die?\nChoices:\n(A) diatoms\n(B) bacteria\n(C) piscivorous fish\n(D) cnidaria ctenophora" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.8125, "loss": 4.511613845825195 }, { "id": "identity_121190", "image": "Super-CLEVR/images/superCLEVR_new_000895.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object located between the gold and shiny bicycles?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 27.96875, "loss": 5.095558166503906 }, { "id": "identity_46036", "image": "FigureQA/images/33751.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 28.484375, "loss": 4.062668323516846 }, { "id": "identity_328989", "image": "TabMWP/images/35355.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is it linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.984375, "loss": 4.231080055236816 }, { "id": "identity_196977", "image": "PMC-VQA/images/PMC6937462_FIG7_92493.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of inflammation in the sinuses?" }, { "from": "gpt", "value": "The answer is No visible signs" } ], "clip_score": 26.515625, "loss": 4.523667812347412 }, { "id": "identity_126670", "image": "TextVQA/images/ef5d49ad66e286f0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person who signed the letter?" }, { "from": "gpt", "value": "The answer is Thomas Kellein" } ], "clip_score": 22.65625, "loss": 5.700438499450684 }, { "id": "identity_220189", "image": "A-OKVQA/images/546378.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of book is to the left of the cat?" }, { "from": "gpt", "value": "The answer is Dictionary" } ], "clip_score": 29.546875, "loss": 4.952959060668945 }, { "id": "identity_205149", "image": "PMC-VQA/images/PMC5265979_F3_119160.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of disc pathology is indicated by the red arrow?" }, { "from": "gpt", "value": "The answer is Discitis" } ], "clip_score": 30.640625, "loss": 4.294520854949951 }, { "id": "identity_30275", "image": "PMC-VQA/images/PMC3726502_F1_221078.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of MRI was acquired for each examination? \nChoices:\n(A) CT scan \n(B) T1 weighted \n(C) T2 weighted \n(D) PET scan " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.765625, "loss": 4.131679534912109 }, { "id": "identity_318154", "image": "PMC-VQA/images/PMC5135722_Fig1_101981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the intracerebral hematoma located in the brain?\nChoices:\n(A) left hemisphere\n(B) right hemisphere\n(C) cerebellum\n(D) occipital lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.078125, "loss": 4.3357110023498535 }, { "id": "identity_10278", "image": "FigureQA/images/97576.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Blue have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.296875, "loss": 4.462636470794678 }, { "id": "identity_223326", "image": "Super-CLEVR/images/superCLEVR_new_001873.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest vehicle in the image?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 20.984375, "loss": 4.371496200561523 }, { "id": "identity_19987", "image": "TextVQA/images/0f2e50dac6fcb058.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: who plays number 14?" }, { "from": "gpt", "value": "The answer is lorance" } ], "clip_score": 15.2265625, "loss": 4.565406322479248 }, { "id": "identity_293261", "image": "TabMWP/images/8534.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which day did Sasha watch the most TV out of the past 4 days?\nChoices:\n(A) Tuesday\n(B) Wednesday\n(C) Thursday\n(D) Friday" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.375, "loss": 4.203525543212891 }, { "id": "identity_331332", "image": "ScienceQA/images/439.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Boston\n(B) Omaha\n(C) Dover\n(D) Annapolis" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.328125, "loss": 4.504915237426758 }, { "id": "identity_336892", "image": "A-OKVQA/images/429793.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the laptop on top of the desk near the potted flowers?\nChoices:\n(A) gray\n(B) blue\n(C) black\n(D) red" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.65625, "loss": 4.586245536804199 }, { "id": "identity_33245", "image": "ScienceQA/images/14999.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following best describes a population on an island in the Bahamas?\nChoices:\n(A) the rocky soil and the small trees\n(B) the brown anole lizards\n(C) the Bahamian boa constrictors and the curly-tailed lizards" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.234375, "loss": 4.068543910980225 }, { "id": "identity_63045", "image": "ScienceQA/images/16711.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries are visible in this map, excluding the highlighted one?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.84375, "loss": 4.742434501647949 }, { "id": "identity_36674", "image": "PMC-VQA/images/PMC7502059_Fig2_257138.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which region is being imaged? \nChoices:\n(A) Right leg \n(B) Left leg \n(C) Hip \n(D) Shoulder " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.5859375, "loss": 4.492733478546143 }, { "id": "identity_309981", "image": "A-OKVQA/images/331686.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is worn by everyone who rides these motorcycles?\nChoices:\n(A) cowboy hats\n(B) police badge\n(C) biker jeans\n(D) rubber vest" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.625, "loss": 4.386016845703125 }, { "id": "identity_187464", "image": "TextVQA/images/9799b1fe5a9a13c4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many phones are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.421875, "loss": 4.358227729797363 }, { "id": "identity_307618", "image": "PMC-VQA/images/PMC6218971_Fig2_391723.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of scan was used to detect these? \nChoices:\n(A) Vertical scanning image \n(B) Magnetic resonance imaging \n(C) Horizontal scanning image \n(D) X-ray" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.984375, "loss": 4.476385593414307 }, { "id": "identity_116391", "image": "TextVQA/images/9ed747e4f0f42930.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the newspaper advertised on the vending machine?" }, { "from": "gpt", "value": "The answer is Toronto Sun" } ], "clip_score": 17.96875, "loss": 4.1196184158325195 }, { "id": "identity_120327", "image": "VQA-AS/images/8719.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many rabbits are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.34375, "loss": 4.379554271697998 }, { "id": "identity_332649", "image": "PMC-VQA/images/PMC6798454_Fig2_52632.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the scale bars indicate?\nChoices:\n(A) Distance between the cells \n(B) Size of the region of interest \n(C) Magnification of the image \n(D) Time of recording" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.390625, "loss": 4.227778434753418 }, { "id": "identity_9747", "image": "DVQA/images/bar_train_00139394.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the fourth group of bars from the bottom?" }, { "from": "gpt", "value": "The answer is glass" } ], "clip_score": 18.453125, "loss": 4.9153947830200195 }, { "id": "identity_266189", "image": "Geometry3K/images/1394.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the value of E G?\nOptions:\n(A) 5\n(B) 6.5\n(C) 10\n(D) 13" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.203125, "loss": 4.285506725311279 }, { "id": "identity_132001", "image": "Super-CLEVR/images/superCLEVR_new_001845.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the scooter?" }, { "from": "gpt", "value": "The answer is Cyan" } ], "clip_score": 24.25, "loss": 4.353204250335693 }, { "id": "identity_25877", "image": "PMC-VQA/images/PMC7790321_FIG7_336988.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is denoted by the yellow arrows in the image? \nChoices:\n(A) Soft tissue growth. \n(B) Implants. \n(C) Bone formation. \n(D) Tooth decay. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.125, "loss": 4.992275238037109 }, { "id": "identity_276064", "image": "AI2D/images/4311.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label that represents the sun from the following options:\n(A) Label I\n(B) Label C\n(C) Label J\n(D) Label B" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.0625, "loss": 4.679263591766357 }, { "id": "identity_285793", "image": "PMC-VQA/images/PMC2963743_F0003_76794.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: To what region does the tubular channel extend up to? \nChoices:\n(A) Chest region\n(B) Neck region\n(C) Umbilical region\n(D) Pelvic region" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.4375, "loss": 4.384576320648193 }, { "id": "identity_293041", "image": "FigureQA/images/33209.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Violet Red or Cyan?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.015625, "loss": 4.541292190551758 }, { "id": "identity_116315", "image": "TextVQA/images/12e6ffc26e9ba34d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many checkered flags are visible on the sweater?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 24.109375, "loss": 4.504842758178711 }, { "id": "identity_78459", "image": "AI2D/images/1507.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What geological feature is formed at stage 3?" }, { "from": "gpt", "value": "The answer is Rock columns" } ], "clip_score": 32.84375, "loss": 5.5127129554748535 }, { "id": "identity_106399", "image": "VQA2.0/images/339547.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any traffic lights visible in the image?" }, { "from": "gpt", "value": "The answer is Yes, one traffic light" } ], "clip_score": 23.015625, "loss": 4.038278102874756 }, { "id": "identity_19188", "image": "FigureQA/images/85481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Seafoam have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.40625, "loss": 4.560081481933594 }, { "id": "identity_327868", "image": "PMC-VQA/images/PMC7344590_diagnostics-10-00385-f001_207844.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What method was used for the diagnosis?\nChoices:\n(A) MRI\n(B) CT scan\n(C) PET scan\n(D) X-Ray" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.078125, "loss": 4.742438316345215 }, { "id": "identity_291889", "image": "FigureQA/images/64866.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Green the color with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.546875, "loss": 4.681395530700684 }, { "id": "identity_335611", "image": "AI2D/images/4186.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the process at this stage called?\nChoices:\n(A) uplift\n(B) heat and pressure\n(C) cooling\n(D) melting" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.046875, "loss": 4.495280742645264 }, { "id": "identity_334008", "image": "FigureQA/images/71726.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Blue Violet?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.125, "loss": 4.144952774047852 }, { "id": "identity_188377", "image": "PMC-VQA/images/PMC4801634_sensors-16-00258-f006_13398.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What structural feature is highlighted within the red circle?" }, { "from": "gpt", "value": "The answer is Blood vessels" } ], "clip_score": 26.078125, "loss": 4.117110729217529 }, { "id": "identity_35121", "image": "TQA/images/abc_question_images/parts_worm_17302.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter is representing the mouth in this diagram?\nChoices:\n(A) H\n(B) N\n(C) R\n(D) F" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.859375, "loss": 4.3539252281188965 }, { "id": "identity_308035", "image": "FigureQA/images/80571.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.7421875, "loss": 4.691276550292969 }, { "id": "identity_112075", "image": "VQA2.0/images/535278.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the filing cabinet to the left of the woman?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 21.421875, "loss": 4.445621490478516 }, { "id": "identity_126175", "image": "PMC-VQA/images/PMC6282431_F0003_408834.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of joint is visible in the radiography?" }, { "from": "gpt", "value": "The answer is A hinge joint" } ], "clip_score": 33.15625, "loss": 4.12570858001709 }, { "id": "identity_338084", "image": "PMC-VQA/images/PMC7947347_F1_387981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What subdivisions of the right hemisphere are depicted in the image?\nChoices:\n(A) Cortical subdivisions with thalamic connections \n(B) Thalamic subdivisions with cortical connections \n(C) Subcortical subdivisions with thalamic connections \n(D) Subcortical subdivisions with cortical connections" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.59375, "loss": 4.121770858764648 }, { "id": "identity_25007", "image": "PMC-VQA/images/PMC4506045_pone.0133375.g006_407017.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the insert indicating in the image? \nChoices:\n(A) Medial-Lateral axis sections \n(B) Anterior-Posterior axis sections \n(C) Proximal-Distal axis sections \n(D) Dorsal-Ventral axis sections. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.296875, "loss": 4.2121148109436035 }, { "id": "identity_336583", "image": "IconQA/images/52985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this shape?\nChoices:\n(A) slide\n(B) flip\n(C) turn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.390625, "loss": 4.373574256896973 }, { "id": "identity_27662", "image": "A-OKVQA/images/574940.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which cloth is good for window curtains?\nChoices:\n(A) linen\n(B) silk\n(C) nylon\n(D) cotton" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.109375, "loss": 4.217851161956787 }, { "id": "identity_29906", "image": "PMC-VQA/images/PMC5379728_Fig1_149081.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the modality used to acquire the image? \nChoices:\n(A) CT scan \n(B) MRI \n(C) X-ray \n(D) Ultrasound " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.21875, "loss": 4.398173809051514 }, { "id": "identity_184505", "image": "PMC-VQA/images/PMC5944956_pone.0196481.g001_312614.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the letter 'C' in the image?" }, { "from": "gpt", "value": "The answer is Sequence marker in a set of images" } ], "clip_score": 12.421875, "loss": 4.670915126800537 }, { "id": "identity_111346", "image": "TextVQA/images/03577bee21c7d19e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the banner to the left of the speaker?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 13.9140625, "loss": 4.130143165588379 }, { "id": "identity_199165", "image": "PMC-VQA/images/PMC5615764_Fig1_220556.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is visible in the center of the image, surrounded by ribs?" }, { "from": "gpt", "value": "The answer is The heart" } ], "clip_score": 24.765625, "loss": 4.056412696838379 }, { "id": "identity_320260", "image": "PMC-VQA/images/PMC8311273_fig8_20035.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the imaging technique used here?\nChoices:\n(A) Computed Tomography \n(B) Positron Emission Tomography \n(C) X-Ray \n(D) Magnetic Resonance Imaging" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.234375, "loss": 4.133334159851074 }, { "id": "identity_64587", "image": "AI2D/images/117.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What direction is the North Pole arrow pointing towards?" }, { "from": "gpt", "value": "The answer is Upwards" } ], "clip_score": 35.15625, "loss": 4.146061420440674 }, { "id": "identity_32126", "image": "A-OKVQA/images/533889.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of lines are located above the street?\nChoices:\n(A) cell\n(B) power\n(C) water\n(D) sewage" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.15625, "loss": 4.440305709838867 }, { "id": "identity_319948", "image": "PMC-VQA/images/PMC2566560_F1_28707.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the finger is affected here?\nChoices:\n(A) Proximal phalanx\n(B) Intermediate phalanx\n(C) Distal phalanx\n(D) Metacarpals" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.3125, "loss": 4.3136396408081055 }, { "id": "identity_24294", "image": "PMC-VQA/images/PMC3808353_pone-0078309-g003_239464.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown with a solid line in the image? \nChoices:\n(A) The ventral border of the dorsal horn \n(B) The dorsal border of the dorsal horn \n(C) The boundary between laminae II and III \n(D) The boundary between laminae III and IV. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.53125, "loss": 4.068341255187988 }, { "id": "identity_313323", "image": "Super-CLEVR/images/superCLEVR_new_001376.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the small metal thing behind the large metallic object shaped like the blue object in front of the matte scooter?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 27.546875, "loss": 5.0868425369262695 }, { "id": "identity_334062", "image": "A-OKVQA/images/475988.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are these motorbikes arranged in a row?\nChoices:\n(A) for repair\n(B) for show\n(C) for amusement\n(D) coincidence" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.421875, "loss": 4.145197868347168 }, { "id": "identity_16047", "image": "FigureQA/images/29760.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Seafoam greater than Maroon?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.4375, "loss": 4.032238960266113 }, { "id": "identity_308481", "image": "ScienceQA/images/2554.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared characteristic of these three items?\nChoices:\n(A) slippery\n(B) blue\n(C) stretchy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.515625, "loss": 4.2164483070373535 }, { "id": "identity_315672", "image": "FigureQA/images/84071.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.65625, "loss": 4.0687971115112305 }, { "id": "identity_133205", "image": "VQA2.0/images/403910.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the largest object on the desk?" }, { "from": "gpt", "value": "The answer is The white storage drawers" } ], "clip_score": 21.578125, "loss": 4.187678813934326 }, { "id": "identity_310236", "image": "A-OKVQA/images/413666.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During which season are the skateboarders skating in the park?\nChoices:\n(A) summer\n(B) fall\n(C) spring\n(D) winter" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.875, "loss": 4.119496822357178 }, { "id": "identity_328099", "image": "A-OKVQA/images/259535.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this person doing with the frisbee?\nChoices:\n(A) catching it\n(B) tossing it\n(C) spinning it\n(D) cleaning it" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.984375, "loss": 4.379947662353516 }, { "id": "identity_27468", "image": "A-OKVQA/images/195296.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the bananas doing on the yellow cloth?\nChoices:\n(A) being dried\n(B) being cooked\n(C) being sold\n(D) being eaten" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.75, "loss": 4.495980262756348 }, { "id": "identity_267955", "image": "Super-CLEVR/images/superCLEVR_new_001134.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the green bicycle and the purple matte bicycle of equal size?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 28.59375, "loss": 5.436863899230957 }, { "id": "identity_269983", "image": "PMC-VQA/images/PMC7319815_ytaa086-F1_200240.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of prosthesis is depicted in the image provided? \nOptions:\n(A) Biological Prosthesis\n(B) Mechanical Prosthesis\n(C) Tissue-engineered Prosthesis\n(D) Autograft Prosthesis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.015625, "loss": 4.541769981384277 }, { "id": "identity_246772", "image": "DocVQA/images/zjlk0226_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: List all the editors from Wyoming and provide their respective newspapers and cities" }, { "from": "gpt", "value": "The answer is Robert J. Dreher, Becky Sheller, Jack Branseom, M. Pat Schmidt, Dave Bonner, Hugh Knoefel" } ], "clip_score": 28.453125, "loss": 4.469630241394043 }, { "id": "identity_191269", "image": "VQA-AS/images/12770.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many plants are visible in the room?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.625, "loss": 4.238490581512451 }, { "id": "identity_183671", "image": "PMC-VQA/images/PMC7737277_Fig2_321109.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many orange squares are used to mark the area of interest?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 18.390625, "loss": 4.1904120445251465 }, { "id": "identity_9111", "image": "AI2D/images/3607.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is directly below the larynx?\nChoices:\n(A) Nasal Cavity\n(B) Alveoli\n(C) Trachea\n(D) Diaohragm" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.9375, "loss": 4.491978168487549 }, { "id": "identity_288697", "image": "ScienceQA/images/10850.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Hawaii?\n(A) Phoenix\n(B) Honolulu\n(C) Juneau\n(D) Frankfort" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.4375, "loss": 4.348532676696777 }, { "id": "identity_131873", "image": "VQA2.0/images/288662.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of water activity is depicted in the image?" }, { "from": "gpt", "value": "The answer is Jet skiing" } ], "clip_score": 27.890625, "loss": 4.411688327789307 }, { "id": "identity_28", "image": "AI2D/images/2595.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What letter represents July on Earth?\nChoices:\n(A) E\n(B) H\n(C) G\n(D) C" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.828125, "loss": 4.21375036239624 }, { "id": "identity_244932", "image": "A-OKVQA/images/36915.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which buildings are likely to offer the most protection from a tsunami, considering their height and distance from the shoreline?" }, { "from": "gpt", "value": "The answer is right" } ], "clip_score": 20.796875, "loss": 4.0546441078186035 }, { "id": "identity_4085", "image": "DVQA/images/bar_train_00171335.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the sixth group of bars from the left?" }, { "from": "gpt", "value": "The answer is summer" } ], "clip_score": 14.0703125, "loss": 4.093191623687744 }, { "id": "identity_264174", "image": "VizWiz/images/VizWiz_train_00002993.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify this magazine?" }, { "from": "gpt", "value": "The answer is veterinary health hospital" } ], "clip_score": 34.40625, "loss": 4.210033416748047 }, { "id": "identity_317779", "image": "ScienceQA/images/10258.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Trenton\n(B) Phoenix\n(C) Manchester\n(D) Harrisburg" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.625, "loss": 4.061518669128418 }, { "id": "identity_73559", "image": "TQA/images/question_images/fossils_9107.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of organism is being fossilized in this image?" }, { "from": "gpt", "value": "The answer is Fish" } ], "clip_score": 28.296875, "loss": 4.319835186004639 }, { "id": "identity_119268", "image": "TextVQA/images/07dcc033d9b5a027.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the phone's back cover?" }, { "from": "gpt", "value": "The answer is Gold" } ], "clip_score": 27.671875, "loss": 4.186713218688965 }, { "id": "identity_15754", "image": "DocVQA/images/skcg0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the addressee of this letter?" }, { "from": "gpt", "value": "The answer is Mike" } ], "clip_score": 19.328125, "loss": 4.468923091888428 }, { "id": "identity_290490", "image": "A-OKVQA/images/246927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the small cream-colored container contain? Is it:\n(A) sugar substitute\n(B) coffee creamer\n(C) pepper\n(D) salt" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 6.8828125, "loss": 4.4320526123046875 }, { "id": "identity_33495", "image": "PMC-VQA/images/PMC6408898_RSPB20182733F2_447708.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is represented by the blue color in the lesion-symptom map? \nChoices:\n(A) Change in GCC thickness in the lower quadrant \n(B) Change in GCC thickness in the upper quadrant \n(C) No change in GCC thickness \n(D) Change in GCC thickness in both the quadrants. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.265625, "loss": 4.232119560241699 }, { "id": "identity_3254", "image": "A-OKVQA/images/156969.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What mode of transportation is upside-down?\nChoices:\n(A) bicycle\n(B) skateboard\n(C) scooter\n(D) moped" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.453125, "loss": 4.06888484954834 }, { "id": "identity_184030", "image": "PMC-VQA/images/PMC3116541_F0003_99168.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many primary blood vessels are branching out from the central structure?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 30.140625, "loss": 4.297127723693848 }, { "id": "identity_207432", "image": "A-OKVQA/images/54039.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles are covered with protective sheets?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 31.984375, "loss": 4.614571571350098 }, { "id": "identity_335631", "image": "A-OKVQA/images/331082.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the individuals doing here?\nChoices:\n(A) racing\n(B) protesting\n(C) jogging\n(D) shopping" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.1875, "loss": 4.216304779052734 }, { "id": "identity_31583", "image": "FigureQA/images/1957.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Blue the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.953125, "loss": 4.468719482421875 }, { "id": "identity_134965", "image": "DocVQA/images/jxcn0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of transmission used?" }, { "from": "gpt", "value": "The answer is STD" } ], "clip_score": 22.828125, "loss": 4.314820766448975 }, { "id": "identity_144710", "image": "DocVQA/images/mlyg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the document suggesting the members to do before the meeting?" }, { "from": "gpt", "value": "The answer is Give thought and offer suggestions" } ], "clip_score": 22.078125, "loss": 5.010780334472656 }, { "id": "identity_276300", "image": "PMC-VQA/images/PMC3605106_F2_193660.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During fundoscopy for edema, which eye was examined: the right eye, the left eye, both eyes, or neither?\nChoices:\n(A) Right eye \n(B) Left eye \n(C) Both eyes \n(D) Neither" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.703125, "loss": 4.1854963302612305 }, { "id": "identity_272433", "image": "IconQA/images/2772.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture is the third one in the sequence, following the first picture of a bear?\nChoices:\n(A) train\n(B) plane\n(C) bear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.46875, "loss": 4.030903339385986 }, { "id": "identity_138995", "image": "DocVQA/images/jrxd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the recipient of this letter?" }, { "from": "gpt", "value": "The answer is Richard Pope" } ], "clip_score": 22.296875, "loss": 4.722390651702881 }, { "id": "identity_213913", "image": "VQA2.0/images/462911.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the catcher's uniform?" }, { "from": "gpt", "value": "The answer is Dark blue or black" } ], "clip_score": 24.28125, "loss": 4.015793323516846 }, { "id": "identity_303338", "image": "FigureQA/images/5262.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.234375, "loss": 4.0930280685424805 }, { "id": "identity_289571", "image": "A-OKVQA/images/282659.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the target of the person holding the wooden item?\nChoices:\n(A) fly\n(B) homerun\n(C) ant\n(D) pizza dough" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.28125, "loss": 4.809508323669434 }, { "id": "identity_205641", "image": "VQA-AS/images/6846.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What objects are the individuals on the bench interacting with?" }, { "from": "gpt", "value": "The answer is Bread or food" } ], "clip_score": 25.25, "loss": 4.132955074310303 }, { "id": "identity_323167", "image": "PMC-VQA/images/PMC9207413_F5_315000.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the black arrow in the image indicate? \nChoices:\n(A) Accumulated fat \n(B) Accumulated protein \n(C) Accumulated glycogen \n(D) Accumulated vitamin C" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.390625, "loss": 4.325486660003662 }, { "id": "identity_31725", "image": "FigureQA/images/84259.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Violet have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.046875, "loss": 4.39724063873291 }, { "id": "identity_197350", "image": "A-OKVQA/images/52966.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the man's shirt in the middle?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 13.90625, "loss": 4.045450687408447 }, { "id": "identity_286736", "image": "DVQA/images/bar_train_00132018.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which bar label is located third from the left?" }, { "from": "gpt", "value": "The answer is remark" } ], "clip_score": 20.890625, "loss": 4.974989891052246 }, { "id": "identity_959", "image": "A-OKVQA/images/188587.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What aircraft type is this?\nChoices:\n(A) jet\n(B) biplane\n(C) helicopter\n(D) seaplane" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.796875, "loss": 4.194581985473633 }, { "id": "identity_219295", "image": "TextVQA/images/ef2984aa210590d0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals can be seen in the crowd within the illustration?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 15.4140625, "loss": 4.513433933258057 }, { "id": "identity_34539", "image": "A-OKVQA/images/197167.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The narrowest visible point of the board is pointing in what direction?\nChoices:\n(A) west\n(B) south\n(C) north\n(D) east" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.5625, "loss": 4.245304584503174 }, { "id": "identity_268551", "image": "FigureQA/images/82982.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Turquoise the color with the smallest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.265625, "loss": 4.123403072357178 }, { "id": "identity_4585", "image": "IconQA/images/49971.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner less likely to land?\nChoices:\n(A) white\n(B) pink" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.03125, "loss": 4.110304832458496 }, { "id": "identity_313830", "image": "ScienceQA/images/6777.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Biloxi\n(B) Frankfort\n(C) Jackson\n(D) Raleigh" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.03125, "loss": 4.098267555236816 }, { "id": "identity_1418", "image": "A-OKVQA/images/132816.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: This child has what on their face?\nChoices:\n(A) mud\n(B) vinegar\n(C) sugar\n(D) carrot juice" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.609375, "loss": 4.052447319030762 }, { "id": "identity_277814", "image": "TQA/images/abc_question_images/parts_neuron_17227.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part receives nerve impulses from other cells among the options M, A, S, and U?\nChoices:\n(A) M\n(B) A\n(C) S\n(D) U" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.34375, "loss": 4.07737398147583 }, { "id": "identity_279323", "image": "FigureQA/images/90488.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Blue the one with the largest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.515625, "loss": 4.482181549072266 }, { "id": "identity_297206", "image": "A-OKVQA/images/172886.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which common bathroom item that is usually freestanding is built into the wall?\nChoices:\n(A) plumbing\n(B) hair dryer\n(C) mirror\n(D) sink" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.421875, "loss": 4.525382995605469 }, { "id": "identity_135633", "image": "DocVQA/images/txdw0217_43.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the second column of the table?" }, { "from": "gpt", "value": "The answer is Effectiveness in Treating Hot F" } ], "clip_score": 19.53125, "loss": 5.087832450866699 }, { "id": "identity_142242", "image": "ChartQA/images/multi_col_40076.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which characteristic has the highest percentage of respondents who disagree?" }, { "from": "gpt", "value": "The answer is Is intelligent" } ], "clip_score": 27.390625, "loss": 4.623127460479736 }, { "id": "identity_306715", "image": "PMC-VQA/images/PMC7770766_Fig2_331531.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which elements are being mapped in this image?\nChoices:\n(A) C, N, Fe, and S\n(B) C, O, Ti, and F\n(C) Na, Cl, K, and Ca\n(D) H, He, Li, and Be" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.453125, "loss": 4.655080795288086 }, { "id": "identity_294994", "image": "AI2D/images/1087.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term for the extended part of a leaf among the following options: petiole, midrib, blade, or vein?\nChoices:\n(A) petiole\n(B) midrib\n(C) blade\n(D) vein" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.09375, "loss": 5.009541034698486 }, { "id": "identity_30712", "image": "IconQA/images/36592.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a train. Which picture is ninth?\nChoices:\n(A) train\n(B) bear\n(C) plane" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.28125, "loss": 4.126650810241699 }, { "id": "identity_319569", "image": "Super-CLEVR/images/superCLEVR_new_001403.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the tiny blue object behind the yellow metal bicycle and the brown object have a similar shape?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 24.921875, "loss": 5.213912010192871 }, { "id": "identity_6326", "image": "FigureQA/images/97219.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Burlywood the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.0, "loss": 4.18506383895874 }, { "id": "identity_17056", "image": "ScienceQA/images/6255.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Maryland?\nChoices:\n(A) Augusta\n(B) Annapolis\n(C) Baltimore\n(D) Biloxi" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.15625, "loss": 4.325285911560059 }, { "id": "identity_229268", "image": "A-OKVQA/images/375750.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of sport is being played, and what specific feature of the court indicates this?" }, { "from": "gpt", "value": "The answer is Tennis, white lines and net" } ], "clip_score": 26.078125, "loss": 4.796160697937012 }, { "id": "identity_305524", "image": "A-OKVQA/images/99952.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this umbrella being used for?\nChoices:\n(A) keeping dry\n(B) decoration\n(C) lighting\n(D) to dance" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.265625, "loss": 4.385330677032471 }, { "id": "identity_12618", "image": "VQA-RAD/images/synpic56388.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the sulci blunted?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 20.765625, "loss": 4.183949947357178 }, { "id": "identity_328940", "image": "A-OKVQA/images/4704.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What keeps the person's feet attached to the snowboard?\nChoices:\n(A) bindings\n(B) tape\n(C) tape\n(D) magnets" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.171875, "loss": 4.292491912841797 }, { "id": "identity_332141", "image": "PMC-VQA/images/PMC6538121_FIG1_485993.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this MRI show?\nChoices:\n(A) Arthritic changes\n(B) Bone fracture\n(C) Lytic center with a ring of sclerosis in the tarsal bones\n(D) Soft tissue swelling" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.9375, "loss": 4.188060760498047 }, { "id": "identity_22103", "image": "ScienceQA/images/4925.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Arizona?\nChoices:\n(A) Tucson\n(B) Salt Lake City\n(C) Phoenix\n(D) Carson City" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.0625, "loss": 4.994771480560303 }, { "id": "identity_300787", "image": "A-OKVQA/images/245793.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where might a person buy a ticket for this train?\nChoices:\n(A) online\n(B) newsboy\n(C) inside building\n(D) train" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.34375, "loss": 4.110341548919678 }, { "id": "identity_335547", "image": "PMC-VQA/images/PMC9028518_insects-13-00317-f006_260904.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the field of view in the images?\nChoices:\n(A) flower\n(B) stem\n(C) leaf\n(D) root" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.125, "loss": 4.292943954467773 }, { "id": "identity_302297", "image": "PMC-VQA/images/PMC5641062_F4_226991.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the marker used to identify cells here?\nChoices:\n(A) Green asterisks \n(B) Red asterisks \n(C) Blue asterisks \n(D) Yellow asterisks." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.7421875, "loss": 4.263435363769531 }, { "id": "identity_110095", "image": "VQA-AS/images/17987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What phase is the moon in?" }, { "from": "gpt", "value": "The answer is First quarter" } ], "clip_score": 16.296875, "loss": 4.236558437347412 }, { "id": "identity_36933", "image": "ChartQA/images/4522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color denotes Rep Party?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 16.375, "loss": 4.442745208740234 }, { "id": "identity_25929", "image": "PMC-VQA/images/PMC7382807_Fig2_220368.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the result of the imaging tests? \nChoices:\n(A) They do not show any fractures \n(B) They do not show any abnormalities \n(C) They show a talocalcaneal coalition \n(D) They show a bony protuberance fracture " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.0, "loss": 4.349222660064697 }, { "id": "identity_185961", "image": "PMC-VQA/images/PMC7232200_microorganisms-08-00582-f009_172750.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the structure indicated by the circle in the image?" }, { "from": "gpt", "value": "The answer is Irregular" } ], "clip_score": 20.640625, "loss": 4.036101818084717 }, { "id": "identity_187170", "image": "PMC-VQA/images/PMC5394153_Fig4_153497.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any sinus cavities in the image?" }, { "from": "gpt", "value": "The answer is Maxillary sinuses visible" } ], "clip_score": 28.203125, "loss": 4.043827056884766 }, { "id": "identity_14511", "image": "VQA-AS/images/11633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the curtains?" }, { "from": "gpt", "value": "The answer is white" } ], "clip_score": 21.1875, "loss": 4.409237861633301 }, { "id": "identity_324821", "image": "TextVQA/images/00b788d91c0084f5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date shown here?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 11.578125, "loss": 4.065529823303223 }, { "id": "identity_72416", "image": "ScienceQA/images/20181.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What role does the sea urchin play in this food web?" }, { "from": "gpt", "value": "The answer is Primary consumer and prey" } ], "clip_score": 26.46875, "loss": 4.255505561828613 }, { "id": "identity_120428", "image": "VQA2.0/images/347037.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many airplanes are visible in the sky?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 24.6875, "loss": 4.219798564910889 }, { "id": "identity_114568", "image": "PMC-VQA/images/PMC9344258_Fig8_362016.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct chains or clusters of yellow spheres are present in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 28.15625, "loss": 4.276134490966797 }, { "id": "identity_206050", "image": "VQA-AS/images/7829.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many panes are there in the window?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 16.984375, "loss": 4.719779968261719 }, { "id": "identity_214576", "image": "Super-CLEVR/images/superCLEVR_new_001291.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 18.703125, "loss": 4.85398530960083 }, { "id": "identity_279795", "image": "ScienceQA/images/7649.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which ocean is being emphasized in the following options?\n(A) the Pacific Ocean\n(B) the Arctic Ocean\n(C) the Atlantic Ocean\n(D) the Indian Ocean" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.546875, "loss": 4.07279109954834 }, { "id": "identity_35315", "image": "TextVQA/images/3bcd2d405bd973f9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the beer company advertising in the back?" }, { "from": "gpt", "value": "The answer is bud" } ], "clip_score": 15.28125, "loss": 4.193647861480713 }, { "id": "identity_206430", "image": "A-OKVQA/images/83178.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any animals visible in the image?" }, { "from": "gpt", "value": "The answer is No animals visible" } ], "clip_score": 14.1640625, "loss": 4.175674915313721 }, { "id": "identity_3227", "image": "PMC-VQA/images/PMC4391185_Fig3_376517.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique was used to take these images?\nChoices:\n(A) X-ray \n(B) Magnetic Resonance Imaging (MRI) \n(C) Computed Tomography (CT) \n(D) Positron Emission Tomography (PET) " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.875, "loss": 4.011048316955566 }, { "id": "identity_295165", "image": "PMC-VQA/images/PMC6172725_Fig6_378283.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can the pigment granules be seen in the electron microscopic image?\nChoices:\n(A) Yes \n(B) No \n(C) Only in the sensory cell \n(D) Only in the pigment cell" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.78125, "loss": 4.239890098571777 }, { "id": "identity_215463", "image": "Super-CLEVR/images/superCLEVR_new_000019.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the car located to the right of the purple limousine?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 26.046875, "loss": 4.658457279205322 }, { "id": "identity_264540", "image": "FigureQA/images/74638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Dark Orchid or Brown?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.265625, "loss": 4.012700080871582 }, { "id": "identity_198844", "image": "PMC-VQA/images/PMC4491482_F0001_402932.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arrows are pointing to structures in the radiograph?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 29.21875, "loss": 4.596333026885986 }, { "id": "identity_250391", "image": "PMC-VQA/images/PMC8005316_FIG3_405175.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical region is depicted in the Sagittal T2-weighted TSE fat-saturated image, and what are the key features that help identify this region?" }, { "from": "gpt", "value": "The answer is Knee region" } ], "clip_score": 28.890625, "loss": 4.14179801940918 }, { "id": "identity_206758", "image": "A-OKVQA/images/289100.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the fruit placed at the bottom left corner of the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 23.96875, "loss": 4.168763637542725 }, { "id": "identity_283968", "image": "PMC-VQA/images/PMC4396166_fig2_377911.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which surgical procedure might be required for this condition?\nChoices:\n(A) Appendectomy\n(B) Hysterectomy\n(C) Knee replacement\n(D) Coronary bypass" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.734375, "loss": 5.324768543243408 }, { "id": "identity_265498", "image": "VizWiz/images/VizWiz_train_00012687.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the code shown in the captcha image?" }, { "from": "gpt", "value": "The answer is gfdg" } ], "clip_score": 17.671875, "loss": 4.231431484222412 }, { "id": "identity_24204", "image": "TQA/images/question_images/earth_poles_168.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which does not describe the day?\nChoices:\n(A) sun never sets\n(B) sun never rises\n(C) long days\n(D) short days" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.796875, "loss": 4.02778959274292 }, { "id": "identity_64209", "image": "ScienceQA/images/16786.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the bottle depicted in the image full or empty?" }, { "from": "gpt", "value": "The answer is Not empty" } ], "clip_score": 24.828125, "loss": 4.424504280090332 }, { "id": "identity_214531", "image": "VQA2.0/images/64411.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many street lamps are lit in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.265625, "loss": 4.318005084991455 }, { "id": "identity_295222", "image": "A-OKVQA/images/25833.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action most likely led to the presence of the large novelty check on the windshield of the double decker bus? \nChoices:\n(A) corporate sponsorship\n(B) lottery winnings\n(C) charitable donation\n(D) personal loan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.96875, "loss": 4.7365593910217285 }, { "id": "identity_220161", "image": "A-OKVQA/images/201252.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shirt worn by the vendor behind the market stall?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 24.578125, "loss": 4.403905868530273 }, { "id": "identity_222079", "image": "VQA2.0/images/259104.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the second remote control from the left?" }, { "from": "gpt", "value": "The answer is VIBE" } ], "clip_score": 29.84375, "loss": 5.606597423553467 }, { "id": "identity_220698", "image": "A-OKVQA/images/146830.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible in the outfield?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 26.1875, "loss": 4.7216057777404785 }, { "id": "identity_115045", "image": "TextVQA/images/924f39f1c604e6e2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the umbrella in the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 19.515625, "loss": 4.043332099914551 }, { "id": "identity_280949", "image": "FigureQA/images/46545.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Khaki the highest option available?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.46875, "loss": 4.567011833190918 }, { "id": "identity_285069", "image": "A-OKVQA/images/413248.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which mode of transportation is depicted in the image? \nOptions:\n(A) water\n(B) air\n(C) rail\n(D) road" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.65625, "loss": 4.630356311798096 }, { "id": "identity_270934", "image": "DVQA/images/bar_train_00027207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance or importance of the artist in the note?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 19.734375, "loss": 4.138065814971924 }, { "id": "identity_41019", "image": "FigureQA/images/57809.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which bar has the highest value in the chart?" }, { "from": "gpt", "value": "The answer is Violet Red" } ], "clip_score": 27.90625, "loss": 4.361479759216309 }, { "id": "identity_338212", "image": "A-OKVQA/images/66691.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who is currently seated at the table in the front?\nChoices:\n(A) background person\n(B) no one\n(C) lady beth\n(D) cook" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.890625, "loss": 4.053104400634766 }, { "id": "identity_273147", "image": "A-OKVQA/images/339536.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of buildings can be seen in the background? Are they:\n(A) high rises\n(B) cabanas\n(C) bungalows\n(D) low rises" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.328125, "loss": 4.429374694824219 }, { "id": "identity_41142", "image": "DVQA/images/bar_train_00093974.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group has the highest combined value of all categories?" }, { "from": "gpt", "value": "The answer is cover" } ], "clip_score": 25.328125, "loss": 4.565624237060547 }, { "id": "identity_155095", "image": "ScienceQA/images/5682.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the object?" }, { "from": "gpt", "value": "The answer is Twisted loop" } ], "clip_score": 27.296875, "loss": 4.437642574310303 }, { "id": "identity_290599", "image": "PMC-VQA/images/PMC7298109_F1_193581.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the head CT scan? \nChoices:\n(A) Abnormality in the gray matter \n(B) Abnormality in the white matter \n(C) Abnormality in the cerebrospinal fluid \n(D) Abnormality in the ventricles" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.34375, "loss": 4.097331523895264 }, { "id": "identity_22919", "image": "PMC-VQA/images/PMC8586710_F11_110541.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the visible structure on the leaf surface? \nChoices:\n(A) Stomata \n(B) Root hairs \n(C) Salt glands \n(D) Cuticle layers " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.59375, "loss": 4.814777374267578 }, { "id": "identity_72343", "image": "ScienceQA/images/2967.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What role does the kelp play in this marine ecosystem?" }, { "from": "gpt", "value": "The answer is Habitat and producer" } ], "clip_score": 29.703125, "loss": 4.256989002227783 }, { "id": "identity_125396", "image": "TextVQA/images/022be8ef346e3009.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are carrying blue objects?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.125, "loss": 4.6388349533081055 }, { "id": "identity_3834", "image": "Super-CLEVR/images/superCLEVR_new_000751.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a metallic bicycle; is it the same size as the red metal object in front of the tiny red thing?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 28.875, "loss": 4.763515472412109 }, { "id": "identity_20457", "image": "IconQA/images/43250.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner more likely to land?\nChoices:\n(A) white\n(B) pink" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.515625, "loss": 4.713768005371094 }, { "id": "identity_305076", "image": "Super-CLEVR/images/superCLEVR_new_000413.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there anything else that has the same shape as this small red thing?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 10.1171875, "loss": 4.561833381652832 }, { "id": "identity_329705", "image": "PMC-VQA/images/PMC6052568_Fig4_343667.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the blue color indicate in the t maps?\nChoices:\n(A) Reduced gray matter\n(B) Elevated [18F]-THK5351 binding\n(C) Healthy Controls\n(D) Partial Volume" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.75, "loss": 4.991809368133545 }, { "id": "identity_6968", "image": "PMC-VQA/images/PMC6611056_tca13066-fig-0001_4492.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging study used to visualize the lesion?\nChoices:\n(A) Magnetic resonance imaging (MRI). \n(B) Computed tomography (CT) scan. \n(C) X-ray. \n(D) Positron emission tomography (PET) scan." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.359375, "loss": 4.239670753479004 }, { "id": "identity_279841", "image": "Super-CLEVR/images/superCLEVR_new_001179.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the purple metallic object to the right of the blue metal truck and the SUV in front of the purple tandem bike of the same size?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 33.3125, "loss": 5.250807285308838 }, { "id": "identity_284052", "image": "AI2D/images/4280.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What generates biogenic methane?\nOptions:\n(A) thermogenic methane\n(B) bacterial actions\n(C) methane seeps\n(D) permafrost" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.234375, "loss": 4.196703910827637 }, { "id": "identity_261776", "image": "PMC-VQA/images/PMC6699134_Fig4_25176.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the image provided? \nOptions:\n(A) Cell division \n(B) Transmigration of cancer cells \n(C) Blood circulation \n(D) Brain surgery" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.765625, "loss": 4.972267150878906 }, { "id": "identity_321070", "image": "PMC-VQA/images/PMC8268606_Fig1_5131.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are these lesions observed?\nChoices:\n(A) Left hemisphere\n(B) Right hemisphere\n(C) Both hemispheres\n(D) Frontal lobe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.96875, "loss": 4.155078411102295 }, { "id": "identity_309837", "image": "TQA/images/abc_question_images/optics_lense_types_19161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the label Y indicate?\nChoices:\n(A) BLIND SPOT\n(B) VANISHING POINT\n(C) DISTANCE\n(D) FOCUS" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.203125, "loss": 4.567967414855957 }, { "id": "identity_15148", "image": "ScienceQA/images/19485.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the picture. Which word best describes the sound this water makes?\nChoices:\n(A) dripping\n(B) snapping\n(C) growling" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.84375, "loss": 4.260144233703613 }, { "id": "identity_212688", "image": "TextVQA/images/0d0cdf59c4561f03.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which city is hosting the event?" }, { "from": "gpt", "value": "The answer is Stuttgart" } ], "clip_score": 23.625, "loss": 4.386725902557373 }, { "id": "identity_311452", "image": "A-OKVQA/images/157037.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this place called?\nChoices:\n(A) library\n(B) mall\n(C) supermarket\n(D) deli" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.03125, "loss": 4.246220588684082 }, { "id": "identity_330989", "image": "A-OKVQA/images/117098.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are these vehicles located?\nChoices:\n(A) bike show\n(B) garage\n(C) parking lot\n(D) road" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.578125, "loss": 4.372209548950195 }, { "id": "identity_130742", "image": "A-OKVQA/images/407820.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are raising their hands in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 16.109375, "loss": 4.476168632507324 }, { "id": "identity_272733", "image": "FigureQA/images/84555.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gray's area under the curve the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.7265625, "loss": 4.59089994430542 }, { "id": "identity_145921", "image": "DocVQA/images/fqyf0227_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last word on the page?" }, { "from": "gpt", "value": "The answer is aw" } ], "clip_score": 13.59375, "loss": 6.4417829513549805 }, { "id": "identity_66309", "image": "ScienceQA/images/15054.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the likely use of the substance collected from this tree?" }, { "from": "gpt", "value": "The answer is To make natural rubber products" } ], "clip_score": 24.640625, "loss": 4.545034885406494 }, { "id": "identity_284275", "image": "PMC-VQA/images/PMC4356427_f1-ol-09-04-1667_366612.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the lung is the mass located?\nChoices:\n(A) Upper lobe of right lung\n(B) Middle lobe of left lung\n(C) Lower lobe of right lung\n(D) Upper lobe of left lung" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.515625, "loss": 4.003729820251465 }, { "id": "identity_147270", "image": "DVQA/images/bar_train_00138552.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset has the largest interquartile range (IQR)?" }, { "from": "gpt", "value": "The answer is date" } ], "clip_score": 25.203125, "loss": 4.0876688957214355 }, { "id": "identity_277311", "image": "PMC-VQA/images/PMC3141654_F11_103036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is represented by the white dotted line in the diagram?\nChoices:\n(A) The progress of fusion \n(B) The progress of inversion \n(C) The progress of lateral subdivision \n(D) The progress of developing brain" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.859375, "loss": 4.794363498687744 }, { "id": "identity_154316", "image": "AI2D/images/837.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary difference between stages 4 and 5 in the grasshopper's development as shown in the image?" }, { "from": "gpt", "value": "The answer is Wing development" } ], "clip_score": 32.4375, "loss": 5.283854961395264 }, { "id": "identity_328329", "image": "IconQA/images/10797.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a star. Which image is tenth?\nChoices:\n(A) moon\n(B) star\n(C) rocket" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.96875, "loss": 4.41888952255249 }, { "id": "identity_6814", "image": "A-OKVQA/images/155811.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What alerts people here of a safe crossing time?\nChoices:\n(A) policeman\n(B) crossing guard\n(C) walk light\n(D) horses" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.25, "loss": 4.1890549659729 }, { "id": "identity_280143", "image": "ScienceQA/images/7592.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being pointed out in the highlighted section? Is it:\n(A) Vanuatu\n(B) Nauru\n(C) New Zealand\n(D) Kiribati" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.03125, "loss": 4.664410591125488 }, { "id": "identity_338613", "image": "VQA2.0/images/474760.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is this in relation to the sink?" }, { "from": "gpt", "value": "The answer is adjacent" } ], "clip_score": 22.84375, "loss": 4.0849127769470215 }, { "id": "identity_105361", "image": "PMC-VQA/images/PMC8472088_diagnostics-11-01726-f003_70102.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the circular structure in the center of the image?" }, { "from": "gpt", "value": "The answer is Lumen of a blood vessel" } ], "clip_score": 27.796875, "loss": 4.170607089996338 }, { "id": "identity_288444", "image": "PMC-VQA/images/PMC5145835_Fig7_103639.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the orientation of the femur shown in the image? \nOptions:\n(A) Anterior.\n(B) Posterior.\n(C) Medial.\n(D) Lateral." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.109375, "loss": 4.34727668762207 }, { "id": "identity_338101", "image": "PMC-VQA/images/PMC8125971_jcm-10-02026-f004_442779.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which plane was used for this T1-weighted image?\nChoices:\n(A) Coronal plane\n(B) Sagittal plane\n(C) Axial plane\n(D) All of the above were used" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.8125, "loss": 4.222537517547607 }, { "id": "identity_62303", "image": "DocVQA/images/ykdv0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of the transaction?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 20.3125, "loss": 4.012758255004883 }, { "id": "identity_265907", "image": "PMC-VQA/images/PMC7394973_Fig5_223604.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which elements are suspected to be related to the loosening of the tibial component? \nChoices:\n(A) Metal shavings\n(B) Cement fragments\n(C) MRI effusion\n(D) Bone dust" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.421875, "loss": 4.870175838470459 }, { "id": "identity_264276", "image": "A-OKVQA/images/42204.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the occupation of the individuals working behind the different stands?\nChoices:\n(A) computer programming\n(B) sewing\n(C) vendors\n(D) turking" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.5078125, "loss": 4.902479648590088 }, { "id": "identity_14331", "image": "ScienceQA/images/43.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these three objects have in common?\nChoices:\n(A) hard\n(B) soft\n(C) yellow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.875, "loss": 4.1082987785339355 }, { "id": "identity_336376", "image": "A-OKVQA/images/409050.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of area is outside here?\nChoices:\n(A) patio\n(B) deck\n(C) porch\n(D) balcony" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.15625, "loss": 4.019968509674072 }, { "id": "identity_188279", "image": "VQA-AS/images/7041.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many benches are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 26.296875, "loss": 4.155402183532715 }, { "id": "identity_308220", "image": "ScienceQA/images/8402.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Biloxi\n(B) Little Rock\n(C) Charleston\n(D) Huntington" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.640625, "loss": 4.713820457458496 }, { "id": "identity_18320", "image": "Super-CLEVR/images/superCLEVR_new_001275.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a matte car that is behind the tiny matte car that is left of the matte minivan; what color is it?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 29.203125, "loss": 4.039420127868652 }, { "id": "identity_24828", "image": "PMC-VQA/images/PMC7196219_Fig4_162233.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What view was not taken during the imaging process? \nChoices:\n(A) AP view \n(B) Lateral view \n(C) Oblique view \n(D) None of the above " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.4375, "loss": 4.017178535461426 }, { "id": "identity_31450", "image": "VQA2.0/images/450621.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the right girl holding?" }, { "from": "gpt", "value": "The answer is phone" } ], "clip_score": 16.5625, "loss": 4.67280912399292 }, { "id": "identity_148545", "image": "DocVQA/images/sffd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the memorandum addressed to?" }, { "from": "gpt", "value": "The answer is NACHHD Council Members" } ], "clip_score": 23.734375, "loss": 4.170886516571045 }, { "id": "identity_321110", "image": "A-OKVQA/images/79544.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person with the baby involved in?\nChoices:\n(A) burping\n(B) banking\n(C) travelling\n(D) marketing" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.9921875, "loss": 4.620255947113037 }, { "id": "identity_281816", "image": "PMC-VQA/images/PMC6885221_fig1_76359.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrowhead in the image signify?\nOptions:\n(A) Thickened basement membrane.\n(B) Simple squamous epithelium.\n(C) Pseudostratified columnar epithelium.\n(D) Goblet cell." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.46875, "loss": 4.995451927185059 }, { "id": "identity_306042", "image": "A-OKVQA/images/170476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the person while they are swimming?\nChoices:\n(A) in ocean\n(B) in pool\n(C) in river\n(D) in lake" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.46875, "loss": 4.178303241729736 }, { "id": "identity_51205", "image": "FigureQA/images/28620.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many colors represent sections that are smaller than the Violet Red section?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 31.03125, "loss": 4.696254730224609 }, { "id": "identity_262427", "image": "ScienceQA/images/6644.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following animals is a bird? \nChoices:\n(A) clownfish\n(B) Surinam horned frog\n(C) tiger shark\n(D) brown pelican" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.078125, "loss": 4.06514835357666 }, { "id": "identity_7713", "image": "ScienceQA/images/2575.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these three objects have in common?\nChoices:\n(A) transparent\n(B) yellow\n(C) rough" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.828125, "loss": 4.532309055328369 }, { "id": "identity_315216", "image": "ScienceQA/images/6426.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this continent?\nChoices:\n(A) Australia\n(B) Asia\n(C) Antarctica\n(D) North America" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.46875, "loss": 4.816403388977051 }, { "id": "identity_9172", "image": "FigureQA/images/51383.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Salmon the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.28125, "loss": 4.653827667236328 }, { "id": "identity_286247", "image": "A-OKVQA/images/185991.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the probable uses of the buildings in the background?\nChoices:\n(A) offices\n(B) private dwellings\n(C) shops\n(D) schools" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.0234375, "loss": 5.015479564666748 }, { "id": "identity_270735", "image": "ScienceQA/images/3850.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which question is best suited for Darell's experiment to answer:\n(A) Will steel nails rust faster when submerged in a large volume of liquid compared to a small volume?\n(B) Do steel nails rust quicker in water than in vinegar?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.765625, "loss": 4.300484657287598 }, { "id": "identity_279231", "image": "A-OKVQA/images/196192.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the content written on the tags?\nChoices:\n(A) hotel\n(B) destination\n(C) good luck\n(D) itinerary" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.078125, "loss": 4.787477016448975 }, { "id": "identity_218246", "image": "A-OKVQA/images/544994.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are placed on the table in front of the person?" }, { "from": "gpt", "value": "The answer is Four objects" } ], "clip_score": 16.296875, "loss": 4.44992208480835 }, { "id": "identity_190793", "image": "VQA2.0/images/98390.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the motorcycle directly behind the truck?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 23.171875, "loss": 4.130884647369385 }, { "id": "identity_304113", "image": "TextVQA/images/b4e93b744f962240.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the letters after \"HR\"?" }, { "from": "gpt", "value": "The answer is aws" } ], "clip_score": 7.1484375, "loss": 4.227660179138184 }, { "id": "identity_285328", "image": "PMC-VQA/images/PMC5358222_F7_142636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which book is the image captured?\nOptions:\n(A) Gray's Anatomy\n(B) Nelson's Pediatrics\n(C) Robbins Pathology\n(D) Harrison's Internal Medicine" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.9375, "loss": 5.334264755249023 }, { "id": "identity_146541", "image": "DocVQA/images/hpxm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the grant mentioned in the memorandum?" }, { "from": "gpt", "value": "The answer is To cover the costs of the meeting in Kiwaii" } ], "clip_score": 24.484375, "loss": 4.023576736450195 }, { "id": "identity_303234", "image": "A-OKVQA/images/55868.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of this large bird?\nChoices:\n(A) flamingo\n(B) seahawk\n(C) stork\n(D) pelican" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.640625, "loss": 4.262655735015869 }, { "id": "identity_308380", "image": "PMC-VQA/images/PMC4965059_pone.0159463.g001_56949.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which staining is shown in white in these images?\nChoices:\n(A) beta-amyloid,\n(B) tau,\n(C) synaptophysin,\n(D) GFAP." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.5, "loss": 4.487542152404785 }, { "id": "identity_35600", "image": "A-OKVQA/images/319521.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the black man's occupation?\nChoices:\n(A) doctor\n(B) salesman\n(C) lifeguard\n(D) officer" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.859375, "loss": 4.350068092346191 }, { "id": "identity_203303", "image": "A-OKVQA/images/424091.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lanes are visible on the road in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.5, "loss": 4.288732051849365 }, { "id": "identity_256294", "image": "A-OKVQA/images/385405.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of animals can you identify in this pet store?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 26.359375, "loss": 4.398532867431641 }, { "id": "identity_9619", "image": "IconQA/images/5898.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of squares even or odd?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.40625, "loss": 4.564094066619873 }, { "id": "identity_253981", "image": "PMC-VQA/images/PMC6269333_Fig11_404761.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct lesions can be identified in the T1-weighted coronal image, and what is their average size category?" }, { "from": "gpt", "value": "The answer is Not mentioned" } ], "clip_score": 31.390625, "loss": 5.041039943695068 }, { "id": "identity_218056", "image": "VizWiz/images/VizWiz_train_00006454.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many light sources are reflected on the table?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 17.4375, "loss": 4.433110237121582 }, { "id": "identity_104775", "image": "PMC-VQA/images/PMC7507560_rcr2662-fig-0002_258828.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the dense structure located in the center of the image?" }, { "from": "gpt", "value": "The answer is The spine" } ], "clip_score": 21.28125, "loss": 4.00369930267334 }, { "id": "identity_21589", "image": "A-OKVQA/images/507249.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What vehicles are here?\nChoices:\n(A) pogo sticks\n(B) boats\n(C) cars\n(D) bicycles" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.09375, "loss": 4.069822311401367 }, { "id": "identity_9298", "image": "TQA/images/abc_question_images/parts_microscope_17177.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label refers to the diopter?\nChoices:\n(A) P\n(B) W\n(C) H\n(D) L" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.5625, "loss": 4.066208362579346 }, { "id": "identity_18373", "image": "PMC-VQA/images/PMC6459035_F1_464098.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which side was the lateral radiograph taken from? \nChoices:\n(A) Left knee \n(B) Right knee \n(C) Unspecified \n(D) Alternate knees. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.71875, "loss": 4.312228202819824 }, { "id": "identity_330952", "image": "PMC-VQA/images/PMC4567364_f03_422925.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the structure indicated by the arrows in this image?\nChoices:\n(A) Ovary\n(B) Urethra\n(C) Vagina\n(D) Cervix" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.5625, "loss": 4.936395168304443 }, { "id": "identity_221504", "image": "A-OKVQA/images/243847.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are riding the elephant?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 28.984375, "loss": 4.138672351837158 }, { "id": "identity_292550", "image": "VQA2.0/images/367357.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the jacket worn by the man?" }, { "from": "gpt", "value": "The answer is black" } ], "clip_score": 11.5703125, "loss": 4.379475116729736 }, { "id": "identity_298101", "image": "A-OKVQA/images/174721.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which word is absent from the phrase that concludes with America?\nChoices:\n(A) good\n(B) wonderful\n(C) productive\n(D) hello" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.5703125, "loss": 4.059844970703125 }, { "id": "identity_337738", "image": "PMC-VQA/images/PMC4658764_Fig1_447501.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which lobe in the lung is affected by the ground glass opacity in this?\nChoices:\n(A) right middle lobe\n(B) left upper lobe\n(C) right lower lobe\n(D) left lower lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.6875, "loss": 4.055427074432373 }, { "id": "identity_299111", "image": "IconQA/images/35491.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) slide\n(B) flip\n(C) turn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.15625, "loss": 4.584603786468506 }, { "id": "identity_216831", "image": "VQA2.0/images/37932.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 17.265625, "loss": 4.674120903015137 }, { "id": "identity_299464", "image": "PMC-VQA/images/PMC6571772_pharmaceutics-11-00248-f004_494592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the patient's body is the catheter inserted?\nChoices:\n(A) Right carotid artery\n(B) Left carotid artery\n(C) Vein in the patient's leg\n(D) Intracranially." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.34375, "loss": 4.471067428588867 }, { "id": "identity_286879", "image": "DVQA/images/bar_train_00046911.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Among all the datasets, which algorithm has the highest total accuracy?" }, { "from": "gpt", "value": "The answer is glass" } ], "clip_score": 34.1875, "loss": 4.150793075561523 }, { "id": "identity_239577", "image": "TextVQA/images/cad24ea925944b15.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What letter is directly under the midpoint between 10 and 20?" }, { "from": "gpt", "value": "The answer is o" } ], "clip_score": 10.265625, "loss": 4.211887359619141 }, { "id": "identity_9448", "image": "VQA2.0/images/479729.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the hand holding?" }, { "from": "gpt", "value": "The answer is screwdriver" } ], "clip_score": 17.40625, "loss": 4.101544380187988 }, { "id": "identity_31126", "image": "DocVQA/images/xpfm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the co-chairman of the nutrition in the government panel?" }, { "from": "gpt", "value": "The answer is bill carey" } ], "clip_score": 26.125, "loss": 4.027760028839111 }, { "id": "identity_281206", "image": "PMC-VQA/images/PMC2566560_F1_28707.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this case, which part of the finger is impacted?\nChoices:\n(A) Proximal phalanx\n(B) Intermediate phalanx\n(C) Distal phalanx\n(D) Metacarpals" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.671875, "loss": 4.165593147277832 }, { "id": "identity_20908", "image": "A-OKVQA/images/132141.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person in the foreground hovering over?\nChoices:\n(A) ramp\n(B) car\n(C) rooftop\n(D) baby" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.65625, "loss": 4.526816368103027 }, { "id": "identity_71987", "image": "TQA/images/question_images/hydrocarbons_9119.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the number of hydrogen atoms between the saturated hydrocarbon and the second unsaturated hydrocarbon shown in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 36.875, "loss": 4.656327724456787 }, { "id": "identity_286757", "image": "A-OKVQA/images/54039.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose are the vehicles covered? \n(A) for decoration\n(B) to prevent theft\n(C) for protection\n(D) for transportation" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.265625, "loss": 4.041504859924316 }, { "id": "identity_29907", "image": "A-OKVQA/images/37932.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As it is walked by the woman what is inside of the dog's mouth?\nChoices:\n(A) umbrella\n(B) frisbee\n(C) bone\n(D) stick" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.984375, "loss": 4.1263346672058105 }, { "id": "identity_218975", "image": "PMC-VQA/images/PMC6448413_F1_461161.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the anatomical structure labeled with the letter \"B\"?" }, { "from": "gpt", "value": "The answer is Canine head lateral radiograph view" } ], "clip_score": 29.96875, "loss": 4.126607894897461 }, { "id": "identity_313515", "image": "A-OKVQA/images/39928.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this place located?\nChoices:\n(A) hamburg\n(B) vienna\n(C) new york\n(D) toronto" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.4296875, "loss": 4.052613258361816 }, { "id": "identity_333899", "image": "PMC-VQA/images/PMC7591820_F2_283632.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the soft tissue mass found?\nChoices:\n(A) Left nasal cavity\n(B) Maxillary sinus\n(C) Right nasal cavity\n(D) Ethmoid sinuses" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.40625, "loss": 4.759634971618652 }, { "id": "identity_232540", "image": "Super-CLEVR/images/superCLEVR_new_001113.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the object that is both left of the metal tandem bike and in front of the large truck?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 26.578125, "loss": 4.614681720733643 }, { "id": "identity_35070", "image": "PMC-VQA/images/PMC6678943_jcm-08-00996-f005_18468.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible in the MRI images? \nChoices:\n(A) Posterior WM \n(B) Ocular manifestations \n(C) Caps \n(D) Anterior WM " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.953125, "loss": 4.120295524597168 }, { "id": "identity_194187", "image": "A-OKVQA/images/100234.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any bridges visible in the background?" }, { "from": "gpt", "value": "The answer is Yes, one bridge" } ], "clip_score": 19.25, "loss": 4.331887245178223 }, { "id": "identity_17882", "image": "TQA/images/question_images/parts_microscope_7180.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose or objective lens?\nChoices:\n(A) To allow light to pass through the stage\n(B) For balance\n(C) The amount of zoom of the image\n(D) To hold the object" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.25, "loss": 5.065823554992676 }, { "id": "identity_216359", "image": "A-OKVQA/images/326936.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is hanging on the wall above the counter?" }, { "from": "gpt", "value": "The answer is A wire mesh food cover" } ], "clip_score": 18.75, "loss": 4.7785868644714355 }, { "id": "identity_14918", "image": "TQA/images/question_images/cycle_rock_6742.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the first step in producing a sedimentary rock from an igneous or metamorphic rock?\nChoices:\n(A) Cooling\n(B) Melting\n(C) Compaction and cementation\n(D) Weathering and erosion" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.03125, "loss": 4.479188442230225 }, { "id": "identity_265966", "image": "ScienceQA/images/3049.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which question is most suitable for Kayla's experiment to answer?\nChoices:\n(A) Can the growth of tomato seedlings be influenced by the humidity level of the planting environment?\n(B) Is there a difference in the number of tomato seedlings that grow when planted in soil with fertilizer versus soil without fertilizer?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.28125, "loss": 4.201171875 }, { "id": "identity_329038", "image": "Super-CLEVR/images/superCLEVR_new_000189.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the purple object that is on the right side of the large thing and the small object behind the big purple regular thing made of the same material?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 23.796875, "loss": 5.710135459899902 }, { "id": "identity_33499", "image": "PMC-VQA/images/PMC8227570_children-08-00450-f001_476791.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What structure is indicated by the white arrow? \nChoices:\n(A) Pituitary gland \n(B) Corpus Callosum \n(C) Spinal cord \n(D) Cerebellum " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.640625, "loss": 4.9207329750061035 }, { "id": "identity_105128", "image": "PMC-VQA/images/PMC6296120_Fig1_412051.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is predominantly affected by the pathology seen in the image?" }, { "from": "gpt", "value": "The answer is The tibia" } ], "clip_score": 30.625, "loss": 4.277930736541748 }, { "id": "identity_297620", "image": "PMC-VQA/images/PMC8913485_Fig5_219873.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the intended use of the displayed image?\nOptions:\n(A) To compare two sets of CT scans\n(B) To demonstrate a medical treatment procedure\n(C) To showcase 3D reconstruction\n(D) To emphasize imaging artifacts" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.75, "loss": 4.534558296203613 }, { "id": "identity_2067", "image": "ScienceQA/images/3122.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) Nauru\n(B) the Marshall Islands\n(C) Vanuatu\n(D) Kiribati" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.234375, "loss": 4.150843620300293 }, { "id": "identity_5414", "image": "ScienceQA/images/18831.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which term matches the picture?\nChoices:\n(A) echinoderm\n(B) cnidarian" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.09375, "loss": 4.141029357910156 }, { "id": "identity_307638", "image": "FigureQA/images/84449.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Indigo?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.34375, "loss": 4.223865985870361 }, { "id": "identity_7490", "image": "Super-CLEVR/images/superCLEVR_new_000269.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the large bicycle behind the blue rubber object and the tiny road bike have the same material?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 30.625, "loss": 5.374311923980713 }, { "id": "identity_269622", "image": "FigureQA/images/78649.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Web Maroon or Dark Slate?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.328125, "loss": 4.516737937927246 }, { "id": "identity_63276", "image": "AI2D/images/3797.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many leaves can be visibly counted on the plant in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 33.40625, "loss": 4.900217533111572 }, { "id": "identity_306207", "image": "FigureQA/images/80131.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.5, "loss": 4.099135875701904 }, { "id": "identity_305673", "image": "FigureQA/images/23033.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.0, "loss": 4.588382720947266 }, { "id": "identity_17286", "image": "Super-CLEVR/images/superCLEVR_new_001127.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any other thing that is made of the same material as the large yellow bicycle?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 24.296875, "loss": 4.925359725952148 }, { "id": "identity_277301", "image": "FigureQA/images/89413.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Web Green the option with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.515625, "loss": 4.085801601409912 }, { "id": "identity_303947", "image": "AI2D/images/2995.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is located between the secondary phloem and secondary xylem?\nChoices:\n(A) cork cambium\n(B) Vascular cambium\n(C) pith\n(D) cortex" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.796875, "loss": 4.13916540145874 }, { "id": "identity_33760", "image": "PMC-VQA/images/PMC8933768_brb32490-fig-0003_226525.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of lesions are shown in the MRI image?\nChoices:\n(A) Cystic lesions \n(B) Inflammatory lesions \n(C) Hemorrhagic lesions \n(D) None of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.8125, "loss": 4.48788595199585 }, { "id": "identity_335687", "image": "DVQA/images/bar_train_00105203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bars have values less than 9?" }, { "from": "gpt", "value": "The answer is six" } ], "clip_score": 22.171875, "loss": 4.1973981857299805 }, { "id": "identity_30771", "image": "PMC-VQA/images/PMC5978832_Fig2_321894.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the main symptom of this condition? \nChoices:\n(A) Nausea \n(B) Headache \n(C) Joint pain \n(D) Chest pain " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.546875, "loss": 4.407415866851807 }, { "id": "identity_25555", "image": "PMC-VQA/images/PMC7394290_fig2_223351.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the opacities located?\nChoices:\n(A) Right lung \n(B) Both lungs \n(C) Left lung \n(D) Abdomen " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.0625, "loss": 4.72831916809082 }, { "id": "identity_249497", "image": "PMC-VQA/images/PMC3635941_F3_200806.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key characteristics of the imaging technique used in this image, and how do they help in differentiating it from other imaging techniques?" }, { "from": "gpt", "value": "The answer is High contrast resolution, no ionizing radiation, detailed soft tissue images" } ], "clip_score": 27.96875, "loss": 4.016049861907959 }, { "id": "identity_5661", "image": "IconQA/images/46420.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner less likely to land?\nChoices:\n(A) yellow\n(B) white" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.078125, "loss": 4.14033842086792 }, { "id": "identity_275146", "image": "Super-CLEVR/images/superCLEVR_new_000253.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do any other items share the same material as the small blue cruiser?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 27.0625, "loss": 4.985468864440918 }, { "id": "identity_7945", "image": "ScienceQA/images/4936.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these organisms contains matter that was once part of the persimmon tree?\nChoices:\n(A) silver maple\n(B) black racer" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.03125, "loss": 4.912658214569092 }, { "id": "identity_30536", "image": "A-OKVQA/images/323249.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The vehicle here is powered by something located where in relation to it?\nChoices:\n(A) above\n(B) street side\n(C) inside\n(D) under" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.296875, "loss": 4.252130508422852 }, { "id": "identity_271102", "image": "PMC-VQA/images/PMC7057593_Fig5_129341.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technology was utilized for preoperative planning?\nChoices:\n(A) Postoperative X-ray \n(B) Preoperative X-ray \n(C) X-ray in the final follow-up \n(D) Preoperative CT with 3 D reconstruction" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.171875, "loss": 4.219046592712402 }, { "id": "identity_321548", "image": "A-OKVQA/images/497261.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What role does this entity play in this game?\nChoices:\n(A) site provider\n(B) loan provider\n(C) sponsor\n(D) fund provider" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.59375, "loss": 4.509523391723633 }, { "id": "identity_32193", "image": "A-OKVQA/images/466484.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which object on one of the boats would help someone prepare for rain?\nChoices:\n(A) bucket\n(B) tarp\n(C) sail\n(D) motor" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.109375, "loss": 4.540694236755371 }, { "id": "identity_156926", "image": "TQA/images/question_images/simple_machines_9249.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct objects are involved in the operation of this simple machine?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 29.65625, "loss": 4.685859680175781 }, { "id": "identity_31489", "image": "A-OKVQA/images/184201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of boat is this?\nChoices:\n(A) tow boat\n(B) fishing\n(C) transport\n(D) coast guard" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.75, "loss": 4.452219486236572 }, { "id": "identity_262418", "image": "TQA/images/abc_question_images/ocean_zones_17139.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label that represents the Bathypelagic zone:\n(A) C\n(B) H\n(C) A\n(D) F" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.953125, "loss": 5.025169372558594 }, { "id": "identity_290497", "image": "TabMWP/images/28055.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.671875, "loss": 4.673928737640381 }, { "id": "identity_290341", "image": "TabMWP/images/20249.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: With $1,700.00, can Arianna afford to purchase both an oboe and a trumpet?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.78125, "loss": 4.113322734832764 }, { "id": "identity_120706", "image": "TextVQA/images/5621f494bcbe63b5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many angel figurines are on the right side of the table?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 30.46875, "loss": 4.782495975494385 }, { "id": "identity_307615", "image": "FigureQA/images/23119.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.328125, "loss": 4.222311973571777 }, { "id": "identity_309334", "image": "TQA/images/abc_question_images/human_system_nervous_16167.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter indicates the motor cortex?\nChoices:\n(A) R\n(B) N\n(C) H\n(D) V" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.578125, "loss": 4.205824851989746 }, { "id": "identity_32122", "image": "IconQA/images/31203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a star. Which picture is third?\nChoices:\n(A) rocket\n(B) star\n(C) moon" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.875, "loss": 4.485556125640869 }, { "id": "identity_257732", "image": "PMC-VQA/images/PMC4585177_F4_427482.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the steps involved in performing ROI analysis and how does the image shown in (B) help in visualizing the normalized ROI analysis?" }, { "from": "gpt", "value": "The answer is The image shows the activation of specific brain regions after normalization" } ], "clip_score": 26.515625, "loss": 4.0922956466674805 }, { "id": "identity_14663", "image": "TQA/images/question_images/human_system_digestive_1385.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these parts of the digestive system is roughly 7 meters long in adults?\nChoices:\n(A) large intestine\n(B) small intestine\n(C) esophagus\n(D) rectum" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.9375, "loss": 4.184787273406982 }, { "id": "identity_308047", "image": "FigureQA/images/40568.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Peru?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.703125, "loss": 4.36093807220459 }, { "id": "identity_3231", "image": "Super-CLEVR/images/superCLEVR_new_000244.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a shiny motorbike of the same color as the bicycle?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 26.796875, "loss": 5.218212604522705 }, { "id": "identity_59777", "image": "DocVQA/images/xkjp0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the coordinator mentioned in the Anniversaries Banquet section?" }, { "from": "gpt", "value": "The answer is Food for Peace Agency for International Development" } ], "clip_score": 23.453125, "loss": 4.20902681350708 }, { "id": "identity_304932", "image": "DocVQA/images/xnkp0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the passenger?" }, { "from": "gpt", "value": "The answer is dr. William darby" } ], "clip_score": 22.59375, "loss": 4.701292037963867 }, { "id": "identity_106871", "image": "VizWiz/images/VizWiz_train_00017849.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand name of the device shown in the image?" }, { "from": "gpt", "value": "The answer is Humanware" } ], "clip_score": 23.875, "loss": 4.126183032989502 }, { "id": "identity_254937", "image": "ScienceQA/images/652.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the state highlighted on the map and name its capital" }, { "from": "gpt", "value": "The answer is Boston" } ], "clip_score": 26.625, "loss": 4.233443737030029 }, { "id": "identity_300898", "image": "TQA/images/abc_question_images/nuclear_energy_17105.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the part labeled M?\nChoices:\n(A) BORON CONTROL RODS\n(B) URANIUM FUEL RODS\n(C) PUMP\n(D) CORE" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.1875, "loss": 5.22445011138916 }, { "id": "identity_203576", "image": "A-OKVQA/images/60387.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible on the sidewalk across the street?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 27.109375, "loss": 4.38785457611084 }, { "id": "identity_134894", "image": "DocVQA/images/zlcn0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many exports does the chemistry industry account for in the U.S. according to the document?" }, { "from": "gpt", "value": "The answer is Fourteen percent" } ], "clip_score": 21.5, "loss": 4.176811218261719 }, { "id": "identity_26375", "image": "A-OKVQA/images/2315.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a unique feature of these animals?\nChoices:\n(A) quills\n(B) gills\n(C) neck\n(D) trunk" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.140625, "loss": 4.264448642730713 }, { "id": "identity_73332", "image": "TQA/images/abc_question_images/nuclear_energy_18113.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the letter M represent in the diagram?" }, { "from": "gpt", "value": "The answer is The rearranged cluster of particles after impact" } ], "clip_score": 29.734375, "loss": 4.488284111022949 }, { "id": "identity_336140", "image": "PMC-VQA/images/PMC5738007_F1_255092.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the medical imaging technique used here? \nChoices:\n(A) Ultrasound \n(B) X-ray \n(C) MRI \n(D) PET scan" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.546875, "loss": 4.797566890716553 }, { "id": "identity_61447", "image": "MapQA/images/map_8862.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color representing the range of commissioners in Florida?" }, { "from": "gpt", "value": "The answer is A color name" } ], "clip_score": 29.734375, "loss": 5.561763286590576 }, { "id": "identity_269535", "image": "TabMWP/images/29361.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Myra have sufficient funds to purchase both a CD player and an alarm clock with her $42.50?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.4375, "loss": 4.093964099884033 }, { "id": "identity_317325", "image": "TextVQA/images/25ecbb91943dfe78.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the large M red?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 14.078125, "loss": 4.01110315322876 }, { "id": "identity_106768", "image": "A-OKVQA/images/202880.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many giraffes are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 30.328125, "loss": 4.053793907165527 }, { "id": "identity_227581", "image": "DocVQA/images/xkmd0217_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What were the reasons for attending the event, and what type of honorarium was offered?" }, { "from": "gpt", "value": "The answer is Interest in topic, honoraria, reputation of speakers, meals" } ], "clip_score": 20.453125, "loss": 4.27730655670166 }, { "id": "identity_225729", "image": "DocVQA/images/fpbw0217_29.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Why are nurse practitioners considered an appropriate audience to target, and what are the key factors that make them significant in the context of prescribing medication and patient care?" }, { "from": "gpt", "value": "The answer is Licensing, significant presence in family care, patient care role" } ], "clip_score": 29.515625, "loss": 4.750630855560303 }, { "id": "identity_194249", "image": "A-OKVQA/images/416981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 16.9375, "loss": 4.787684917449951 }, { "id": "identity_274158", "image": "FigureQA/images/58896.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Periwinkle not the median with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.46875, "loss": 4.010887145996094 }, { "id": "identity_334747", "image": "PMC-VQA/images/PMC6158818_Fig2_372636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the increased tracer uptake observed in this case?\nChoices:\n(A) Left femoral shaft\n(B) Right femoral shaft\n(C) Femoral head\n(D) Pelvis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.25, "loss": 4.513445854187012 }, { "id": "identity_115821", "image": "VQA-AS/images/8818.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What food item is placed in the center of the picnic blanket?" }, { "from": "gpt", "value": "The answer is Pie or tart" } ], "clip_score": 31.640625, "loss": 4.531410217285156 }, { "id": "identity_328098", "image": "Super-CLEVR/images/superCLEVR_new_000507.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a tiny green bicycle that is in front of a tiny bicycle that is right of a regular bus in front of a matte road bike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 19.75, "loss": 5.314631938934326 }, { "id": "identity_289074", "image": "A-OKVQA/images/514722.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What issue is currently affecting this area? \nChoices:\n(A) water shortage\n(B) traffic congestion\n(C) water pollution\n(D) landslide" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.453125, "loss": 4.772313594818115 }, { "id": "identity_270201", "image": "PMC-VQA/images/PMC4706984_fig3_460653.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in Figure 2 of the MR enterography?\nChoices:\n(A) Decreased bowel wall thickness\n(B) Increased bowel wall thickness\n(C) Normal bowel wall thickness\n(D) No bowel loop present" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.265625, "loss": 4.277870178222656 }, { "id": "identity_295086", "image": "A-OKVQA/images/36907.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which group of individuals will be boarding those planes? \nChoices:\n(A) airforce\n(B) animals\n(C) passengers\n(D) stunt pilots" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.75, "loss": 4.3673295974731445 }, { "id": "identity_212022", "image": "PMC-VQA/images/PMC7325100_Fig10_201909.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different colored arrows are present in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 13.6953125, "loss": 4.23478889465332 }, { "id": "identity_268701", "image": "ScienceQA/images/17694.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being pointed out in the highlighted area?\nChoices:\n(A) Tonga\n(B) Tuvalu\n(C) Fiji\n(D) Kiribati" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.21875, "loss": 4.1014909744262695 }, { "id": "identity_12582", "image": "FigureQA/images/6362.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.015625, "loss": 4.2199788093566895 }, { "id": "identity_335657", "image": "ScienceQA/images/20128.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown here?\nChoices:\n(A) New Jersey\n(B) Maine\n(C) Massachusetts\n(D) South Carolina" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.59375, "loss": 4.642578601837158 }, { "id": "identity_131381", "image": "PMC-VQA/images/PMC7532635_Fig1_265536.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct white arrows are present in the ultrasound image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 32.90625, "loss": 4.870039939880371 }, { "id": "identity_20731", "image": "PMC-VQA/images/PMC5469828_Fig5_178756.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the orientation of the canaliculi in the THG image?\nChoices:\n(A) Along the radial direction \n(B) Along the tangential direction \n(C) In a circular pattern \n(D) Randomly oriented " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.390625, "loss": 4.3660969734191895 }, { "id": "identity_299611", "image": "TextVQA/images/ae02b7e83ca1be6d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which side is the men's room located?" }, { "from": "gpt", "value": "The answer is left" } ], "clip_score": 14.5234375, "loss": 4.944831848144531 }, { "id": "identity_277938", "image": "TQA/images/question_images/human_system_muscular_6158.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the body is composed of the Brachioradialis muscle?\nChoices:\n(A) leg\n(B) arm\n(C) shoulder\n(D) foot" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.328125, "loss": 4.318679332733154 }, { "id": "identity_329101", "image": "PMC-VQA/images/PMC6492342_Fig1_473846.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique shows a hyperintense signal in this area?\nChoices:\n(A) T1-weighted imaging \n(B) T2-weighted imaging \n(C) FLAIR imaging \n(D) DWI" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.5, "loss": 4.463497161865234 }, { "id": "identity_134839", "image": "DocVQA/images/qjgn0226_19.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the product for which the royalty is requested?" }, { "from": "gpt", "value": "The answer is Taperloc Complete" } ], "clip_score": 17.96875, "loss": 5.009953022003174 }, { "id": "identity_37616", "image": "A-OKVQA/images/472079.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The person in the foreground wearing blue looks most like what Sopranos character?\nChoices:\n(A) bobby baccalieri\n(B) uncle junior\n(C) carmela soprano\n(D) livia soprano" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.1953125, "loss": 4.140871047973633 }, { "id": "identity_324642", "image": "PMC-VQA/images/PMC4982419_Fig1_62322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of image is depicted in the figure?\nChoices:\n(A) X-ray image.\n(B) CT scan.\n(C) MRI.\n(D) PET scan." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.296875, "loss": 4.67546272277832 }, { "id": "identity_202979", "image": "PMC-VQA/images/PMC3034685_F4_86438.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the common feature indicated by the red arrowheads across all microphotographs?" }, { "from": "gpt", "value": "The answer is Developing notochord" } ], "clip_score": 31.375, "loss": 4.49351167678833 }, { "id": "identity_26741", "image": "A-OKVQA/images/488822.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the rugs for?\nChoices:\n(A) moisture\n(B) decoration\n(C) seating\n(D) cleaning" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.7265625, "loss": 4.203097820281982 }, { "id": "identity_19249", "image": "FigureQA/images/53122.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Yellow the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.59375, "loss": 4.228968143463135 }, { "id": "identity_308576", "image": "ScienceQA/images/18274.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which were part of an experimental group in this experiment?\nChoices:\n(A) the containers that got worms\n(B) the containers that did not get worms" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.3046875, "loss": 4.162572383880615 }, { "id": "identity_116275", "image": "A-OKVQA/images/334291.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the pendant lights above the kitchen island?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 22.484375, "loss": 4.15069580078125 }, { "id": "identity_263856", "image": "IconQA/images/84769.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there a square as a face of this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.765625, "loss": 4.401846885681152 }, { "id": "identity_16483", "image": "TQA/images/abc_question_images/human_system_circulatory_16067.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents the Mitral Valve?\nChoices:\n(A) M\n(B) R\n(C) A\n(D) H" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.96875, "loss": 4.40126371383667 }, { "id": "identity_193042", "image": "PMC-VQA/images/PMC7911842_diagnostics-11-00169-f001_375905.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many images show a cross-sectional view of the brain?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 29.1875, "loss": 4.382994174957275 }, { "id": "identity_10067", "image": "AI2D/images/2329.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the life cycle, what is illustrated by letter G?\nChoices:\n(A) gonad\n(B) polyp\n(C) planula\n(D) mouth" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.234375, "loss": 4.572714328765869 }, { "id": "identity_149875", "image": "DocVQA/images/mgph0227_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the source of water for this analysis?" }, { "from": "gpt", "value": "The answer is Drips from \"B\" Pan" } ], "clip_score": 16.515625, "loss": 4.376501560211182 }, { "id": "identity_18232", "image": "DVQA/images/bar_train_00137713.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first group of bars from the bottom?" }, { "from": "gpt", "value": "The answer is mail" } ], "clip_score": 20.234375, "loss": 4.013676643371582 }, { "id": "identity_328748", "image": "FigureQA/images/67579.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.0703125, "loss": 4.129950046539307 }, { "id": "identity_289670", "image": "A-OKVQA/images/117690.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the dog flying towards the frisbee in the given scenario?\nChoices:\n(A) is diving\n(B) to catch\n(C) was thrown\n(D) is confused" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.515625, "loss": 4.523515224456787 }, { "id": "identity_303544", "image": "A-OKVQA/images/130339.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this person wearing?\nChoices:\n(A) glasses\n(B) fedora\n(C) top hat\n(D) boa" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.8671875, "loss": 4.681105613708496 }, { "id": "identity_15962", "image": "PMC-VQA/images/PMC7706278_Fig2_311596.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of free-air accumulation in the image? \nChoices:\n(A) Spleen \n(B) Para-aortic area \n(C) Ascending colon \n(D) Pancreas " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.515625, "loss": 4.256862640380859 }, { "id": "identity_324199", "image": "A-OKVQA/images/364691.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of event is this skateboarder participating in?\nChoices:\n(A) deathmatch\n(B) slalom\n(C) best trick\n(D) lap race" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.53125, "loss": 4.319963455200195 }, { "id": "identity_270147", "image": "DocVQA/images/ylvw0217_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which project has the scheduled release date set for the third quarter of 2002?" }, { "from": "gpt", "value": "The answer is Totelle CD-ROM 1 and 2mg" } ], "clip_score": 20.96875, "loss": 4.647124767303467 }, { "id": "identity_331056", "image": "A-OKVQA/images/99568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What genre of magazine does this individual enjoy?\nChoices:\n(A) entertainment\n(B) horror comic\n(C) cars\n(D) fashion" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.09375, "loss": 4.0358452796936035 }, { "id": "identity_261716", "image": "TQA/images/abc_question_images/parts_worm_17347.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the letter that comes between the crop and the clitellum?\nChoices:\n(A) E\n(B) L\n(C) A\n(D) T" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.421875, "loss": 4.203387260437012 }, { "id": "identity_2464", "image": "IconQA/images/7426.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a bear. Which picture is eighth?\nChoices:\n(A) plane\n(B) bear\n(C) train" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.03125, "loss": 4.020940780639648 }, { "id": "identity_39776", "image": "DocVQA/images/phpj0226_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the Public Relations Assistant who signed the letter?" }, { "from": "gpt", "value": "The answer is Chris Fiedler Houchky" } ], "clip_score": 32.46875, "loss": 6.088493824005127 }, { "id": "identity_238800", "image": "ScienceQA/images/18416.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which colony is located directly south of the colony shown in dark green, and what is its capital city?" }, { "from": "gpt", "value": "The answer is Dover" } ], "clip_score": 30.546875, "loss": 4.038656711578369 }, { "id": "identity_283577", "image": "PMC-VQA/images/PMC7332021_pone.0234792.g005_204142.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the nuclei stain?\nOptions:\n(A) Red\n(B) Blue\n(C) Yellow\n(D) Green" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.046875, "loss": 4.285684585571289 }, { "id": "identity_323645", "image": "DVQA/images/bar_train_00007475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset is represented by the darkorange color?" }, { "from": "gpt", "value": "The answer is tax" } ], "clip_score": 27.984375, "loss": 4.289395332336426 }, { "id": "identity_31410", "image": "PMC-VQA/images/PMC3718344_RSOB130015F3_219230.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the image show in section (e)? \nChoices:\n(A) Reaction product surrounding elongate cells, \n(B) Profile of a nerve fiber, \n(C) The basal region of a taste bud, \n(D) None of the above. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.859375, "loss": 4.45690393447876 }, { "id": "identity_310557", "image": "FigureQA/images/20955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the smallest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.703125, "loss": 4.029571533203125 }, { "id": "identity_73217", "image": "ScienceQA/images/12975.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the prefix \"Ante\" signify in the given words?" }, { "from": "gpt", "value": "The answer is Before" } ], "clip_score": 23.578125, "loss": 4.5531511306762695 }, { "id": "identity_109173", "image": "TextVQA/images/23bfdbe38957354f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main color of the bottle label?" }, { "from": "gpt", "value": "The answer is Light blue" } ], "clip_score": 28.640625, "loss": 4.150048732757568 }, { "id": "identity_21562", "image": "ScienceQA/images/311.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which material is this belt made of?\nChoices:\n(A) ceramic\n(B) leather" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.96875, "loss": 4.20278263092041 }, { "id": "identity_289960", "image": "ScienceQA/images/11630.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material was used to make this table?\nChoices:\n(A) asphalt\n(B) metal" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.25, "loss": 4.249983787536621 }, { "id": "identity_15514", "image": "A-OKVQA/images/239001.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What's the purpose of the poled items?\nChoices:\n(A) block sun\n(B) block rain\n(C) block wind\n(D) block snow" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 8.9375, "loss": 4.65870475769043 }, { "id": "identity_313622", "image": "FigureQA/images/73660.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this color have the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.9375, "loss": 4.011507987976074 }, { "id": "identity_255299", "image": "AI2D/images/470.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the diagram below and your own knowledge, what will happen to the population of water plants if the population of carp increases?" }, { "from": "gpt", "value": "The answer is It will decrease" } ], "clip_score": 32.5625, "loss": 4.280421257019043 }, { "id": "identity_221209", "image": "VQA2.0/images/393121.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible on the street?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.078125, "loss": 4.560275077819824 }, { "id": "identity_10931", "image": "ScienceQA/images/13091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Illinois?\nChoices:\n(A) Chicago\n(B) Topeka\n(C) Springfield\n(D) Nashville" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.109375, "loss": 4.202304840087891 }, { "id": "identity_294329", "image": "A-OKVQA/images/243847.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activities are the two humans engaging in with the elephant? Are they:\n(A) riding it\n(B) petting it\n(C) playing with it\n(D) feeding it" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.0, "loss": 4.218220233917236 }, { "id": "identity_267468", "image": "TabMWP/images/38043.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.921875, "loss": 4.214577674865723 }, { "id": "identity_30292", "image": "PMC-VQA/images/PMC7818493_cne24965-fig-0013_346564.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the blood vessel in the image? \nChoices:\n(A) Zone 1 \n(B) Zone 2 \n(C) Zone 3 \n(D) Zone 4 " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.625, "loss": 4.023507595062256 }, { "id": "identity_127260", "image": "Super-CLEVR/images/superCLEVR_new_000666.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is furthest to the left in the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 18.6875, "loss": 4.202974796295166 }, { "id": "identity_19797", "image": "VQA2.0/images/49640.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where was this pic taken?" }, { "from": "gpt", "value": "The answer is outside" } ], "clip_score": 17.828125, "loss": 4.088839530944824 }, { "id": "identity_23477", "image": "A-OKVQA/images/127074.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the bag on the grass in front of this man?\nChoices:\n(A) duffle bag\n(B) sports bag\n(C) drawstring bag\n(D) handbag" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.453125, "loss": 4.2408952713012695 }, { "id": "identity_36919", "image": "Super-CLEVR/images/superCLEVR_new_000414.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any large green metal things of the same shape as the large brown metal thing?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 23.671875, "loss": 5.290689468383789 }, { "id": "identity_19695", "image": "DVQA/images/bar_train_00145287.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What category does the forestgreen color represent?" }, { "from": "gpt", "value": "The answer is cent" } ], "clip_score": 20.328125, "loss": 5.497282028198242 }, { "id": "identity_307292", "image": "FigureQA/images/73803.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this color have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.8125, "loss": 4.421039581298828 }, { "id": "identity_106907", "image": "VQA2.0/images/486711.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.84375, "loss": 4.0787763595581055 }, { "id": "identity_191082", "image": "TextVQA/images/3a6d2b256ab581fe.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the jacket hanging on the chair?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 17.34375, "loss": 4.641117572784424 }, { "id": "identity_2898", "image": "VQA2.0/images/130735.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the mascot standing?" }, { "from": "gpt", "value": "The answer is on railing" } ], "clip_score": 24.03125, "loss": 4.065334796905518 }, { "id": "identity_190482", "image": "A-OKVQA/images/522819.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many candles are lit on the table?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.796875, "loss": 4.2674641609191895 }, { "id": "identity_322476", "image": "A-OKVQA/images/17882.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the triangular barricades?\nChoices:\n(A) prevent parking\n(B) hang things\n(C) decorate street\n(D) block road" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.296875, "loss": 4.8611016273498535 }, { "id": "identity_309388", "image": "FigureQA/images/31702.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.390625, "loss": 4.046399116516113 }, { "id": "identity_5424", "image": "TQA/images/abc_question_images/photosynthesis_14130.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter indicates energy in the form of sunlight entering the plant?\nChoices:\n(A) U\n(B) J\n(C) A\n(D) T" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 35.625, "loss": 4.188327789306641 }, { "id": "identity_287340", "image": "VQA-AS/images/8713.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the cat overweight?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 23.8125, "loss": 4.09902811050415 }, { "id": "identity_54566", "image": "DVQA/images/bar_train_00067752.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store had the lowest sales for the item 'mother'?" }, { "from": "gpt", "value": "The answer is mid" } ], "clip_score": 30.59375, "loss": 4.615914344787598 }, { "id": "identity_199447", "image": "A-OKVQA/images/413959.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are sitting on the steps in the background?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 17.6875, "loss": 4.989801406860352 }, { "id": "identity_334626", "image": "A-OKVQA/images/373574.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What device is next to this?\nChoices:\n(A) kindle\n(B) tablet\n(C) laptop\n(D) cell phone" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.75, "loss": 4.1900529861450195 }, { "id": "identity_27399", "image": "VizWiz/images/VizWiz_train_00005917.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you please tell me what this product is?" }, { "from": "gpt", "value": "The answer is spynet" } ], "clip_score": 31.125, "loss": 4.544753074645996 }, { "id": "identity_269192", "image": "DVQA/images/bar_train_00134889.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the bars run horizontally?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.03125, "loss": 4.330430030822754 }, { "id": "identity_35772", "image": "A-OKVQA/images/250192.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this this elephant doing?\nChoices:\n(A) drinking\n(B) painting\n(C) playing soccer\n(D) eating" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.015625, "loss": 4.095224380493164 }, { "id": "identity_197080", "image": "A-OKVQA/images/500271.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the sweater worn by the person in the image?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 16.625, "loss": 4.264437198638916 }, { "id": "identity_157356", "image": "AI2D/images/3198.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does the \"Stem\" contribute to the plant's survival according to the diagram?" }, { "from": "gpt", "value": "The answer is Supports and transports resources" } ], "clip_score": 33.125, "loss": 4.08532190322876 }, { "id": "identity_105905", "image": "A-OKVQA/images/578705.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of sign is directly above the motorcycle?" }, { "from": "gpt", "value": "The answer is An informational sign" } ], "clip_score": 23.796875, "loss": 4.136118412017822 }, { "id": "identity_30304", "image": "IconQA/images/72975.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you select a marble without looking, which color are you more likely to pick?\nChoices:\n(A) neither; black and light blue are equally likely\n(B) light blue\n(C) black" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.5, "loss": 4.474137306213379 }, { "id": "identity_279274", "image": "FigureQA/images/32531.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is darker, Indigo or Dark Periwinkle?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.546875, "loss": 4.525395393371582 }, { "id": "identity_187182", "image": "PMC-VQA/images/PMC4884350_Fig3_35408.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the arrow pointing to the area of interest?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 18.484375, "loss": 4.268289089202881 }, { "id": "identity_284879", "image": "DVQA/images/bar_train_00048541.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store is represented by the darkorange color?" }, { "from": "gpt", "value": "The answer is change" } ], "clip_score": 25.296875, "loss": 4.338281631469727 }, { "id": "identity_10198", "image": "ScienceQA/images/4591.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) Tuvalu\n(B) Fiji\n(C) Tonga\n(D) Kiribati" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.796875, "loss": 4.098650932312012 }, { "id": "identity_335598", "image": "FigureQA/images/84544.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.8671875, "loss": 4.02410364151001 }, { "id": "identity_272100", "image": "TextVQA/images/1a625079edf146ff.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of suits are these?" }, { "from": "gpt", "value": "The answer is mobile" } ], "clip_score": 14.8984375, "loss": 4.1688666343688965 }, { "id": "identity_120950", "image": "VQA2.0/images/450748.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible glasses are on the table?" }, { "from": "gpt", "value": "The answer is Two glasses" } ], "clip_score": 13.5234375, "loss": 4.0772528648376465 }, { "id": "identity_1860", "image": "A-OKVQA/images/132773.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What number is next to Q on the sign?\nChoices:\n(A) three\n(B) eight\n(C) five\n(D) ten" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 6.88671875, "loss": 4.043680191040039 }, { "id": "identity_109601", "image": "VQA2.0/images/304218.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the sign on the fence?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 10.5546875, "loss": 4.183040618896484 }, { "id": "identity_274021", "image": "AI2D/images/4297.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the image provided, which layer is situated above the upper mantle?\nOptions:\n(A) oceanic crust\n(B) continental crust\n(C) plastic asthenosphere\n(D) lithosphere" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.5625, "loss": 4.655468940734863 }, { "id": "identity_168714", "image": "IconQA/images/214.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which image is second in the sequence?" }, { "from": "gpt", "value": "The answer is A house" } ], "clip_score": 19.25, "loss": 4.344751834869385 }, { "id": "identity_62469", "image": "DVQA/images/bar_train_00043443.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the least preferred object in the 'region' category?" }, { "from": "gpt", "value": "The answer is habit" } ], "clip_score": 26.21875, "loss": 4.05102014541626 }, { "id": "identity_123481", "image": "A-OKVQA/images/208963.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many drums are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.140625, "loss": 4.376607894897461 }, { "id": "identity_251452", "image": "A-OKVQA/images/255006.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of communication devices are present on the desk, and which one is unusual due to its corded nature?" }, { "from": "gpt", "value": "The answer is Three types, corded phone" } ], "clip_score": 27.171875, "loss": 4.540555477142334 }, { "id": "identity_53141", "image": "DocVQA/images/ylym0020_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Payment of Time Frame for Monarch?" }, { "from": "gpt", "value": "The answer is The column is blank" } ], "clip_score": 21.0625, "loss": 4.452696323394775 }, { "id": "identity_337949", "image": "TabMWP/images/20014.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table displays a function. Is it linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.015625, "loss": 4.173544406890869 }, { "id": "identity_302261", "image": "PMC-VQA/images/PMC2989308_F1_79157.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which veins are indicated in the caption?\nChoices:\n(A) Femoral vein and popliteal vein \n(B) Posterior tibial vein and peroneal vein \n(C) External iliac vein and common femoral vein \n(D) Saphenous vein and deep femoral vein" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.53125, "loss": 4.241753578186035 }, { "id": "identity_285948", "image": "A-OKVQA/images/4535.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of event are the costumed riders participating in? Is it a steeple chase, parade, renaissance faire, or sideshow?\nChoices:\n(A) steeple chase\n(B) parade\n(C) renaissance faire\n(D) sideshow" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.484375, "loss": 4.070158004760742 }, { "id": "identity_231052", "image": "DocVQA/images/kzbn0226_40.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two main reasons EPA suggested modifying the statutory definition, and how does the proposed revision intend to broaden the scope of EPA’s evaluation?" }, { "from": "gpt", "value": "The answer is To identify additional populations and consider broader factors" } ], "clip_score": 25.578125, "loss": 4.7935309410095215 }, { "id": "identity_19798", "image": "ScienceQA/images/18705.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) Tonga\n(B) New Zealand\n(C) Australia\n(D) Papua New Guinea" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.203125, "loss": 4.014156818389893 }, { "id": "identity_77941", "image": "TQA/images/abc_question_images/photosynthesis_14107.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does label U indicate in the image?" }, { "from": "gpt", "value": "The answer is Groundwater storage" } ], "clip_score": 23.0, "loss": 4.548172950744629 }, { "id": "identity_298301", "image": "A-OKVQA/images/142002.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which design is located beneath the wing? \nOptions:\n(A) half moon\n(B) cross\n(C) gorgon\n(D) star" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.6484375, "loss": 4.11622953414917 }, { "id": "identity_47311", "image": "DVQA/images/bar_train_00049460.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object in the category 'animal' is preferred by the most number of people?" }, { "from": "gpt", "value": "The answer is part" } ], "clip_score": 27.75, "loss": 4.072880744934082 }, { "id": "identity_328858", "image": "PMC-VQA/images/PMC9414956_microorganisms-10-01640-f003_385876.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which bone is affected by the disease shown here?\nChoices:\n(A) Proximal tibia.\n(B) Distal tibia.\n(C) Proximal fibula.\n(D) Distal fibula." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.25, "loss": 4.420681953430176 }, { "id": "identity_17663", "image": "VQA-AS/images/9407.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is in front of the dog?" }, { "from": "gpt", "value": "The answer is bone" } ], "clip_score": 13.25, "loss": 4.253544330596924 }, { "id": "identity_128582", "image": "VQA2.0/images/159601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing helmets?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 14.484375, "loss": 4.48239803314209 }, { "id": "identity_320592", "image": "A-OKVQA/images/68657.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the blurry boxes in the background likely to contain?\nChoices:\n(A) seeds\n(B) action figures\n(C) video games\n(D) raisins" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.5546875, "loss": 4.2689290046691895 }, { "id": "identity_69461", "image": "ScienceQA/images/999.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many secondary consumers are depicted in this food web?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 26.0, "loss": 4.823992729187012 }, { "id": "identity_5974", "image": "FigureQA/images/83197.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Brown the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.7578125, "loss": 4.161510467529297 }, { "id": "identity_38523", "image": "VizWiz/images/VizWiz_train_00014188.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of soup was that?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 28.109375, "loss": 4.029418468475342 }, { "id": "identity_34484", "image": "IconQA/images/58093.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of flowers even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.265625, "loss": 4.095546245574951 }, { "id": "identity_131742", "image": "A-OKVQA/images/196341.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any children present in the image?" }, { "from": "gpt", "value": "The answer is No children present" } ], "clip_score": 16.234375, "loss": 4.594130039215088 }, { "id": "identity_325110", "image": "PMC-VQA/images/PMC3438020_F1_153927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What method was used to capture this image?\nChoices:\n(A) computed tomography scan\n(B) magnetic resonance imaging scan\n(C) positron emission tomography scan\n(D) X-ray" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.234375, "loss": 4.684934139251709 }, { "id": "identity_34166", "image": "VQA2.0/images/324705.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the man in glasses holding?" }, { "from": "gpt", "value": "The answer is pole" } ], "clip_score": 17.0, "loss": 5.0322465896606445 }, { "id": "identity_198465", "image": "A-OKVQA/images/105865.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many apples can be seen inside the refrigerator?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.9375, "loss": 4.557485580444336 }, { "id": "identity_31070", "image": "DVQA/images/bar_train_00138633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the sixth group of bars from the left?" }, { "from": "gpt", "value": "The answer is smoke" } ], "clip_score": 15.6328125, "loss": 4.748889446258545 }, { "id": "identity_314935", "image": "DVQA/images/bar_train_00079072.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of this group of bars?" }, { "from": "gpt", "value": "The answer is model" } ], "clip_score": 19.796875, "loss": 4.025609016418457 }, { "id": "identity_126969", "image": "VQA2.0/images/90307.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 28.453125, "loss": 4.26013708114624 }, { "id": "identity_147402", "image": "DocVQA/images/fqyf0227_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main topic of the document?" }, { "from": "gpt", "value": "The answer is Program projections not requiring budget" } ], "clip_score": 21.734375, "loss": 4.796079635620117 }, { "id": "identity_215444", "image": "A-OKVQA/images/99518.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible on the observation deck?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 20.5, "loss": 4.562304496765137 }, { "id": "identity_242641", "image": "PMC-VQA/images/PMC6300016_Fig1_413147.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the location of the tumour lesion and describe the anatomical structures surrounding it" }, { "from": "gpt", "value": "The answer is Right rib, adjacent to soft tissue structures" } ], "clip_score": 29.0, "loss": 4.356013298034668 }, { "id": "identity_245939", "image": "TextVQA/images/24dc9ba1cc15d9e5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the brand name on the mat and how many times does it appear in the image?" }, { "from": "gpt", "value": "The answer is four" } ], "clip_score": 13.984375, "loss": 4.036138534545898 }, { "id": "identity_31587", "image": "TabMWP/images/15377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is the function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.96875, "loss": 4.427720546722412 }, { "id": "identity_33971", "image": "PMC-VQA/images/PMC8931287_f2_225612.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the brain is affected in the MRI image?\nChoices:\n(A) Temporal lobe \n(B) Parietal lobe \n(C) Occipital lobe \n(D) Frontal lobe" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.03125, "loss": 4.036947727203369 }, { "id": "identity_300397", "image": "A-OKVQA/images/269098.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this lamp generally used for?\nChoices:\n(A) crafting\n(B) dancing\n(C) cooking\n(D) sleeping" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.9140625, "loss": 4.44893217086792 }, { "id": "identity_266046", "image": "PMC-VQA/images/PMC5655893_Fig7_231377.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color best describes the object being discussed? \nChoices:\n(A) Black \n(B) Brown \n(C) Red \n(D) Green" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.875, "loss": 4.9371747970581055 }, { "id": "identity_263405", "image": "ScienceQA/images/7438.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the term that corresponds to the image:\nOptions:\n(A) phloem\n(B) xylem" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.984375, "loss": 4.646668434143066 }, { "id": "identity_272286", "image": "A-OKVQA/images/176873.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose is the lane painted green?\nChoices:\n(A) minivans only\n(B) pedestrians only\n(C) bikes only\n(D) keep out" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.140625, "loss": 4.2350568771362305 }, { "id": "identity_314861", "image": "TextVQA/images/8d6ae29de134eeef.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand name of this?" }, { "from": "gpt", "value": "The answer is hermis" } ], "clip_score": 26.234375, "loss": 4.811550617218018 }, { "id": "identity_324056", "image": "PMC-VQA/images/PMC5721319_F2_250258.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the red line indicating in this image?\nChoices:\n(A) Femoral neck offset\n(B) Acetabular index\n(C) Hip joint center\n(D) Proximal femoral axis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.796875, "loss": 4.860856056213379 }, { "id": "identity_325171", "image": "FigureQA/images/56039.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.609375, "loss": 4.059072494506836 }, { "id": "identity_25192", "image": "PMC-VQA/images/PMC3540650_fig1_177765.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which muscle is affected in this image? \nChoices:\n(A) Superior rectus muscle \n(B) Lateral rectus muscle \n(C) Inferior oblique muscle \n(D) Medial rectus muscle " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.03125, "loss": 4.334792137145996 }, { "id": "identity_51186", "image": "FigureQA/images/29394.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many categories have a smaller representation than the Blue category?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 26.609375, "loss": 4.767821788787842 }, { "id": "identity_9846", "image": "Super-CLEVR/images/superCLEVR_new_001266.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a matte thing that has the same shape as the red shiny object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 13.4921875, "loss": 5.099637031555176 }, { "id": "identity_290460", "image": "IconQA/images/8112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the shape from the following options: triangle, square, circle.\nChoices:\n(A) triangle\n(B) square\n(C) circle" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.5, "loss": 4.08395528793335 }, { "id": "identity_1796", "image": "ScienceQA/images/6233.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Ohio\n(B) North Carolina\n(C) Massachusetts\n(D) Connecticut" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.703125, "loss": 4.488356113433838 }, { "id": "identity_312886", "image": "PMC-VQA/images/PMC6713910_F6_30138.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What method was used to distinguish between different cell types in the brain?\nChoices:\n(A) X-ray imaging\n(B) MRI\n(C) Double-labeling immunofluorescence\n(D) CT scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.890625, "loss": 4.072343826293945 }, { "id": "identity_21055", "image": "PMC-VQA/images/PMC9182059_molecules-27-03631-f005_306952.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the double arrow in the image indicate? \nChoices:\n(A) Expansion \n(B) Immobilization \n(C) Control \n(D) Double magnification " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.5, "loss": 4.4349045753479 }, { "id": "identity_304661", "image": "ScienceQA/images/294.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic fits this item?\nChoices:\n(A) shiny\n(B) yellow" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.1875, "loss": 4.335727214813232 }, { "id": "identity_4197", "image": "A-OKVQA/images/406075.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the white object on the man's waist used for?\nChoices:\n(A) floating\n(B) slimming\n(C) dress code\n(D) protecting backbone" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.6875, "loss": 4.566163063049316 }, { "id": "identity_326354", "image": "PMC-VQA/images/PMC5400329_F2_155452.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the disease shown in these radiological findings?\nChoices:\n(A) Fibrosing alveolitis\n(B) Pulmonary sarcoidosis\n(C) COPD\n(D) Emphysema" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.140625, "loss": 4.166989803314209 }, { "id": "identity_269516", "image": "IconQA/images/13774.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which activity did Stacy do second on Wednesday after lunch, playing soccer or talking on the phone?\nChoices:\n(A) played soccer\n(B) talked on the phone" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.15625, "loss": 4.098801612854004 }, { "id": "identity_123662", "image": "PMC-VQA/images/PMC5964484_fig5_317749.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arrowheads are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.53125, "loss": 4.398526191711426 }, { "id": "identity_115725", "image": "VQA2.0/images/478227.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of vegetables can be seen on the table?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.75, "loss": 4.088963985443115 }, { "id": "identity_264318", "image": "FigureQA/images/49655.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Brown considered the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.703125, "loss": 4.6059746742248535 }, { "id": "identity_272651", "image": "PMC-VQA/images/PMC7812302_jcmm16141-fig-0006_344268.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the meaning of the green arrows in the micrographs?\nChoices:\n(A) Single cells \n(B) Extracellular matrix \n(C) Cell clusters \n(D) GelMA hydrogel fibers" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.28125, "loss": 4.713469505310059 }, { "id": "identity_21163", "image": "A-OKVQA/images/208067.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What natural event seems to have occurred here?\nChoices:\n(A) hurricane\n(B) thunder\n(C) snow\n(D) tornado" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.109375, "loss": 4.402485370635986 }, { "id": "identity_302853", "image": "DVQA/images/bar_train_00186512.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item had the highest total units sold across all stores?" }, { "from": "gpt", "value": "The answer is notion" } ], "clip_score": 31.8125, "loss": 5.159510612487793 }, { "id": "identity_266449", "image": "VizWiz/images/VizWiz_train_00016564.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Could you please identify the weight of this size?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 24.75, "loss": 4.110908508300781 }, { "id": "identity_320459", "image": "VQA-AS/images/13601.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there someone in this place?" }, { "from": "gpt", "value": "The answer is dog" } ], "clip_score": 22.796875, "loss": 5.068032741546631 }, { "id": "identity_187443", "image": "A-OKVQA/images/284310.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many street signs are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.78125, "loss": 4.26340389251709 }, { "id": "identity_13186", "image": "DVQA/images/bar_train_00028687.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.421875, "loss": 4.341963291168213 }, { "id": "identity_18614", "image": "PMC-VQA/images/PMC9078666_fig2_274094.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the image showing? \nChoices:\n(A) Live silk worms \n(B) silk spheres \n(C) eTuSp1 Drug \n(D) Unknown substance " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.15625, "loss": 4.019740104675293 }, { "id": "identity_11498", "image": "ScienceQA/images/15990.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Rhode Island\n(B) Massachusetts\n(C) Connecticut\n(D) New Hampshire" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.125, "loss": 4.454936981201172 }, { "id": "identity_26663", "image": "A-OKVQA/images/507927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which type setting are the bikers?\nChoices:\n(A) city\n(B) collegiate\n(C) mall\n(D) farm" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.96875, "loss": 4.522932052612305 }, { "id": "identity_205950", "image": "PMC-VQA/images/PMC7296345_fig5_192972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any labeling or annotation present on the radiograph?" }, { "from": "gpt", "value": "The answer is Yes, a scale label" } ], "clip_score": 24.5, "loss": 4.068887710571289 }, { "id": "identity_281906", "image": "PMC-VQA/images/PMC8933768_brb32490-fig-0003_226526.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of injury causes small hemorrhagic lesions in the FLAIR and PRESTO images?\nChoices:\n(A) Epidural hematoma\n(B) Subdural hematoma\n(C) Diffuse axonal injury\n(D) Concussion" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.75, "loss": 4.157045364379883 }, { "id": "identity_223559", "image": "TextVQA/images/032f63f7a99ec39e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of map is shown in the rightmost brochure at the bottom of the image?" }, { "from": "gpt", "value": "The answer is Historical Site Map" } ], "clip_score": 19.171875, "loss": 4.449371814727783 }, { "id": "identity_332668", "image": "A-OKVQA/images/305170.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What tragic event is this place known for?\nChoices:\n(A) church day\n(B) ww2\n(C) 9/11\n(D) olympic games" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.5078125, "loss": 4.310231685638428 }, { "id": "identity_124710", "image": "VQA2.0/images/539719.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many framed pictures are visible on the desk?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.09375, "loss": 4.861865997314453 }, { "id": "identity_323397", "image": "PMC-VQA/images/PMC6945702_Fig1_95195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the liver is affected here?\nChoices:\n(A) Right lobe\n(B) Left lobe\n(C) Caudate lobe\n(D) Quadrate lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.625, "loss": 4.382491111755371 }, { "id": "identity_322265", "image": "ScienceQA/images/31.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does pollen help a plant achieve?\nChoices:\n(A) make seeds\n(B) grow bigger\n(C) grow new leaves" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.671875, "loss": 4.069450378417969 }, { "id": "identity_23679", "image": "ScienceQA/images/13028.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) hard\n(B) flexible" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.625, "loss": 4.634172439575195 }, { "id": "identity_23249", "image": "PMC-VQA/images/PMC9228668_polymers-14-02360-f001_323007.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging mode used in the SEM micrographs?\nChoices:\n(A) BSE mode \n(B) SE mode \n(C) EDS mode \n(D) CL mode" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.234375, "loss": 5.109349250793457 }, { "id": "identity_320131", "image": "PMC-VQA/images/PMC6300016_Fig1_413147.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the tumour lesion located?\nChoices:\n(A) Left lung\n(B) Right rib\n(C) Left chest wall\n(D) Spine" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.234375, "loss": 4.3745222091674805 }, { "id": "identity_284134", "image": "PMC-VQA/images/PMC2248189_F3_17874.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the meaning of the red color in the image? Is it:\n(A) EGFP fluorescence\n(B) Organelle marker\n(C) Secondary antibody\n(D) Background noise" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.53125, "loss": 5.188849449157715 }, { "id": "identity_132225", "image": "Super-CLEVR/images/superCLEVR_new_001759.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more airplanes or motorcycles in the image?" }, { "from": "gpt", "value": "The answer is Equal number" } ], "clip_score": 23.09375, "loss": 4.021347522735596 }, { "id": "identity_277758", "image": "TabMWP/images/4927.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.515625, "loss": 4.561018466949463 }, { "id": "identity_309497", "image": "VQA-AS/images/11204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this person sitting on?" }, { "from": "gpt", "value": "The answer is blanket" } ], "clip_score": 22.21875, "loss": 4.066202640533447 }, { "id": "identity_290237", "image": "PMC-VQA/images/PMC4330237_Fig15_358901.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible in the PTG?\nChoices:\n(A) Alveolar fracture and dislocation\n(B) Only dislocation of the alveolar\n(C) Root sockets of the teeth\n(D) None of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.09375, "loss": 4.161275386810303 }, { "id": "identity_284944", "image": "PMC-VQA/images/PMC7881081_Fig20_367266.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of tumors is depicted in the image provided?\nChoices:\n(A) benign tumors\n(B) malignant tumors\n(C) cysts\n(D) fibromas" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.359375, "loss": 4.772040843963623 }, { "id": "identity_337540", "image": "PMC-VQA/images/PMC7892262_F4_370539.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be seen in this CT scan?\nChoices:\n(A) Heart \n(B) Lungs \n(C) Liver \n(D) Kidneys" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.171875, "loss": 4.933361530303955 }, { "id": "identity_297114", "image": "VizWiz/images/VizWiz_train_00015910.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 14.3515625, "loss": 4.12539005279541 }, { "id": "identity_274981", "image": "Super-CLEVR/images/superCLEVR_new_000782.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there anything located behind the object that is to the right of the small object behind the shiny regular bus?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 26.5625, "loss": 4.9937639236450195 }, { "id": "identity_319552", "image": "A-OKVQA/images/469605.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is behind the person that is standing?\nChoices:\n(A) fish\n(B) dogs\n(C) tents\n(D) barbed wire" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.15625, "loss": 4.6692657470703125 }, { "id": "identity_209031", "image": "A-OKVQA/images/309371.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many benches can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.375, "loss": 4.746548652648926 }, { "id": "identity_5958", "image": "TQA/images/abc_question_images/hair_follicles_16989.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label refers to the bulge?\nChoices:\n(A) K\n(B) N\n(C) R\n(D) V" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.421875, "loss": 4.306818008422852 }, { "id": "identity_209187", "image": "A-OKVQA/images/4424.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many rings are visible on the person's hand?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 10.234375, "loss": 4.602972507476807 }, { "id": "identity_276089", "image": "Super-CLEVR/images/superCLEVR_new_001894.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the small gray object in front of the green rubber motorbike and the blue matte thing behind the small metal bicycle of the same shape?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 26.03125, "loss": 5.075011730194092 }, { "id": "identity_21268", "image": "PMC-VQA/images/PMC7339797_Fig4_206309.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What medical imaging technique was used to capture the image? \nChoices:\n(A) X-Ray \n(B) CT Scan \n(C) MRI \n(D) Ultrasound " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.640625, "loss": 4.0164384841918945 }, { "id": "identity_111256", "image": "VQA2.0/images/13121.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tiers does the cake have?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 26.875, "loss": 4.179832935333252 }, { "id": "identity_303506", "image": "ScienceQA/images/7258.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Reno\n(B) Carson City\n(C) Baton Rouge\n(D) Jefferson City" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.171875, "loss": 4.501611709594727 }, { "id": "identity_333434", "image": "PMC-VQA/images/PMC6245846_Fig1_399036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which quadrant is the lipped liner being trialed? \nChoices:\n(A) Anterosuperior quadrant \n(B) Anteroinferior quadrant \n(C) Posterosuperior quadrant \n(D) Posteroinferior quadrant" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.4140625, "loss": 4.290066242218018 }, { "id": "identity_304672", "image": "IconQA/images/7984.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a house. Which image is tenth?\nChoices:\n(A) paw\n(B) house\n(C) dog" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.703125, "loss": 4.306019306182861 }, { "id": "identity_212166", "image": "PMC-VQA/images/PMC5110515_F1_95757.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the white matter hyperintensities seen in this image?" }, { "from": "gpt", "value": "The answer is Indicative of potential underlying conditions" } ], "clip_score": 33.59375, "loss": 4.302980422973633 }, { "id": "identity_276118", "image": "ScienceQA/images/14878.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option provides the most accurate description of an ecosystem in a California tide pool?\nChoices:\n(A) a group of fluffy sculpins\n(B) the giant green anemones, the ochre sea stars, and the red octopuses\n(C) the rocks, the salt water, and the California mussels" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.015625, "loss": 4.231922149658203 }, { "id": "identity_333353", "image": "IconQA/images/32704.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to stop on?\nChoices:\n(A) white\n(B) green" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.078125, "loss": 4.662489891052246 }, { "id": "identity_310481", "image": "TextVQA/images/006f378c1ca56fd9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is attributed with the quote in the image?" }, { "from": "gpt", "value": "The answer is pedro obispo" } ], "clip_score": 10.25, "loss": 4.17447566986084 }, { "id": "identity_117381", "image": "TextVQA/images/b385728633d4f34c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different brands can be identified from the boxes on the table?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 17.734375, "loss": 4.102336406707764 }, { "id": "identity_307074", "image": "PMC-VQA/images/PMC4216464_RSTB20130541F4_332225.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What distinguishes the images on the left and right?\nChoices:\n(A) Contrast\n(B) Focus\n(C) Color\n(D) Shape" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.28125, "loss": 4.566187858581543 }, { "id": "identity_20793", "image": "PMC-VQA/images/PMC5694160_F2_241941.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the order of the axial skull reconstructions? \nChoices:\n(A) lower to upper \n(B) diagonal \n(C) left to right \n(D) upper to lower " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.65625, "loss": 4.165162563323975 }, { "id": "identity_8584", "image": "ScienceQA/images/16717.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Florida?\nChoices:\n(A) Tallahassee\n(B) Richmond\n(C) Orlando\n(D) Columbia" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.3125, "loss": 4.091008186340332 }, { "id": "identity_4246", "image": "AI2D/images/4061.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this diagram, which letter represents the chromosphere?\nChoices:\n(A) G\n(B) F\n(C) E\n(D) H" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.0, "loss": 4.3430280685424805 }, { "id": "identity_217552", "image": "PMC-VQA/images/PMC7310451_Fig2_197442.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What organ is predominantly shown in the upper part of this image?" }, { "from": "gpt", "value": "The answer is Liver" } ], "clip_score": 30.65625, "loss": 4.047636985778809 }, { "id": "identity_16418", "image": "FigureQA/images/10678.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Firebrick the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.734375, "loss": 4.139525890350342 }, { "id": "identity_292649", "image": "A-OKVQA/images/111436.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the man in the foreground engaged in?\nChoices:\n(A) hiding bike\n(B) eating lunch\n(C) stealing bike\n(D) repairing bike" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 11.7734375, "loss": 4.790055274963379 }, { "id": "identity_313827", "image": "PMC-VQA/images/PMC7721805_Fig5_316463.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the arrows indicate?\nChoices:\n(A) Nucleus\n(B) Cell membrane\n(C) Cytoplasm\n(D) Mitochondria" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.03125, "loss": 4.725070476531982 }, { "id": "identity_15179", "image": "AI2D/images/2122.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the Termites in the community below were destroyed, which population would be most directly affected?\nChoices:\n(A) Dingoes\n(B) Mice\n(C) Northern brown bandicoot\n(D) none of above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.578125, "loss": 4.220907211303711 }, { "id": "identity_208857", "image": "A-OKVQA/images/576650.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing a hat?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 19.078125, "loss": 4.260207653045654 }, { "id": "identity_25426", "image": "AI2D/images/3457.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the diagram of?\nChoices:\n(A) bird\n(B) insect\n(C) goat\n(D) none of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.6875, "loss": 4.249488353729248 }, { "id": "identity_13130", "image": "TQA/images/abc_question_images/faults_11724.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is H?\nChoices:\n(A) HANGING WALL\n(B) FOOT WALL\n(C) NORMAL FAULT\n(D) WALL" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.234375, "loss": 4.159339904785156 }, { "id": "identity_131641", "image": "TextVQA/images/57e60e7639b3d40d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cheerleaders are performing stunts in the air?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 32.0, "loss": 4.4250359535217285 }, { "id": "identity_105381", "image": "VQA2.0/images/303076.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bottle to the left of the pan on the stove?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 19.609375, "loss": 4.024050235748291 }, { "id": "identity_282522", "image": "PMC-VQA/images/PMC7339797_Fig4_206309.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which medical imaging technique was utilized to capture the image: X-Ray, CT Scan, MRI, or Ultrasound?\nChoices:\n(A) X-Ray \n(B) CT Scan \n(C) MRI \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.96875, "loss": 4.08394718170166 }, { "id": "identity_272547", "image": "A-OKVQA/images/387597.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the horse currently engaged in?\nChoices:\n(A) sleeping\n(B) feeding\n(C) walking\n(D) leaping" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.328125, "loss": 4.291078090667725 }, { "id": "identity_402", "image": "A-OKVQA/images/269098.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what activity is the standing lamp typically used?\nChoices:\n(A) crafting\n(B) dancing\n(C) cooking\n(D) sleeping" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.6875, "loss": 4.468938827514648 }, { "id": "identity_212743", "image": "VQA2.0/images/205243.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many red buoys can be seen in the water?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.6875, "loss": 4.135710716247559 }, { "id": "identity_281194", "image": "Super-CLEVR/images/superCLEVR_new_000687.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the dimensions of the articulated bus identical to those of the red object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 21.421875, "loss": 4.999899864196777 }, { "id": "identity_1966", "image": "A-OKVQA/images/270812.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name for the tallest building?\nChoices:\n(A) clock tower\n(B) library\n(C) station\n(D) pub" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.296875, "loss": 4.312072277069092 }, { "id": "identity_13394", "image": "TQA/images/question_images/earth_magnetic_field_6774.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the magnetic north pole located?\nChoices:\n(A) Equator\n(B) West of the geographic north pole\n(C) At the rotation axis\n(D) East of the geographic north pole" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 37.40625, "loss": 4.174648761749268 }, { "id": "identity_324147", "image": "PMC-VQA/images/PMC3751478_F3_226867.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of image was utilized to verify the atrophy of the vermis and the cerebellar hemispheres?\nChoices:\n(A) PET image\n(B) CT scan image\n(C) T2-weighted image\n(D) MRI image" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.59375, "loss": 4.32183837890625 }, { "id": "identity_336558", "image": "DocVQA/images/nldg0227_17.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the source of fund for this demonstration?" }, { "from": "gpt", "value": "The answer is WOH FUND" } ], "clip_score": 22.78125, "loss": 4.718059539794922 }, { "id": "identity_211887", "image": "A-OKVQA/images/110105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the top of the sign in the background?" }, { "from": "gpt", "value": "The answer is Småtrollcupen" } ], "clip_score": 21.75, "loss": 4.767800807952881 }, { "id": "identity_274395", "image": "A-OKVQA/images/439299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What new addition has been made to this surfboard?\nChoices:\n(A) wheels\n(B) seat\n(C) umbrella\n(D) pole" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.96875, "loss": 4.195435047149658 }, { "id": "identity_112490", "image": "A-OKVQA/images/273200.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the batter's uniform?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 23.671875, "loss": 4.387751579284668 }, { "id": "identity_262670", "image": "TextVQA/images/ca007ca0bbae69ea.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which letters are adjacent to the number 3?" }, { "from": "gpt", "value": "The answer is def" } ], "clip_score": 16.15625, "loss": 4.464102268218994 }, { "id": "identity_252609", "image": "IconQA/images/63759.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the shape is not green, and what is the sum of the numerators and denominators of the fractions representing the green and non-green parts?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 33.96875, "loss": 4.6692070960998535 }, { "id": "identity_333658", "image": "PMC-VQA/images/PMC4978830_fig1_61167.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the primary observation in this CT scan?\nChoices:\n(A) Increase in intracranial pressure\n(B) Pulmonary fibrosis\n(C) Lymphadenopathy and soft tissue involvement\n(D) Acute abdomen features" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.578125, "loss": 4.793589115142822 }, { "id": "identity_124202", "image": "VQA2.0/images/172874.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible Christmas stockings are hanging in the room?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.40625, "loss": 4.89091682434082 }, { "id": "identity_110774", "image": "PMC-VQA/images/PMC8139191_f1_446949.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many colors are used to label different areas in this image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.453125, "loss": 4.3279523849487305 }, { "id": "identity_291231", "image": "A-OKVQA/images/338189.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose are the orange circles utilized?\nChoices:\n(A) pillows\n(B) flotation\n(C) fishing\n(D) decoration" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.9453125, "loss": 4.842870712280273 }, { "id": "identity_282956", "image": "PMC-VQA/images/PMC7927443_f1_383050.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the provided scans, which lung is depicted?\nChoices:\n(A) Left lung\n(B) Both lungs\n(C) Right lung\n(D) None of the above." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.328125, "loss": 4.383061408996582 }, { "id": "identity_306078", "image": "PMC-VQA/images/PMC8539564_jpm-11-01041-f001_93437.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most likely diagnosis based on these findings?\nChoices:\n(A) Metastatic cancer\n(B) Hemangioma\n(C) Hepatocellular carcinoma\n(D) Fibrolamellar carcinoma" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.09375, "loss": 4.74868106842041 }, { "id": "identity_263908", "image": "ScienceQA/images/7601.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city on the map is indicated by the marking? \nChoices:\n(A) Nashville\n(B) Austin\n(C) Memphis\n(D) New Orleans" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.328125, "loss": 4.4394917488098145 }, { "id": "identity_315871", "image": "A-OKVQA/images/451679.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was this oven constructed?\nChoices:\n(A) here\n(B) mexican factory\n(C) mall\n(D) farm" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.09375, "loss": 4.053904056549072 }, { "id": "identity_6894", "image": "A-OKVQA/images/531324.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is sold inside of this street store?\nChoices:\n(A) beer\n(B) coffee\n(C) pizza\n(D) weed" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.1015625, "loss": 4.113842964172363 }, { "id": "identity_278042", "image": "A-OKVQA/images/225919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which person put this animal in the box? \nOptions:\n(A) hobo\n(B) dorothy\n(C) bike rider\n(D) wicked witch" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.390625, "loss": 4.1992506980896 }, { "id": "identity_287382", "image": "VizWiz/images/VizWiz_train_00014023.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me the current time?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 21.96875, "loss": 4.204489707946777 }, { "id": "identity_9725", "image": "FigureQA/images/80452.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Navy Blue have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.78125, "loss": 4.322863578796387 }, { "id": "identity_121920", "image": "PMC-VQA/images/PMC7325357_FIG2_202018.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the possible medical condition indicated by the imaging?" }, { "from": "gpt", "value": "The answer is Past or resolved left MCA occlusion" } ], "clip_score": 31.703125, "loss": 4.700780868530273 }, { "id": "identity_326979", "image": "Super-CLEVR/images/superCLEVR_new_000824.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any other thing that has the same shape as this small green metallic object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 16.6875, "loss": 4.869706153869629 }, { "id": "identity_336557", "image": "A-OKVQA/images/92804.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this person doing?\nChoices:\n(A) seeking shelter\n(B) hiding\n(C) waiting turn\n(D) sleeping" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.4375, "loss": 4.424278259277344 }, { "id": "identity_311010", "image": "TQA/images/abc_question_images/faults_11734.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the Footwall side?\nChoices:\n(A) T\n(B) H\n(C) K\n(D) N" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.125, "loss": 4.144656658172607 }, { "id": "identity_321704", "image": "PMC-VQA/images/PMC5446551_Fig6_169581.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the lesions located?\nChoices:\n(A) Right basal ganglia\n(B) Left basal ganglia\n(C) Cerebellum\n(D) Frontal lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.15625, "loss": 4.332343101501465 }, { "id": "identity_138406", "image": "ChartQA/images/84985912004897.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which region has the second highest share of population with mental health or development disorder in 1999?" }, { "from": "gpt", "value": "The answer is Congo" } ], "clip_score": 30.765625, "loss": 4.3061017990112305 }, { "id": "identity_143055", "image": "MapQA/images/map_2609.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a percentage of games between 35.0%-47.3%?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 34.53125, "loss": 4.127706050872803 }, { "id": "identity_329480", "image": "TQA/images/question_images/human_system_digestive_3687.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What occurs if something gets lodged in the esophagus?\nChoices:\n(A) constipation\n(B) indigestion\n(C) kidney will not function\n(D) there will be difficulty in breathing" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.484375, "loss": 4.058816432952881 }, { "id": "identity_303157", "image": "PMC-VQA/images/PMC3087470_fig1_94393.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is observed in the left adrenal area?\nChoices:\n(A) Cystic mass\n(B) Hypoechoic solid mass\n(C) Hyperechoic solid mass\n(D) No mass seen" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.734375, "loss": 4.49744176864624 }, { "id": "identity_26786", "image": "PMC-VQA/images/PMC7344590_diagnostics-10-00385-f001_207846.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique used for the diagnosis? \nChoices:\n(A) CT scan \n(B) MRI scan \n(C) PET scan \n(D) X-ray scan " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.046875, "loss": 4.098273754119873 }, { "id": "identity_286482", "image": "PMC-VQA/images/PMC6365311_Fig8_432965.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to capture the images among the following options: X-Ray, MRI, Ultrasound, or CT?\nChoices:\n(A) X-Ray \n(B) MRI \n(C) Ultrasound \n(D) CT" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.75, "loss": 4.342309474945068 }, { "id": "identity_134367", "image": "DocVQA/images/lzlh0077_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main topic discussed in the opinion section?" }, { "from": "gpt", "value": "The answer is Prohibiting smoking and government intervention" } ], "clip_score": 22.40625, "loss": 4.254537105560303 }, { "id": "identity_295719", "image": "VizWiz/images/VizWiz_train_00011243.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me what this says?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 17.859375, "loss": 4.371458530426025 }, { "id": "identity_269045", "image": "FigureQA/images/46018.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Pale Green the median with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.90625, "loss": 4.036141395568848 }, { "id": "identity_7409", "image": "PMC-VQA/images/PMC5980103_Fig3_322321.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the arrowhead in the image? \nChoices:\n(A) dental pulp \n(B) amelogenin \n(C) enamel \n(D) epithelial pearls " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.375, "loss": 4.254788398742676 }, { "id": "identity_321634", "image": "PMC-VQA/images/PMC6937827_Fig3_92591.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What body parts are depicted in the image of the 3D printed model?\nChoices:\n(A) Arm and Hand\n(B) Leg and Foot\n(C) Head and Shoulders\n(D) Stomach and Chest" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.203125, "loss": 4.124225616455078 }, { "id": "identity_227906", "image": "PMC-VQA/images/PMC4048546_f0005_295508.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the MRI image, what are the possible implications of the absence of iron deposition in the brain, and how might this affect the diagnosis of neurodegenerative diseases?" }, { "from": "gpt", "value": "The answer is Consider alternative neurological conditions without iron accumulation" } ], "clip_score": 32.96875, "loss": 4.2915940284729 }, { "id": "identity_199985", "image": "PMC-VQA/images/PMC7321844_Fig1_200931.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the coil-like structure indicated by the white arrowheads?" }, { "from": "gpt", "value": "The answer is Embolization" } ], "clip_score": 28.4375, "loss": 4.0178608894348145 }, { "id": "identity_303131", "image": "PMC-VQA/images/PMC8858386_f1_198236.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the strong signal area in this image correspond to?\nChoices:\n(A) bone\n(B) muscle\n(C) nerve\n(D) blood vessel" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.34375, "loss": 5.312106132507324 }, { "id": "identity_153805", "image": "ScienceQA/images/14289.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many colors are used in the object's design?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.140625, "loss": 4.185116767883301 }, { "id": "identity_305692", "image": "AI2D/images/3318.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label could be associated with movement?\nChoices:\n(A) R\n(B) S\n(C) C\n(D) E" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 8.59375, "loss": 4.5175557136535645 }, { "id": "identity_195515", "image": "TextVQA/images/2633f60ac1bc0ea9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tiers of seating are visible in the stadium?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.625, "loss": 4.077544689178467 }, { "id": "identity_20746", "image": "PMC-VQA/images/PMC4064991_pone-0100196-g006_300125.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the white arrowheads indicate in the images? \nChoices:\n(A) cell bodies \n(B) nucleus \n(C) neurite outgrowth \n(D) axon extension " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.890625, "loss": 4.248445987701416 }, { "id": "identity_264203", "image": "Super-CLEVR/images/superCLEVR_new_001251.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the colors of the rubber minivan and the matte wagon the same?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 22.546875, "loss": 5.533279895782471 }, { "id": "identity_248437", "image": "A-OKVQA/images/229149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What event is the man in the blue sweatshirt likely photographing, and what clues in the image support this?" }, { "from": "gpt", "value": "The answer is A cycling race or event" } ], "clip_score": 29.75, "loss": 4.594000816345215 }, { "id": "identity_120055", "image": "TextVQA/images/1b1a6fab9e5536ee.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many palm trees are visible in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 16.0625, "loss": 4.391949653625488 }, { "id": "identity_317913", "image": "TQA/images/question_images/human_system_digestive_3740.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organ is connected to this?\nChoices:\n(A) Salivary Glands\n(B) Gallbladder\n(C) Large Intestine\n(D) Esophagus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.78125, "loss": 4.098687171936035 }, { "id": "identity_196274", "image": "A-OKVQA/images/271118.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible gift bags are there in the room?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.390625, "loss": 4.585912227630615 }, { "id": "identity_21328", "image": "PMC-VQA/images/PMC6822922_FIG1_59149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the body does the patient have an osteoid osteoma? \nChoices:\n(A) Right tibia \n(B) Right femur \n(C) Left tibia \n(D) Left femur " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.015625, "loss": 4.554635524749756 }, { "id": "identity_295597", "image": "DVQA/images/bar_train_00150451.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the bars run horizontally?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 20.125, "loss": 4.405128479003906 }, { "id": "identity_116073", "image": "A-OKVQA/images/402228.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are holding a gaming controller?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 24.203125, "loss": 4.658255100250244 }, { "id": "identity_261411", "image": "PMC-VQA/images/PMC7692147_jcm-09-03411-f002_305983.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of X-ray is shown in the image?\nChoices:\n(A) Lateral X-ray\n(B) Anteroposterior X-ray\n(C) CT scan\n(D) MRI scan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.96875, "loss": 4.63585901260376 }, { "id": "identity_20469", "image": "PMC-VQA/images/PMC6378289_F1_437160.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the indication of joint effusion in the hip? \nChoices:\n(A) Synovial thickening \n(B) Demineralization \n(C) Bulging fat pad \n(D) Blurred cortical margins " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.875, "loss": 4.065916538238525 }, { "id": "identity_183489", "image": "VQA2.0/images/328473.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lanes are visible on the street?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 26.65625, "loss": 4.234584808349609 }, { "id": "identity_331060", "image": "A-OKVQA/images/267203.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of transportation is shown here?\nChoices:\n(A) ferry\n(B) plane\n(C) car\n(D) rail" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.6015625, "loss": 4.185782432556152 }, { "id": "identity_189951", "image": "VQA2.0/images/410590.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles can be seen in the picture?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 27.453125, "loss": 4.544045448303223 }, { "id": "identity_26111", "image": "TabMWP/images/28898.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is the function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.875, "loss": 4.147816181182861 }, { "id": "identity_264690", "image": "Super-CLEVR/images/superCLEVR_new_001277.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there a greater number of large tandem bikes to the left of the wagon compared to small red double buses?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 25.125, "loss": 5.14582633972168 }, { "id": "identity_9019", "image": "FigureQA/images/60598.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Maroon the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.6875, "loss": 4.60560941696167 }, { "id": "identity_279395", "image": "PMC-VQA/images/PMC8579051_f2_107319.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of MRI scan was utilized to capture the image: \n(A) T1-weighted image \n(B) Diffusion-weighted image \n(C) T2-weighted image \n(D) Functional MRI scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.53125, "loss": 4.156250953674316 }, { "id": "identity_266320", "image": "AI2D/images/3084.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is considered the outer part of the cell?\n(A) Cell membrane\n(B) Lysosome\n(C) Nucleus\n(D) Ribosome" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.21875, "loss": 4.157865047454834 }, { "id": "identity_319627", "image": "A-OKVQA/images/268306.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the large glass object on this vehicle called?\nChoices:\n(A) headlight\n(B) turn light\n(C) break light\n(D) night light" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.484375, "loss": 4.69730281829834 }, { "id": "identity_14209", "image": "A-OKVQA/images/320978.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which vegetable is likely the most expensive item by piece or pound?\nChoices:\n(A) artichoke\n(B) celery\n(C) corn\n(D) turnips" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.90625, "loss": 4.214395046234131 }, { "id": "identity_257543", "image": "A-OKVQA/images/393464.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many children are actively participating in the activity, and what specific activity are they engaged in?" }, { "from": "gpt", "value": "The answer is Four children are playing" } ], "clip_score": 26.421875, "loss": 4.750929832458496 }, { "id": "identity_137006", "image": "DVQA/images/bar_train_00105570.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the Y-axis?" }, { "from": "gpt", "value": "The answer is No label visible" } ], "clip_score": 28.046875, "loss": 4.895290374755859 }, { "id": "identity_320351", "image": "PMC-VQA/images/PMC7382491_Fig2_220258.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the staining technique used in this image?\nChoices:\n(A) Thionin \n(B) Fluorescein \n(C) Nissl \n(D) Hematoxylin" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.0625, "loss": 4.17896842956543 }, { "id": "identity_15401", "image": "TQA/images/question_images/cycle_rock_6751.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which rock is formed through heat and pressure?\nChoices:\n(A) igneous rock\n(B) sedimentary rock\n(C) magma\n(D) metamorphic rocks" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 33.71875, "loss": 4.286350250244141 }, { "id": "identity_1838", "image": "A-OKVQA/images/323421.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What items are found on the wall?\nChoices:\n(A) posters\n(B) hams\n(C) pots\n(D) sausages" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.40625, "loss": 4.153385162353516 }, { "id": "identity_323690", "image": "VQA2.0/images/325215.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of place is this?" }, { "from": "gpt", "value": "The answer is printing" } ], "clip_score": 14.9375, "loss": 4.070145130157471 }, { "id": "identity_324428", "image": "AI2D/images/2554.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents this phase?\nChoices:\n(A) C\n(B) F\n(C) E\n(D) G" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.375, "loss": 4.072023868560791 }, { "id": "identity_253247", "image": "PMC-VQA/images/PMC6920284_Fig5_86553.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key anatomical structures visible in this MR scan, and what abnormalities can be identified based on the image?" }, { "from": "gpt", "value": "The answer is Mass in the right orbit affecting surrounding structures" } ], "clip_score": 30.578125, "loss": 5.035544395446777 }, { "id": "identity_38178", "image": "A-OKVQA/images/388374.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of trees are growing in this location?\nChoices:\n(A) pine trees\n(B) palm trees\n(C) willow trees\n(D) birch trees" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.4375, "loss": 4.026093482971191 }, { "id": "identity_308686", "image": "TQA/images/question_images/cycle_water_4978.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which process is responsible for this?\nChoices:\n(A) Transpiration\n(B) Precipitation\n(C) Evaporation\n(D) Collection" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.828125, "loss": 4.63763427734375 }, { "id": "identity_309449", "image": "TextVQA/images/f717dc6f8d6025ab.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify a brand visible in this place?" }, { "from": "gpt", "value": "The answer is contessa" } ], "clip_score": 15.8671875, "loss": 4.554484844207764 }, { "id": "identity_2795", "image": "FigureQA/images/93407.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Purple the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.609375, "loss": 4.3507513999938965 }, { "id": "identity_291905", "image": "ScienceQA/images/5332.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) Virginia\n(B) Florida\n(C) Tennessee\n(D) Connecticut" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.609375, "loss": 4.489163875579834 }, { "id": "identity_311067", "image": "AI2D/images/4292.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of layer A?\nChoices:\n(A) Asthenosphere\n(B) Lithosphere\n(C) Continental crust\n(D) Oceanic crust" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.25, "loss": 4.160754203796387 }, { "id": "identity_38410", "image": "VQA2.0/images/332013.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the beer company shown?" }, { "from": "gpt", "value": "The answer is budweiser" } ], "clip_score": 11.21875, "loss": 4.187525272369385 }, { "id": "identity_6502", "image": "DVQA/images/bar_train_00098746.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second bar from the left in each group?" }, { "from": "gpt", "value": "The answer is wonder" } ], "clip_score": 24.203125, "loss": 4.102653503417969 }, { "id": "identity_31448", "image": "ScienceQA/images/18408.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is highlighted?\nChoices:\n(A) North America\n(B) Africa\n(C) Antarctica\n(D) Asia" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.9375, "loss": 4.452846527099609 }, { "id": "identity_27901", "image": "FigureQA/images/50789.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Mint the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.875, "loss": 4.070579528808594 }, { "id": "identity_331552", "image": "TabMWP/images/15377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is it linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.765625, "loss": 4.3391900062561035 }, { "id": "identity_213676", "image": "VQA2.0/images/257388.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.890625, "loss": 4.716238021850586 }, { "id": "identity_280738", "image": "DVQA/images/bar_train_00051294.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Across all categories, which object is the least preferred by the total number of people?" }, { "from": "gpt", "value": "The answer is hour" } ], "clip_score": 28.3125, "loss": 4.164157867431641 }, { "id": "identity_284826", "image": "PMC-VQA/images/PMC6985450_F1_106968.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of shunt is depicted in the image provided? \nOptions:\n(A) Right to left shunt\n(B) Left to right shunt\n(C) Bidirectional shunt\n(D) No shunt visible" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.71875, "loss": 4.312008857727051 }, { "id": "identity_331218", "image": "DocVQA/images/jnyb0018_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who are the attendees for the breakfast event on Monday, May 8?" }, { "from": "gpt", "value": "The answer is jim helm" } ], "clip_score": 27.328125, "loss": 5.359821796417236 }, { "id": "identity_14342", "image": "ScienceQA/images/1947.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the arrows, which of the following organisms is a decomposer?\nChoices:\n(A) mushroom\n(B) lichen" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.78125, "loss": 4.259086608886719 }, { "id": "identity_279101", "image": "A-OKVQA/images/201141.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose is the green stalk used? \n(A) sale\n(B) decoration\n(C) growing bananas\n(D) hanging bananas" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.515625, "loss": 4.146549224853516 }, { "id": "identity_251641", "image": "Super-CLEVR/images/superCLEVR_new_001629.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shiny object that is the same size as the metal school bus, and how many wheels does it have?" }, { "from": "gpt", "value": "The answer is Red, four" } ], "clip_score": 27.765625, "loss": 4.111560821533203 }, { "id": "identity_272351", "image": "Super-CLEVR/images/superCLEVR_new_001607.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more rubber double busses on the left side of the large green SUV than yellow metal bicycles in front of the small bicycle?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 35.15625, "loss": 4.9782514572143555 }, { "id": "identity_213635", "image": "PMC-VQA/images/PMC3955295_fig6915_274286.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the structure indicated by the arrow labeled \"M\"?" }, { "from": "gpt", "value": "The answer is Liver" } ], "clip_score": 23.703125, "loss": 4.5533647537231445 }, { "id": "identity_72791", "image": "ScienceQA/images/18295.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What process likely led to the formation of the rock in the image?" }, { "from": "gpt", "value": "The answer is Lithification" } ], "clip_score": 27.09375, "loss": 4.379151821136475 }, { "id": "identity_36117", "image": "PMC-VQA/images/PMC6739015_F1_36540.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the diagnostic method used to locate the abdominal mass lesion? \nChoices:\n(A) MRI \n(B) CT scan \n(C) X-ray \n(D) Ultrasound " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.25, "loss": 4.326303482055664 }, { "id": "identity_314068", "image": "TQA/images/abc_question_images/human_system_respiratory_11403.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter indicates the Larynx?\nChoices:\n(A) T\n(B) A\n(C) V\n(D) X" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.5625, "loss": 4.252200126647949 }, { "id": "identity_312608", "image": "Super-CLEVR/images/superCLEVR_new_000164.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any small purple matte items that share the shape of the yellow matte object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 20.078125, "loss": 5.260642051696777 }, { "id": "identity_293939", "image": "PMC-VQA/images/PMC6798454_Fig2_52632.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the scale bars in the image?\nChoices:\n(A) Distance between the cells \n(B) Size of the region of interest \n(C) Magnification of the image \n(D) Time of recording" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.28125, "loss": 4.530920505523682 }, { "id": "identity_302595", "image": "TQA/images/question_images/dna_6764.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What pairs with cytosine?\nChoices:\n(A) Cytosine.\n(B) Adenine.\n(C) Thymine.\n(D) Guanine." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.578125, "loss": 4.453610420227051 }, { "id": "identity_25560", "image": "TextVQA/images/ed15edb180c89ed2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: in what month does the show take place?" }, { "from": "gpt", "value": "The answer is mayo" } ], "clip_score": 12.0234375, "loss": 4.055920600891113 }, { "id": "identity_195418", "image": "VQA-AS/images/12755.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What weather phenomenon is depicted to the left of the sun?" }, { "from": "gpt", "value": "The answer is A cloud" } ], "clip_score": 27.953125, "loss": 4.339416980743408 }, { "id": "identity_297436", "image": "PMC-VQA/images/PMC5449348_Fig3_170632.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the Achilles pathology site indicated by the red brackets. \nOptions:\n(A) Insertional Achilles tendinopathy and superficial bursitis\n(B) Retrocalcaneal bursitis\n(C) Mid-portion Achilles tendinopathy\n(D) None of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.21875, "loss": 4.2627854347229 }, { "id": "identity_295070", "image": "A-OKVQA/images/259542.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the elephants currently engaged in: playing, eating, sleeping, or washing?\nChoices:\n(A) playing\n(B) eating\n(C) sleeping\n(D) washing" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.359375, "loss": 4.431446552276611 }, { "id": "identity_284656", "image": "PMC-VQA/images/PMC7983206_Fig1_397040.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the white arrows represent in the image?\nChoices:\n(A) external boundary of the ovary\n(B) follicles\n(C) corpora lutea\n(D) none of the above." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.375, "loss": 4.9253129959106445 }, { "id": "identity_189840", "image": "A-OKVQA/images/133999.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the bank advertised on the bus?" }, { "from": "gpt", "value": "The answer is Banksur" } ], "clip_score": 23.5, "loss": 4.398242473602295 }, { "id": "identity_281596", "image": "TQA/images/question_images/ocean_waves_7126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Between which two points should you measure the wave in order to determine its height?\nChoices:\n(A) trough and still water level\n(B) crest and crest\n(C) Still water level and crest\n(D) trough and crest" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.46875, "loss": 4.703357219696045 }, { "id": "identity_133123", "image": "TextVQA/images/44cb767484428d30.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the police officer's uniform cap?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 17.9375, "loss": 4.515300273895264 }, { "id": "identity_122591", "image": "A-OKVQA/images/268725.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible logos or brand names are there on the rider's suit?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 25.390625, "loss": 4.526073455810547 }, { "id": "identity_257207", "image": "FigureQA/images/92279.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the least variation in y-axis values?" }, { "from": "gpt", "value": "The answer is Dim Gray" } ], "clip_score": 27.3125, "loss": 5.2618727684021 }, { "id": "identity_32007", "image": "TabMWP/images/8534.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Sasha wrote down how many minutes of TV she watched over the past 4 days. On which day did Sasha watch the most TV?\nChoices:\n(A) Tuesday\n(B) Wednesday\n(C) Thursday\n(D) Friday" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.03125, "loss": 4.01877498626709 }, { "id": "identity_275142", "image": "FigureQA/images/87468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Green Yellow the color with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.609375, "loss": 4.037192344665527 }, { "id": "identity_307866", "image": "ScienceQA/images/18538.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Provo\n(B) Olympia\n(C) Salt Lake City\n(D) Salem" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.984375, "loss": 4.348014831542969 }, { "id": "identity_266207", "image": "AI2D/images/454.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the food web provided, which of the following options correctly represents the transfer of energy:\n(A) Squid to elephant seal\n(B) Krill to zooplankton\n(C) Killer whale to elephant seal\n(D) Penguin to krill" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 37.90625, "loss": 4.221883773803711 }, { "id": "identity_282714", "image": "PMC-VQA/images/PMC6790021_Fig2_50301.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which column in the graph represents distance?\nChoices:\n(A) Left column\n(B) Right column\n(C) Top column\n(D) Bottom column" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 3.79296875, "loss": 4.062571048736572 }, { "id": "identity_323731", "image": "A-OKVQA/images/329664.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the person in the square hold do here?\nChoices:\n(A) films\n(B) keeps score\n(C) hides\n(D) sleeps" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 8.546875, "loss": 4.140959739685059 }, { "id": "identity_310418", "image": "PMC-VQA/images/PMC6325806_Fig11_420541.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the appearance of the outer margins of these segments?\nChoices:\n(A) Overlap \n(B) Disarticulation \n(C) Telescoping \n(D) Constriction" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.2578125, "loss": 4.6896257400512695 }, { "id": "identity_156554", "image": "AI2D/images/711.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many types of rocks are shown in the rock cycle diagram?" }, { "from": "gpt", "value": "The answer is Three types" } ], "clip_score": 32.15625, "loss": 4.1709113121032715 }, { "id": "identity_217065", "image": "A-OKVQA/images/490739.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the likely event taking place based on the attire and objects present?" }, { "from": "gpt", "value": "The answer is A formal religious or state ceremony" } ], "clip_score": 25.828125, "loss": 4.2115888595581055 }, { "id": "identity_216864", "image": "Super-CLEVR/images/superCLEVR_new_000273.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image have wheels?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 25.078125, "loss": 4.610104560852051 }, { "id": "identity_296941", "image": "PMC-VQA/images/PMC6291948_Fig1_411066.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the lungs is the opacity located?\nChoices:\n(A) right lung\n(B) middle lung\n(C) left lung\n(D) both lungs" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.53125, "loss": 4.655757427215576 }, { "id": "identity_315751", "image": "FigureQA/images/27891.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.609375, "loss": 4.094156742095947 }, { "id": "identity_48737", "image": "DocVQA/images/qrdw0217_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the email address provided for the Medical Editor?" }, { "from": "gpt", "value": "The answer is sparker@dwrite.com" } ], "clip_score": 20.21875, "loss": 4.143661022186279 }, { "id": "identity_328728", "image": "ScienceQA/images/2726.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Saint Paul\n(B) Saint Louis\n(C) Helena\n(D) Minneapolis" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.984375, "loss": 4.2636308670043945 }, { "id": "identity_274400", "image": "A-OKVQA/images/26506.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location did these animals discover water?\nChoices:\n(A) on the beach\n(B) near a lake\n(C) near a pool\n(D) in a well" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.921875, "loss": 4.105803489685059 }, { "id": "identity_140683", "image": "DVQA/images/bar_train_00188817.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label has the lowest total value when combining \"driver\" and \"author\" bars?" }, { "from": "gpt", "value": "The answer is dress" } ], "clip_score": 30.71875, "loss": 4.453336238861084 }, { "id": "identity_333921", "image": "ScienceQA/images/2404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What property is associated with this item?\nChoices:\n(A) hard\n(B) transparent" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.6875, "loss": 4.302313804626465 }, { "id": "identity_295298", "image": "FigureQA/images/71726.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Firebrick or Blue Violet?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.515625, "loss": 4.7877197265625 }, { "id": "identity_14899", "image": "A-OKVQA/images/430750.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the man near the plane wearing a yellow vest?\nChoices:\n(A) punishment\n(B) visibility\n(C) to hide\n(D) fashion" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.25, "loss": 4.488247871398926 }, { "id": "identity_126980", "image": "PMC-VQA/images/PMC4151864_fig5_317318.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate size of the area indicated by the arrows?" }, { "from": "gpt", "value": "The answer is Measurement required for precise size" } ], "clip_score": 19.5625, "loss": 4.299668788909912 }, { "id": "identity_76745", "image": "AI2D/images/1904.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many secondary consumers are depicted in this food web?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 29.421875, "loss": 4.896237850189209 }, { "id": "identity_293095", "image": "A-OKVQA/images/306305.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location are these two girls currently located?\nChoices:\n(A) restaurant\n(B) liquor store\n(C) convenience store\n(D) supermarket" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.125, "loss": 4.527608394622803 }, { "id": "identity_45498", "image": "DocVQA/images/khgl0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the initials of the person who wrote the letter?" }, { "from": "gpt", "value": "The answer is CJB" } ], "clip_score": 23.625, "loss": 5.244914531707764 }, { "id": "identity_247854", "image": "VQA-AS/images/11888.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total area of both windows combined?" }, { "from": "gpt", "value": "The answer is \\( 2hw \\)" } ], "clip_score": 19.140625, "loss": 4.825863361358643 }, { "id": "identity_33634", "image": "IconQA/images/89732.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a train. Which picture is fifth?\nChoices:\n(A) train\n(B) bear\n(C) plane" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.5, "loss": 4.535523414611816 }, { "id": "identity_337735", "image": "PMC-VQA/images/PMC4912769_Fig1_42855.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown artificially in this color? \nChoices:\n(A) segmented ramifications linked to individual microglia \n(B) unattributed ramifications in white \n(C) fluorescent protein \n(D) scale bar" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.984375, "loss": 4.792115211486816 }, { "id": "identity_292262", "image": "A-OKVQA/images/576650.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which mode of transportation are the majority of people traveling?\nChoices:\n(A) car\n(B) train\n(C) bus\n(D) bicycle" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.5, "loss": 4.219742298126221 }, { "id": "identity_318408", "image": "TextVQA/images/308a278386715574.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What word is on the door of the yellow car?" }, { "from": "gpt", "value": "The answer is kyoto" } ], "clip_score": 18.8125, "loss": 4.424966335296631 }, { "id": "identity_117623", "image": "PMC-VQA/images/PMC8342271_Fig3_27574.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the direction of the arrow pointing to in the image?" }, { "from": "gpt", "value": "The answer is Lower lumbar vertebra" } ], "clip_score": 31.890625, "loss": 4.237428188323975 }, { "id": "identity_287632", "image": "A-OKVQA/images/2315.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What distinctive characteristic do these animals possess?\nChoices:\n(A) quills\n(B) gills\n(C) neck\n(D) trunk" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.25, "loss": 4.061370849609375 }, { "id": "identity_20275", "image": "A-OKVQA/images/69196.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what building is the organization in question based?\nChoices:\n(A) pentagon\n(B) capitol\n(C) white house\n(D) lincoln memorial" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.296875, "loss": 4.341878890991211 }, { "id": "identity_202885", "image": "VizWiz/images/VizWiz_train_00001254.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the laptop?" }, { "from": "gpt", "value": "The answer is MSI" } ], "clip_score": 29.046875, "loss": 4.142120838165283 }, { "id": "identity_9386", "image": "A-OKVQA/images/223481.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the grey car driving on?\nChoices:\n(A) parking lot\n(B) dirt\n(C) street\n(D) sand" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.1875, "loss": 4.284486293792725 }, { "id": "identity_113856", "image": "Super-CLEVR/images/superCLEVR_new_001250.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the object directly above the blue motorbike?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 23.765625, "loss": 4.21297025680542 }, { "id": "identity_204287", "image": "TextVQA/images/3be4dac11cd47d76.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 15.109375, "loss": 4.491301536560059 }, { "id": "identity_305253", "image": "PMC-VQA/images/PMC5989222_f0001_324804.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging test is suggested to identify metastases from large-cell neuroendocrine carcinoma of the cervix in the chest area?\nChoices:\n(A) CT scan without contrast \n(B) MRI of the chest with gadolinium \n(C) PET scan \n(D) Chest X-ray" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.484375, "loss": 4.054268836975098 }, { "id": "identity_321440", "image": "PMC-VQA/images/PMC6476123_fig2_469075.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the initial CT show at the time of admission?\nChoices:\n(A) Bilateral fracture of some medial mass of the sacrum. \n(B) Displaced bilateral fracture of some lateral mass of the sacrum. \n(C) Nondisplaced unilateral fracture of some medial mass of the sacrum. \n(D) Displaced unilateral fracture of some lateral mass of the sacrum." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.25, "loss": 4.020983695983887 }, { "id": "identity_272648", "image": "PMC-VQA/images/PMC8871555_diagnostics-12-00301-f005_204036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the lobulated mass on the frontal chest radiograph that is situated on the right side?\nChoices:\n(A) Anterior mediastinum \n(B) Middle mediastinum \n(C) Posterior mediastinum \n(D) Superior mediastinum" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.53125, "loss": 4.357461452484131 }, { "id": "identity_110187", "image": "VQA2.0/images/465018.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What cautionary road sign is present on the right side of the image?" }, { "from": "gpt", "value": "The answer is Caution Children\" sign" } ], "clip_score": 21.578125, "loss": 4.01858377456665 }, { "id": "identity_319066", "image": "FigureQA/images/62466.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Olive's value the highest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.34375, "loss": 4.570252895355225 }, { "id": "identity_267723", "image": "FigureQA/images/65190.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater in value, Light Gold or Maroon?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.90625, "loss": 4.611377239227295 }, { "id": "identity_317820", "image": "ScienceQA/images/2177.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What describes the Steigerwald Forest ecosystem?\nChoices:\n(A) It has many different types of trees.\n(B) It has only a few types of trees.\n(C) It has soil that is poor in nutrients." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.390625, "loss": 4.3993096351623535 }, { "id": "identity_238418", "image": "A-OKVQA/images/317851.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the possible reasons for the hood of the car being open, and which one is most likely based on the context of the image?" }, { "from": "gpt", "value": "The answer is For repairs" } ], "clip_score": 18.3125, "loss": 5.370275020599365 }, { "id": "identity_230935", "image": "TabMWP/images/3087.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money will Judy have left if she buys a pair of shorts, a long-sleeved shirt, and a denim skirt?" }, { "from": "gpt", "value": "The answer is -3" } ], "clip_score": 36.21875, "loss": 4.366857528686523 }, { "id": "identity_269401", "image": "TQA/images/question_images/parts_worm_7313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term used to refer to the storage of sperm?\nChoices:\n(A) Testes\n(B) Seminal Vesicle\n(C) Vas Deferens\n(D) Copulatory Bursa" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.953125, "loss": 4.374123573303223 }, { "id": "identity_26308", "image": "A-OKVQA/images/157635.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on the TV?\nChoices:\n(A) sporting event\n(B) judge show\n(C) cartoons\n(D) baking show" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.953125, "loss": 4.253356456756592 }, { "id": "identity_311574", "image": "FigureQA/images/87469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.8359375, "loss": 4.122808456420898 }, { "id": "identity_291858", "image": "PMC-VQA/images/PMC6510869_Fig3_477974.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which image was utilized to calculate the Expanded Disability Status Scale score?\nOptions:\n(A) FLAIR image\n(B) T2-weighted image\n(C) T1-weighted image\n(D) Diffusion-weighted image" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.875, "loss": 4.0505781173706055 }, { "id": "identity_32891", "image": "PMC-VQA/images/PMC6861597_fig0002_70162.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of image is being corrected with N4 bias correction? \nChoices:\n(A) T1-weighted image \n(B) FLAIR image \n(C) T2-weighted image \n(D) DWI image " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.6875, "loss": 4.326564788818359 }, { "id": "identity_303453", "image": "IconQA/images/45246.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image shows a plane. Which image is tenth?\nChoices:\n(A) bear\n(B) train\n(C) plane" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.296875, "loss": 4.0104546546936035 }, { "id": "identity_32936", "image": "Super-CLEVR/images/superCLEVR_new_000348.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the biplane and the red matte thing have the same size?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 18.984375, "loss": 5.354485988616943 }, { "id": "identity_328311", "image": "PMC-VQA/images/PMC7191704_Fig1_161372.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was used to detect this condition?\nChoices:\n(A) Computed tomography \n(B) Magnetic Resonance Imaging \n(C) Mammography \n(D) X-ray" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.28125, "loss": 4.3479719161987305 }, { "id": "identity_904", "image": "A-OKVQA/images/216581.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are they all doing?\nChoices:\n(A) resting\n(B) relaxing\n(C) sleeping\n(D) eating" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.1875, "loss": 4.164820194244385 }, { "id": "identity_154612", "image": "ScienceQA/images/10202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate diameter of the largest Victoria amazonica leaf in the image?" }, { "from": "gpt", "value": "The answer is An estimation based on image scale" } ], "clip_score": 35.84375, "loss": 4.329023838043213 }, { "id": "identity_235614", "image": "TextVQA/images/5803b2404bf9470b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the traffic signal and the accompanying sign, and how should a driver proceed when encountering this combination?" }, { "from": "gpt", "value": "The answer is Stop and make a right turn if safe" } ], "clip_score": 30.9375, "loss": 4.176377296447754 }, { "id": "identity_219844", "image": "VQA2.0/images/186405.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dining tables are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 16.921875, "loss": 5.176523685455322 }, { "id": "identity_119992", "image": "TextVQA/images/4d6cd28c29771d91.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels does the trash bin have?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 30.3125, "loss": 4.2525200843811035 }, { "id": "identity_278657", "image": "AI2D/images/4760.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does label K represent?\nChoices:\n(A) sinuate\n(B) undulate\n(C) entire\n(D) Spiny" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 7.81640625, "loss": 4.0315351486206055 }, { "id": "identity_288046", "image": "PMC-VQA/images/PMC4538772_fig3_415204.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of scans are depicted in the image provided?\nChoices:\n(A) X-ray \n(B) CT scan \n(C) MRI \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.65625, "loss": 4.1518378257751465 }, { "id": "identity_317262", "image": "TQA/images/abc_question_images/seasons_11349.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the red line at J called?\nChoices:\n(A) EQUATOR\n(B) VERNAL EQUINOX\n(C) WINTER SOLSTICE\n(D) MAGNETIC POLE" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.375, "loss": 4.433558940887451 }, { "id": "identity_118330", "image": "Super-CLEVR/images/superCLEVR_new_001630.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly behind the blue motorcycle?" }, { "from": "gpt", "value": "The answer is A green airplane" } ], "clip_score": 29.28125, "loss": 4.015102386474609 }, { "id": "identity_296048", "image": "PMC-VQA/images/PMC7867549_Fig6_362929.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the Angio-MR with paramagnetic contrast medium regarding the left subclavian artery? \nChoices:\n(A) inflammation in the subclavian artery\n(B) dilation in the left subclavian artery\n(C) narrowing of the left subclavian artery\n(D) no abnormalities in the left subclavian artery" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.109375, "loss": 4.006308555603027 }, { "id": "identity_28429", "image": "DVQA/images/bar_train_00002558.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many algorithms have accuracy lower than 9 in at least one dataset?" }, { "from": "gpt", "value": "The answer is eight" } ], "clip_score": 31.296875, "loss": 4.1370625495910645 }, { "id": "identity_321899", "image": "A-OKVQA/images/175737.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of landscape is seen in the background?\nChoices:\n(A) city\n(B) desert\n(C) farm\n(D) tropical jungle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.78125, "loss": 4.539328575134277 }, { "id": "identity_313273", "image": "FigureQA/images/6904.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.890625, "loss": 4.036276340484619 }, { "id": "identity_277618", "image": "AI2D/images/2956.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the diagram is highlighted in yellow?\nChoices:\n(A) parietal lobe\n(B) frontal lobe\n(C) occipital lobe\n(D) none of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.828125, "loss": 4.152170181274414 }, { "id": "identity_296569", "image": "A-OKVQA/images/326936.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following items is a human most likely to trip over? \n(A) stool\n(B) firewood\n(C) fan\n(D) bleach" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.71875, "loss": 4.12680721282959 }, { "id": "identity_208917", "image": "PMC-VQA/images/PMC6365311_Fig3_432950.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the mass?" }, { "from": "gpt", "value": "The answer is Irregular" } ], "clip_score": 16.8125, "loss": 4.464354038238525 }, { "id": "identity_15383", "image": "A-OKVQA/images/141426.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What species of animal are the largest mammals here?\nChoices:\n(A) bovine\n(B) ovine\n(C) porcine\n(D) equine" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.234375, "loss": 4.196762561798096 }, { "id": "identity_335954", "image": "TextVQA/images/81478273f967ea73.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this blue bin for?" }, { "from": "gpt", "value": "The answer is glass" } ], "clip_score": 22.890625, "loss": 4.108819007873535 }, { "id": "identity_328075", "image": "ScienceQA/images/19329.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Cheyenne\n(B) Laramie\n(C) Boise\n(D) Baltimore" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.296875, "loss": 4.372412204742432 }, { "id": "identity_282508", "image": "TabMWP/images/9621.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the data provided in the table, what was the difference in the time taken to complete homework between Wednesday and Thursday?" }, { "from": "gpt", "value": "The answer is -9" } ], "clip_score": 28.234375, "loss": 4.002849102020264 }, { "id": "identity_321740", "image": "A-OKVQA/images/172690.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do they want to prevent the ball from touching?\nChoices:\n(A) grass\n(B) sand\n(C) people\n(D) net" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.203125, "loss": 4.3006591796875 }, { "id": "identity_334703", "image": "A-OKVQA/images/412680.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this side dish?\nChoices:\n(A) potato salad\n(B) beets\n(C) carrots\n(D) fries" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.8671875, "loss": 4.41190242767334 }, { "id": "identity_31826", "image": "TextVQA/images/9aff1f3584844e37.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: how many axle?" }, { "from": "gpt", "value": "The answer is aviva stadium" } ], "clip_score": 11.6015625, "loss": 5.709231853485107 }, { "id": "identity_131322", "image": "TextVQA/images/a2a51e04db82a306.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible street lamps are there in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 17.34375, "loss": 4.117421627044678 }, { "id": "identity_333699", "image": "A-OKVQA/images/419232.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a likely occasion for all these people getting together?\nChoices:\n(A) funeral\n(B) school\n(C) church\n(D) birthday party" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.859375, "loss": 4.415678977966309 }, { "id": "identity_295817", "image": "A-OKVQA/images/433554.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which item of clothing is the man wearing in red? \n(A) scuba outfit \n(B) lifejacket \n(C) swimsuit \n(D) jeans" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.28125, "loss": 4.30382776260376 }, { "id": "identity_275722", "image": "A-OKVQA/images/340778.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which activity are these officers most likely participating?\nChoices:\n(A) bake sale\n(B) police escort\n(C) race\n(D) prostitution" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.09375, "loss": 4.597879409790039 }, { "id": "identity_298249", "image": "IconQA/images/43748.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) flip\n(B) turn\n(C) slide" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.71875, "loss": 4.9034037590026855 }, { "id": "identity_320204", "image": "A-OKVQA/images/572725.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been rubbed off the top of this?\n\nChoices:\n(A) food\n(B) grass\n(C) paint\n(D) nothing" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 8.0859375, "loss": 4.448010444641113 }, { "id": "identity_292360", "image": "PMC-VQA/images/PMC4645184_f10_443933.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the right panels of the representative images? Is it:\n(A) Antibody staining\n(B) H&E staining\n(C) GFP fluorescence\n(D) Phase-contrast microscopy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.125, "loss": 4.367111682891846 }, { "id": "identity_291072", "image": "A-OKVQA/images/300772.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How can you tell that the boys in the background are merely spectators based on the given options?\nChoices:\n(A) too small\n(B) no uniform\n(C) too big\n(D) laughing" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.0625, "loss": 4.261072635650635 }, { "id": "identity_296081", "image": "PMC-VQA/images/PMC8065997_diagnostics-11-00636-f001_424327.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging view was utilized to detect the absence of the pedicle winking owl sign in the right T1 vertebra? \nOptions:\n(A) anteroposterior view\n(B) posterior view\n(C) lateral view\n(D) thoracic lateral view" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.34375, "loss": 4.056466579437256 }, { "id": "identity_28425", "image": "PMC-VQA/images/PMC8011180_Fig1_407190.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrow in the T2/FLAIR sequence refer to? \nChoices:\n(A) grey matter \n(B) white matter \n(C) cerebral fluid \n(D) bone " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.6875, "loss": 4.759137153625488 }, { "id": "identity_212041", "image": "PMC-VQA/images/PMC4928431_F4_46777.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cross-sectional images of the brain are displayed?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 32.21875, "loss": 4.048454761505127 }, { "id": "identity_7728", "image": "TQA/images/question_images/biomes_6567.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which biome is represented by the color yellow?\nChoices:\n(A) Chaparral\n(B) Tropical Rainforest\n(C) Desert\n(D) Tundra" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.796875, "loss": 4.406091690063477 }, { "id": "identity_289536", "image": "DVQA/images/bar_train_00118794.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: For any dataset, which algorithm has the lowest accuracy?" }, { "from": "gpt", "value": "The answer is enemy" } ], "clip_score": 31.78125, "loss": 4.591717720031738 }, { "id": "identity_207929", "image": "A-OKVQA/images/304295.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the object on the ground to the left of the gates?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 17.21875, "loss": 4.297407627105713 }, { "id": "identity_275223", "image": "AI2D/images/4808.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term used to describe a branched cluster of flowers where the branches are racemes?\nChoices:\n(A) panicle\n(B) flowers\n(C) raceme\n(D) sessile" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.0, "loss": 4.227414131164551 }, { "id": "identity_563", "image": "AI2D/images/2939.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the letter D represent?\nChoices:\n(A) Endoplasmic Reticulum\n(B) Cytoplasm\n(C) DNA\n(D) Nucleus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.53125, "loss": 5.131974220275879 }, { "id": "identity_262224", "image": "A-OKVQA/images/581394.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity are the individuals engaged in close to the bus?\nChoices:\n(A) dancing\n(B) cleaning\n(C) sleeping\n(D) packing" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.4453125, "loss": 4.038022994995117 }, { "id": "identity_232566", "image": "FigureQA/images/81894.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the maximum area under the curve, and what is the difference in the area under the curve between Medium Orchid and this color?" }, { "from": "gpt", "value": "The answer is Peru, 300 units" } ], "clip_score": 25.734375, "loss": 5.205375671386719 }, { "id": "identity_4911", "image": "A-OKVQA/images/340454.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the large red object in the background called?\nChoices:\n(A) crane\n(B) drill\n(C) ladder\n(D) skyscraper" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.5, "loss": 4.694220542907715 }, { "id": "identity_270248", "image": "VQA2.0/images/48759.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which individual crosses the street?" }, { "from": "gpt", "value": "The answer is oman" } ], "clip_score": 18.640625, "loss": 4.828487873077393 }, { "id": "identity_25571", "image": "PMC-VQA/images/PMC4331831_F8_359694.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the mass located in the patient?\nChoices:\n(A) Anterior segment of the left lower lobe \n(B) Posterior segment of the right upper lobe \n(C) Subpleural region of the left upper lobe \n(D) Central region of the right lower lobe " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.125, "loss": 4.26021671295166 }, { "id": "identity_5436", "image": "ScienceQA/images/20958.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of New Jersey?\nChoices:\n(A) Knoxville\n(B) Jersey City\n(C) Trenton\n(D) Dover" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.625, "loss": 4.033783912658691 }, { "id": "identity_279401", "image": "IconQA/images/43362.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How has this letter been altered? \nChoices:\n(A) slid\n(B) flipped\n(C) turned" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.515625, "loss": 4.369590759277344 }, { "id": "identity_28699", "image": "A-OKVQA/images/25996.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What motion makes the child smile?\nChoices:\n(A) nodding\n(B) earthquake\n(C) rocking\n(D) sliding" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.2578125, "loss": 4.128521919250488 }, { "id": "identity_126482", "image": "Super-CLEVR/images/superCLEVR_new_001862.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the car that is located to the left of the green scooter?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 28.859375, "loss": 4.584106922149658 }, { "id": "identity_107071", "image": "A-OKVQA/images/247576.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrian crossings are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.28125, "loss": 4.254464149475098 }, { "id": "identity_68726", "image": "ScienceQA/images/4222.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states share a border with the highlighted state?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 32.0, "loss": 4.270342826843262 }, { "id": "identity_330805", "image": "PMC-VQA/images/PMC7008751_FIG2_113810.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of scan was done?\nChoices:\n(A) CT scan\n(B) PET scan\n(C) MRI\n(D) X-ray" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.8125, "loss": 4.064582824707031 }, { "id": "identity_315799", "image": "A-OKVQA/images/116882.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does it seem like the stuffed animal is doing?\nChoices:\n(A) eating\n(B) drinking\n(C) singing\n(D) attacking" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.1875, "loss": 4.665718078613281 }, { "id": "identity_281507", "image": "DocVQA/images/jzbn0226_25.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What aspects should be capable of being altered, either expanded or reduced?" }, { "from": "gpt", "value": "The answer is low priority designations" } ], "clip_score": 20.390625, "loss": 4.632385730743408 }, { "id": "identity_273723", "image": "FigureQA/images/85017.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Red's area under the curve the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.1875, "loss": 4.512534141540527 }, { "id": "identity_295695", "image": "A-OKVQA/images/146841.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which fruit is depicted in the image? \n(A) banana\n(B) peach\n(C) watermelon\n(D) strawberry" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.703125, "loss": 4.350980281829834 }, { "id": "identity_268739", "image": "DocVQA/images/hffn0226_17.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which option corresponds to number 3?" }, { "from": "gpt", "value": "The answer is Daily Rental" } ], "clip_score": 11.5546875, "loss": 4.302191734313965 }, { "id": "identity_32496", "image": "ScienceQA/images/1048.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these continents does the prime meridian intersect?\nChoices:\n(A) Africa\n(B) Australia\n(C) South America" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.703125, "loss": 4.264492988586426 }, { "id": "identity_288537", "image": "PlotQA/images/49330.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name or heading of the Y-axis?" }, { "from": "gpt", "value": "The answer is Year" } ], "clip_score": 19.328125, "loss": 4.625200271606445 }, { "id": "identity_32633", "image": "A-OKVQA/images/99539.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most likely drink in the cup on the table?\nChoices:\n(A) orange juice\n(B) coffee\n(C) alcohol\n(D) water" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.9375, "loss": 4.37505578994751 }, { "id": "identity_38273", "image": "PMC-VQA/images/PMC7171746_Fig1_155181.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the sign seen in the brain MRI in a:? \nChoices:\n(A) SCP sign \n(B) PCA sign \n(C) MCP sign \n(D) MCA sign " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.21875, "loss": 4.172022819519043 }, { "id": "identity_265711", "image": "ScienceQA/images/3073.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is used to make this plate?\nChoices:\n(A) wood\n(B) ceramic" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.578125, "loss": 4.1807541847229 }, { "id": "identity_278555", "image": "A-OKVQA/images/196341.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which time is shown in the photo: 815, 945, 215, or 145?\nChoices:\n(A) 815\n(B) 945\n(C) 215\n(D) 145" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.515625, "loss": 4.142658710479736 }, { "id": "identity_326207", "image": "PMC-VQA/images/PMC7796587_Fig7_339434.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the red fluorescence indicate in these images?\nChoices:\n(A) DNA expression\n(B) Nitrotyrosine expression\n(C) Mitochondrial expression\n(D) No expression" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.421875, "loss": 4.859101295471191 }, { "id": "identity_24923", "image": "A-OKVQA/images/210728.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What period of the day is shown in the picture?\nChoices:\n(A) morning\n(B) evening\n(C) afternoon\n(D) night" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 8.28125, "loss": 4.519534111022949 }, { "id": "identity_29931", "image": "IconQA/images/88327.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a paw. Which picture is ninth?\nChoices:\n(A) dog\n(B) paw\n(C) house" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.03125, "loss": 4.7845354080200195 }, { "id": "identity_310990", "image": "AI2D/images/2752.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the phase that follows the Waning Gibbous Moon?\nChoices:\n(A) Waxing Crescent Moon\n(B) Last Quarter Half Moon\n(C) New Moon\n(D) Waxing Gibbous Moon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.625, "loss": 4.188959121704102 }, { "id": "identity_118471", "image": "PMC-VQA/images/PMC7910496_f0005_375522.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image with respect to the patient's body?" }, { "from": "gpt", "value": "The answer is Axial plane, feet at the bottom" } ], "clip_score": 26.03125, "loss": 4.345828533172607 }, { "id": "identity_108091", "image": "Super-CLEVR/images/superCLEVR_new_001641.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the largest bicycle in the image?" }, { "from": "gpt", "value": "The answer is Blue frame, yellow handlebars" } ], "clip_score": 25.640625, "loss": 4.058166980743408 }, { "id": "identity_20628", "image": "A-OKVQA/images/205116.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the old man doing with the scissors?\nChoices:\n(A) cutting hair\n(B) cutting string\n(C) cutting paper\n(D) cutting fruit" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.625, "loss": 4.4392266273498535 }, { "id": "identity_327300", "image": "PMC-VQA/images/PMC5932520_fig1_309383.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What phase is depicted in the image?\nChoices:\n(A) venous phase\n(B) arterial phase\n(C) delayed phase\n(D) early parenchymal phase" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.3125, "loss": 4.22409725189209 }, { "id": "identity_200672", "image": "VQA2.0/images/409209.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What season does the image likely represent?" }, { "from": "gpt", "value": "The answer is Early autumn" } ], "clip_score": 21.40625, "loss": 4.2231950759887695 }, { "id": "identity_111813", "image": "VQA2.0/images/68385.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many condiment packets are visible on the plate?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.96875, "loss": 4.678216457366943 }, { "id": "identity_266227", "image": "PMC-VQA/images/PMC4406019_Fig4_379826.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does panel A display?\nOptions:\n(A) The sample from the model group\n(B) The sample from the sham group\n(C) The sample from the control group\n(D) No sample is depicted in this panel." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.203125, "loss": 4.02063512802124 }, { "id": "identity_18965", "image": "TQA/images/question_images/human_system_digestive_6082.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is between the Anus and the Large Intestine?\nChoices:\n(A) Small Intestine\n(B) Rectum\n(C) Stomach\n(D) Lleum" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.0, "loss": 4.190898895263672 }, { "id": "identity_205546", "image": "VQA2.0/images/449776.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tiers does the birthday cake have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.4375, "loss": 4.241811752319336 }, { "id": "identity_38298", "image": "PMC-VQA/images/PMC6776557_Fig6_45626.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color of the PGCs on the picture? \nChoices:\n(A) blue \n(B) white \n(C) green \n(D) red " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.828125, "loss": 4.060158729553223 }, { "id": "identity_318354", "image": "PMC-VQA/images/PMC3914336_fig2_264315.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this located?\nChoices:\n(A) Anterior vaginal wall\n(B) Posterior vaginal wall\n(C) Cervix\n(D) Uterus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.078125, "loss": 4.327810764312744 }, { "id": "identity_295301", "image": "PMC-VQA/images/PMC6723115_F1_33045.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is represented by the white dotted line in the schematic?\nChoices:\n(A) The basal surface \n(B) The lateral surface \n(C) The apical surface \n(D) The luminal surface." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.890625, "loss": 4.14237117767334 }, { "id": "identity_272502", "image": "VizWiz/images/VizWiz_train_00012686.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of flower is this purple flower?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 27.9375, "loss": 4.041205883026123 }, { "id": "identity_133386", "image": "VizWiz/images/VizWiz_train_00016576.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material appears to be at the bottom of the object?" }, { "from": "gpt", "value": "The answer is Metal" } ], "clip_score": 24.609375, "loss": 4.012243747711182 }, { "id": "identity_96486", "image": "IconQA/images/50752.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which shape is directly in the middle of the sequence?" }, { "from": "gpt", "value": "The answer is A star" } ], "clip_score": 24.625, "loss": 4.476089000701904 }, { "id": "identity_125415", "image": "A-OKVQA/images/286376.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many complete skateboards are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.15625, "loss": 4.371509552001953 }, { "id": "identity_328802", "image": "PMC-VQA/images/PMC3932416_F6_268892.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of image is depicted?\nChoices:\n(A) SEM image\n(B) CT scan image\n(C) Brightfield image\n(D) X-ray image" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.984375, "loss": 5.320805549621582 }, { "id": "identity_62261", "image": "DocVQA/images/rlvm0227_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the ESD Project Leader?" }, { "from": "gpt", "value": "The answer is Martin Redding" } ], "clip_score": 18.828125, "loss": 4.379591941833496 }, { "id": "identity_220909", "image": "VQA-AS/images/6242.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many building blocks are scattered on the floor?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 21.34375, "loss": 4.428027153015137 }, { "id": "identity_13894", "image": "AI2D/images/4379.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What object is shown in the diagram above?\nChoices:\n(A) Humans Feet\n(B) Birds Feet\n(C) Birds Wings\n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.859375, "loss": 4.040101051330566 }, { "id": "identity_218048", "image": "TextVQA/images/6f267dc37efc0991.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the can in the middle?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 18.421875, "loss": 4.040859699249268 }, { "id": "identity_331607", "image": "A-OKVQA/images/341838.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who mostly lives in this area?\nChoices:\n(A) korean\n(B) chinese\n(C) japanese\n(D) indian" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.234375, "loss": 4.1892595291137695 }, { "id": "identity_337093", "image": "IconQA/images/67383.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on?\nChoices:\n(A) white\n(B) yellow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.125, "loss": 4.787995338439941 }, { "id": "identity_207230", "image": "TextVQA/images/747cbfa915e19f31.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of device is placed next to the monitor on the left?" }, { "from": "gpt", "value": "The answer is Webcam" } ], "clip_score": 23.03125, "loss": 4.234317302703857 }, { "id": "identity_113116", "image": "VQA-AS/images/13783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are there on the top shelf?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.515625, "loss": 4.469046115875244 }, { "id": "identity_277829", "image": "DVQA/images/bar_train_00082575.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store is represented by the crimson color?" }, { "from": "gpt", "value": "The answer is spot" } ], "clip_score": 26.890625, "loss": 4.379756450653076 }, { "id": "identity_153863", "image": "AI2D/images/158.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the position of the Sun relative to the Earth during a spring tide?" }, { "from": "gpt", "value": "The answer is Directly behind or in front" } ], "clip_score": 35.90625, "loss": 4.042519569396973 }, { "id": "identity_296039", "image": "PMC-VQA/images/PMC6158818_Fig2_372636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this case, where is the increased tracer uptake observed in the patient?\nChoices:\n(A) Left femoral shaft\n(B) Right femoral shaft\n(C) Femoral head\n(D) Pelvis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.15625, "loss": 4.480260372161865 }, { "id": "identity_4866", "image": "Super-CLEVR/images/superCLEVR_new_000572.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the mountain bike and the cruiser have the same size?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 20.609375, "loss": 5.5020551681518555 }, { "id": "identity_207685", "image": "PMC-VQA/images/PMC8227570_children-08-00450-f001_476791.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the general condition of the brain tissue based on this image?" }, { "from": "gpt", "value": "The answer is Normal condition" } ], "clip_score": 33.21875, "loss": 4.306830883026123 }, { "id": "identity_287880", "image": "PMC-VQA/images/PMC4998534_F1_66567.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the orientation of the MRI images being displayed?\nChoices:\n(A) Axial\n(B) Coronal\n(C) Sagittal\n(D) Oblique" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.390625, "loss": 4.122548580169678 }, { "id": "identity_9179", "image": "IconQA/images/52332.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner more likely to land?\nChoices:\n(A) white\n(B) blue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.890625, "loss": 4.007916450500488 }, { "id": "identity_329243", "image": "PMC-VQA/images/PMC7448131_fig2-2050313X20952996_240208.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this located?\nChoices:\n(A) Right axilla \n(B) Left axilla \n(C) Abdomen \n(D) Chest" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.8125, "loss": 4.147067546844482 }, { "id": "identity_333950", "image": "VQA2.0/images/63875.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How is the giraffe taking the food?" }, { "from": "gpt", "value": "The answer is his mouth" } ], "clip_score": 29.21875, "loss": 4.006374835968018 }, { "id": "identity_18107", "image": "AI2D/images/1184.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the core of the Nucleus called?\nChoices:\n(A) Centrioles\n(B) Nucleolus\n(C) cytoplasm\n(D) ribosomes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.765625, "loss": 4.1498541831970215 }, { "id": "identity_282235", "image": "DVQA/images/bar_train_00018032.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the leftmost bar in each group?" }, { "from": "gpt", "value": "The answer is core" } ], "clip_score": 18.609375, "loss": 4.162775993347168 }, { "id": "identity_335408", "image": "A-OKVQA/images/123535.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of sheep is closest to the back of the blue trailer?\nChoices:\n(A) ewe\n(B) lamb\n(C) ram\n(D) wether" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.109375, "loss": 4.263054847717285 }, { "id": "identity_270870", "image": "FigureQA/images/6412.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Steel Blue the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.734375, "loss": 4.007513999938965 }, { "id": "identity_338472", "image": "A-OKVQA/images/76578.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which direction are the elephants moving?\nChoices:\n(A) west\n(B) east\n(C) north\n(D) south" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.5625, "loss": 4.350244045257568 }, { "id": "identity_23515", "image": "ScienceQA/images/14562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these three objects have in common?\nChoices:\n(A) stretchy\n(B) rough\n(C) sour" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 7.9609375, "loss": 4.324830055236816 }, { "id": "identity_22307", "image": "VQA2.0/images/221674.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is in the background?" }, { "from": "gpt", "value": "The answer is mountains" } ], "clip_score": 14.0703125, "loss": 4.143092632293701 }, { "id": "identity_298188", "image": "A-OKVQA/images/247146.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the skill level comparison between the opposing teams in this game?\nChoices:\n(A) both novices\n(B) even\n(C) lopsided\n(D) senior masters" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.71875, "loss": 4.42653751373291 }, { "id": "identity_313557", "image": "A-OKVQA/images/480311.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the relationship of the two adult individuals to the boys?\nChoices:\n(A) students\n(B) prisoners\n(C) teachers\n(D) strangers" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.046875, "loss": 4.636477470397949 }, { "id": "identity_325839", "image": "PMC-VQA/images/PMC4638099_Fig1_442499.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the pattern within the white matter resemble?\nChoices:\n(A) Zebra print \n(B) Leopard spots \n(C) Tiger skin \n(D) Snake scales" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.546875, "loss": 4.630928993225098 }, { "id": "identity_63340", "image": "AI2D/images/3846.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many types of leaves are identified in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 28.96875, "loss": 4.705805778503418 }, { "id": "identity_154772", "image": "TQA/images/question_images/convection_of_air_6660.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many complete convection cells are depicted in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.375, "loss": 4.489475727081299 }, { "id": "identity_118846", "image": "Super-CLEVR/images/superCLEVR_new_001765.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest vehicle?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 16.9375, "loss": 4.592431545257568 }, { "id": "identity_297032", "image": "PMC-VQA/images/PMC6764639_FIG1_42430.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the meaning of red arrows in Figure 1A and 1B?\nChoices:\n(A) Abnormal signals\n(B) Normal brainstem\n(C) Ischemic stroke\n(D) Post-contrast FLAIR sequence" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.15625, "loss": 4.723607063293457 }, { "id": "identity_33200", "image": "Super-CLEVR/images/superCLEVR_new_000019.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the big tandem bike to the left of the small red car have the same material as the red car that is on the left side of the tiny green matte object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 30.140625, "loss": 5.144289016723633 }, { "id": "identity_14530", "image": "FigureQA/images/4357.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Maroon the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.125, "loss": 4.188769340515137 }, { "id": "identity_35462", "image": "A-OKVQA/images/66459.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the man here look at?\nChoices:\n(A) friend\n(B) oncoming traffic\n(C) police\n(D) lights" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.671875, "loss": 4.661511421203613 }, { "id": "identity_60936", "image": "DocVQA/images/ntll0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode mentioned for the activity on August-12 at 15:55?" }, { "from": "gpt", "value": "The answer is G3/T" } ], "clip_score": 19.328125, "loss": 4.56726598739624 }, { "id": "identity_265233", "image": "FigureQA/images/63243.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Seafoam the least smooth?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.40625, "loss": 4.012131690979004 }, { "id": "identity_2641", "image": "ScienceQA/images/7601.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these cities is marked on the map?\nChoices:\n(A) Nashville\n(B) Austin\n(C) Memphis\n(D) New Orleans" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.421875, "loss": 4.283312797546387 }, { "id": "identity_289502", "image": "PMC-VQA/images/PMC5841142_awx262-F4_284852.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the Kapp maps depict?\nChoices:\n(A) Tissue type \n(B) Progression of lesion \n(C) Contrast enhancement \n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.734375, "loss": 4.4191508293151855 }, { "id": "identity_20270", "image": "AI2D/images/96.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does letter A represent?\nChoices:\n(A) South\n(B) North\n(C) Day\n(D) Night" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.3125, "loss": 4.026101589202881 }, { "id": "identity_287563", "image": "A-OKVQA/images/180341.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a low-energy way for someone nearby to increase their elevation?\nChoices:\n(A) ski lift\n(B) jog\n(C) catch taxi\n(D) ski uphill" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.25, "loss": 4.0862956047058105 }, { "id": "identity_338393", "image": "PMC-VQA/images/PMC6854723_Fig1_67867.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the left graphs?\nChoices:\n(A) P2Y12R in TNC\n(B) Higher magnification images\n(C) White dotted line frames\n(D) Scale bar" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.03125, "loss": 4.740972995758057 }, { "id": "identity_4694", "image": "AI2D/images/3638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What supplies blood to the spleen?\nChoices:\n(A) descending colon\n(B) hepatic blood vessels\n(C) splenic artery\n(D) renal blood vessels" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.125, "loss": 4.071938514709473 }, { "id": "identity_305349", "image": "FigureQA/images/20778.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.421875, "loss": 4.4485554695129395 }, { "id": "identity_7548", "image": "AI2D/images/1175.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The orange circular shape is called what?\nChoices:\n(A) Ribosome\n(B) Lysosome\n(C) Nucleolus\n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.859375, "loss": 4.547181606292725 }, { "id": "identity_107395", "image": "PMC-VQA/images/PMC8138997_Fig4_446783.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of pathology or abnormality in the tissue section shown?" }, { "from": "gpt", "value": "The answer is No obvious pathology visible" } ], "clip_score": 33.40625, "loss": 4.125804424285889 }, { "id": "identity_301020", "image": "FigureQA/images/66738.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the area under the curve for Peru the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.03125, "loss": 4.40207576751709 }, { "id": "identity_35623", "image": "AI2D/images/3705.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What object is represented in the above diagram?\nChoices:\n(A) kidney\n(B) brain\n(C) urinary bladder\n(D) heart" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.09375, "loss": 4.38438606262207 }, { "id": "identity_12779", "image": "VQA-RAD/images/synpic29048.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The image has an abnormality located where?" }, { "from": "gpt", "value": "The answer is retrocardiac" } ], "clip_score": 27.046875, "loss": 4.185244560241699 }, { "id": "identity_190487", "image": "PMC-VQA/images/PMC3675018_ppat-1003413-g004_209543.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the main object in the image?" }, { "from": "gpt", "value": "The answer is Circular" } ], "clip_score": 17.59375, "loss": 4.13343620300293 }, { "id": "identity_1173", "image": "ScienceQA/images/346.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these two objects have in common?\nChoices:\n(A) breakable\n(B) hard" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.671875, "loss": 4.07684326171875 }, { "id": "identity_321618", "image": "PMC-VQA/images/PMC4661912_ijms-16-26054-f004_448410.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which view displays the tracer activity from the side of the brain?\nChoices:\n(A) Sagittal\n(B) Coronal\n(C) Transverse\n(D) Frontal" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.671875, "loss": 4.4498443603515625 }, { "id": "identity_288688", "image": "PMC-VQA/images/PMC3299701_pone-0032796-g004_129436.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to capture the horizontal sections out of the following options: post contrast T1W MDEFT, 3D-GEFC, MRI, or X-ray?\nChoices:\n(A) post contrast T1W MDEFT \n(B) 3D-GEFC \n(C) MRI \n(D) X-ray" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.90625, "loss": 4.456193923950195 }, { "id": "identity_11192", "image": "ScienceQA/images/7682.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Massachusetts?\nChoices:\n(A) Boston\n(B) Augusta\n(C) Cambridge\n(D) Plymouth" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.453125, "loss": 4.355996131896973 }, { "id": "identity_316206", "image": "PMC-VQA/images/PMC4119891_fig1_311018.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the small black arrows in this SEM image of the PLC tube-guide?\nChoices:\n(A) Inner Diameter\n(B) Cell\n(C) Outer Diameter\n(D) Magnification" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.296875, "loss": 4.468276500701904 }, { "id": "identity_321877", "image": "PMC-VQA/images/PMC4672531_Fig1_451450.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the aneurysm shown here?\nChoices:\n(A) Vassal aneurysm\n(B) Ventricular aneurysm\n(C) Sinus of Valsalva aneurysm\n(D) Aortic aneurysm" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.984375, "loss": 4.274224281311035 }, { "id": "identity_337505", "image": "PMC-VQA/images/PMC4227161_ijms-15-17270-f004_334883.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the shape of a typical lumbar disc?\nChoices:\n(A) Not mentioned in the caption, \n(B) Round, \n(C) Square, \n(D) Triangular." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.53125, "loss": 4.568995475769043 }, { "id": "identity_29611", "image": "AI2D/images/3648.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagram above is of what?\nChoices:\n(A) Insect\n(B) Heart\n(C) Ear\n(D) Animal" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.78125, "loss": 4.074896812438965 }, { "id": "identity_313558", "image": "A-OKVQA/images/491857.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people in the boats going to do at their destination?\nChoices:\n(A) sight see\n(B) shop\n(C) party\n(D) swim" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.34375, "loss": 4.595574378967285 }, { "id": "identity_4281", "image": "AI2D/images/1174.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the small dots on the nucleus (K) represent?\nChoices:\n(A) mitochondrion\n(B) nuclear pore\n(C) lysosome\n(D) ribosome" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.546875, "loss": 4.548240661621094 }, { "id": "identity_1319", "image": "Super-CLEVR/images/superCLEVR_new_001192.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The large red thing that is the same material as the tiny cruiser is what shape?" }, { "from": "gpt", "value": "The answer is school bus" } ], "clip_score": 23.21875, "loss": 4.229517459869385 }, { "id": "identity_35898", "image": "DocVQA/images/ztwc0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who made the appt?" }, { "from": "gpt", "value": "The answer is sue segrest" } ], "clip_score": 21.671875, "loss": 5.762088775634766 }, { "id": "identity_305409", "image": "TQA/images/abc_question_images/photosynthesis_14130.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter indicates energy from sunlight entering the plant?\nChoices:\n(A) U\n(B) J\n(C) A\n(D) T" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 35.4375, "loss": 4.174659729003906 }, { "id": "identity_320587", "image": "A-OKVQA/images/13372.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would this place be called?\nChoices:\n(A) garage\n(B) dock\n(C) bunker\n(D) hangar" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.2421875, "loss": 4.560116767883301 }, { "id": "identity_309815", "image": "Super-CLEVR/images/superCLEVR_new_000029.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the small yellow matte object to the right of the yellow bicycle the same shape as the large metal object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 32.71875, "loss": 5.240163803100586 }, { "id": "identity_287524", "image": "IconQA/images/57417.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do all the dogs have a sufficient number of tennis balls?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.34375, "loss": 4.056334018707275 }, { "id": "identity_37542", "image": "TabMWP/images/23475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Aiden's campground is close to two hiking trails with scenic destinations. One trail ends at an underground lake in the bottom of a small cave. The other trail ends at a mountain peak. The elevation of each destination is shown on the campground map. Which trail's destination is farther from sea level?\nChoices:\n(A) underground lake\n(B) mountain peak" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.09375, "loss": 4.1695709228515625 }, { "id": "identity_22323", "image": "PMC-VQA/images/PMC7332021_pone.0234792.g005_204142.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color of the nuclei stain? \nChoices:\n(A) Red \n(B) Blue \n(C) Yellow \n(D) Green " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.8125, "loss": 4.444986820220947 }, { "id": "identity_325949", "image": "PMC-VQA/images/PMC3861471_f1-etm-07-01-0199_251398.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the state of the medial collateral ligament on the preoperative MRI scan?\nChoices:\n(A) completely disrupted\n(B) partially torn\n(C) intact\n(D) inflamed" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.40625, "loss": 4.266436576843262 }, { "id": "identity_106720", "image": "Super-CLEVR/images/superCLEVR_new_001149.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is positioned directly in front of the small purple rubber object?" }, { "from": "gpt", "value": "The answer is A van" } ], "clip_score": 19.171875, "loss": 4.530689716339111 }, { "id": "identity_338061", "image": "A-OKVQA/images/483424.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of water body contains many rowboats?\nChoices:\n(A) river\n(B) lake\n(C) sea\n(D) ocean" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.671875, "loss": 4.114133834838867 }, { "id": "identity_36825", "image": "ScienceQA/images/13641.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Tennessee?\nChoices:\n(A) Nashville\n(B) Tallahassee\n(C) Cleveland\n(D) Jefferson City" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.421875, "loss": 4.213020324707031 }, { "id": "identity_51000", "image": "DocVQA/images/qhxj0037_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main difficulty in the problem of detecting single most difficult protein in hetero-atoms?" }, { "from": "gpt", "value": "The answer is Moisture" } ], "clip_score": 17.734375, "loss": 5.01077938079834 }, { "id": "identity_7540", "image": "ScienceQA/images/5798.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which animal's skin is also adapted for survival in cold places?\nChoices:\n(A) armadillo lizard\n(B) snowy owl" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.390625, "loss": 4.110693454742432 }, { "id": "identity_20115", "image": "PMC-VQA/images/PMC4969259_f0005_57789.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of scores are shown in the image? \nChoices:\n(A) Depression scores \n(B) Anxiety scores \n(C) Stress scores \n(D) Happiness scores " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.703125, "loss": 4.181049346923828 }, { "id": "identity_211488", "image": "PMC-VQA/images/PMC4342219_pone.0118212.g003_362810.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image indicated by the triangular markers?" }, { "from": "gpt", "value": "The answer is Anterior or front side orientation" } ], "clip_score": 24.1875, "loss": 4.321909427642822 }, { "id": "identity_34122", "image": "ScienceQA/images/2873.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is highlighted?\nChoices:\n(A) Antarctica\n(B) Asia\n(C) Africa\n(D) North America" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.28125, "loss": 4.278412818908691 }, { "id": "identity_321097", "image": "A-OKVQA/images/69189.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the person on the bike do for a living?\nChoices:\n(A) gives rides\n(B) sells bikes\n(C) sells chickens\n(D) steals bikes" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.046875, "loss": 4.225498676300049 }, { "id": "identity_240598", "image": "PMC-VQA/images/PMC6331538_F9_422269.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What specific cellular structures are highlighted by the LM6/DAPI staining in Figure 2G, and how does the higher magnification inset contribute to the understanding of these structures?" }, { "from": "gpt", "value": "The answer is Detailed cell wall structure and spatial relationship with nuclei" } ], "clip_score": 30.390625, "loss": 4.225516319274902 }, { "id": "identity_115282", "image": "VQA2.0/images/232689.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pendant lights are hanging above the kitchen island?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 26.609375, "loss": 4.210385322570801 }, { "id": "identity_7474", "image": "ScienceQA/images/2054.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) salty\n(B) fuzzy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.1875, "loss": 4.4143242835998535 }, { "id": "identity_308641", "image": "PMC-VQA/images/PMC5658582_mbt212678-fig-0002_231982.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the subject of the structure labeled here?\nChoices:\n(A) Silica bodies \n(B) Stomata \n(C) Long cells \n(D) Short cells" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.71875, "loss": 4.8348708152771 }, { "id": "identity_310003", "image": "A-OKVQA/images/473190.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can these moving things be considered a part of?\nChoices:\n(A) school\n(B) fleet\n(C) clowder\n(D) database" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.7890625, "loss": 4.328249454498291 }, { "id": "identity_13452", "image": "FigureQA/images/80376.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Salmon the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.421875, "loss": 4.135418891906738 }, { "id": "identity_292140", "image": "TextVQA/images/7cbbf03925eeb191.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of supply house is this?" }, { "from": "gpt", "value": "The answer is train" } ], "clip_score": 22.140625, "loss": 4.366189956665039 }, { "id": "identity_77953", "image": "AI2D/images/2475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stages of the bee's life cycle are depicted in the image?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 28.546875, "loss": 4.5709123611450195 }, { "id": "identity_278121", "image": "FigureQA/images/56344.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Peru or Medium Aqua?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.109375, "loss": 4.694717884063721 }, { "id": "identity_1713", "image": "A-OKVQA/images/578705.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is aimed at the man on the motorcycle?\nChoices:\n(A) camera\n(B) antique blunderbuss\n(C) paintball\n(D) ruler" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.640625, "loss": 4.202892303466797 }, { "id": "identity_307464", "image": "ScienceQA/images/11668.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this water pitcher made of?\nChoices:\n(A) plastic\n(B) leather" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.421875, "loss": 4.163107872009277 }, { "id": "identity_289722", "image": "A-OKVQA/images/95744.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What American ice cream treat would be the most suitable option for these vendors to promote?\nChoices:\n(A) banana split\n(B) chocolate malt\n(C) chocolate sundae\n(D) brownie cake" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.78125, "loss": 4.3818254470825195 }, { "id": "identity_22028", "image": "A-OKVQA/images/7952.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What could happen if the white truck parks a few feet directly ahead?\nChoices:\n(A) parade\n(B) collision\n(C) movie\n(D) sunset" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 8.4375, "loss": 4.544146537780762 }, { "id": "identity_315203", "image": "DVQA/images/bar_train_00191095.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the crimson color represent?" }, { "from": "gpt", "value": "The answer is start" } ], "clip_score": 19.71875, "loss": 4.08534574508667 }, { "id": "identity_10269", "image": "PMC-VQA/images/PMC4159571_Fig1_319149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 2 Question: In which column are the areas indicated? \nChoices:\n(A) Left panel \n(B) Right panel \n(C) Middle panel \n(D) Full panel " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.75, "loss": 4.179572105407715 }, { "id": "identity_150128", "image": "DocVQA/images/zrlp0227_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What location was visited on April 16 and 17?" }, { "from": "gpt", "value": "The answer is Philadelphia" } ], "clip_score": 20.34375, "loss": 4.0367913246154785 }, { "id": "identity_30371", "image": "A-OKVQA/images/428740.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the metal tube at the top right corner of the image?\nChoices:\n(A) black\n(B) red\n(C) grey\n(D) green" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.671875, "loss": 4.170819282531738 }, { "id": "identity_332999", "image": "PMC-VQA/images/PMC8122927_jcm-10-01834-f002_441031.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this image with high magnification display?\nChoices:\n(A) Dark intracellular material\n(B) Fibrils and tubules\n(C) Bright intracellular material\n(D) Only fibrils and no tubules" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.34375, "loss": 4.714657783508301 }, { "id": "identity_262221", "image": "ScienceQA/images/12372.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What common property do these three objects share among the following options: shiny, sticky, or fuzzy?\nChoices:\n(A) shiny\n(B) sticky\n(C) fuzzy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.9765625, "loss": 4.014894962310791 }, { "id": "identity_158713", "image": "AI2D/images/748.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many complete lunar rotations are depicted in this diagram?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 36.9375, "loss": 4.167078018188477 }, { "id": "identity_190786", "image": "A-OKVQA/images/306716.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bus?" }, { "from": "gpt", "value": "The answer is Dark blue" } ], "clip_score": 23.078125, "loss": 4.025392532348633 }, { "id": "identity_68092", "image": "TQA/images/question_images/tides_2529.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tidal bulges are shown on Earth in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 35.0, "loss": 4.281098365783691 }, { "id": "identity_296849", "image": "VQA2.0/images/186233.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of pillars on the building?" }, { "from": "gpt", "value": "The answer is cannot tell" } ], "clip_score": 16.734375, "loss": 4.720118999481201 }, { "id": "identity_67545", "image": "TQA/images/question_images/biomes_6569.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many biomes are depicted in the Australian region?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 29.3125, "loss": 4.340487480163574 }, { "id": "identity_1486", "image": "PMC-VQA/images/PMC6825074_Fig6_59899.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the red arrow pointing towards in the image? \nChoices:\n(A) Synovial thickening \n(B) Muscle tear \n(C) Nerve entrapment \n(D) Tendon rupture " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.546875, "loss": 4.94040584564209 }, { "id": "identity_301595", "image": "AI2D/images/3510.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the label for T in the diagram above?\nChoices:\n(A) Nucleus\n(B) Cytoplasm\n(C) Mitochondrion\n(D) Chloroplast" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.140625, "loss": 4.3333635330200195 }, { "id": "identity_281217", "image": "A-OKVQA/images/253250.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which part of the boat is the person in the red shirt standing? \n(A) bow \n(B) stern \n(C) transom \n(D) hull" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.453125, "loss": 4.72225284576416 }, { "id": "identity_188869", "image": "VizWiz/images/VizWiz_train_00002266.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of decorative plant is carved on the clock?" }, { "from": "gpt", "value": "The answer is Oak" } ], "clip_score": 30.984375, "loss": 4.596498966217041 }, { "id": "identity_5964", "image": "AI2D/images/3642.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: which layer is between left atrium and left coronary artery?\nChoices:\n(A) aorta\n(B) pulmonary artery\n(C) left pulmonary veins\n(D) descending aorta" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.828125, "loss": 4.0065178871154785 }, { "id": "identity_336338", "image": "ScienceQA/images/11208.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these places is indicated on the map?\nChoices:\n(A) Boston\n(B) Houston\n(C) Washington, D.C.\n(D) St. Louis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.890625, "loss": 5.077584266662598 }, { "id": "identity_327292", "image": "PMC-VQA/images/PMC7500744_FIG1_256789.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this image show?\nChoices:\n(A) Pulmonary embolism \n(B) Lung nodule \n(C) Pulmonary artery AVMs \n(D) Bronchitis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.625, "loss": 4.051348686218262 }, { "id": "identity_281659", "image": "ScienceQA/images/15674.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Ohio?\n(A) Columbus\n(B) Indianapolis\n(C) Cincinnati\n(D) Cleveland" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.515625, "loss": 4.19899320602417 }, { "id": "identity_130091", "image": "VQA2.0/images/388179.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the vehicle on the left side of the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 20.90625, "loss": 4.040112018585205 }, { "id": "identity_38076", "image": "PMC-VQA/images/PMC5911611_cam41363-fig-0001_305036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What technology is used to obtain the image shown in the figure? \nChoices:\n(A) BCT \n(B) EM \n(C) FET \n(D) PET " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.46875, "loss": 4.260087490081787 }, { "id": "identity_267372", "image": "A-OKVQA/images/302343.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the stripe that is wrapped around the red boat? Is it:\n(A) purple\n(B) white\n(C) red\n(D) green" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.40625, "loss": 4.241542339324951 }, { "id": "identity_109214", "image": "VizWiz/images/VizWiz_train_00012718.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object to the right of the purple vase?" }, { "from": "gpt", "value": "The answer is Red and white" } ], "clip_score": 19.984375, "loss": 4.636647701263428 }, { "id": "identity_29289", "image": "IconQA/images/94845.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of flowers even or odd?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.03125, "loss": 4.205632209777832 }, { "id": "identity_16653", "image": "DocVQA/images/jlxm0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: WHAT SHOULD BE DONE BEFORE AND AFTER CONNECTING WITH SILANIZED PRESS FIT UNION?" }, { "from": "gpt", "value": "The answer is performance test" } ], "clip_score": 21.609375, "loss": 5.193014144897461 }, { "id": "identity_198371", "image": "A-OKVQA/images/143569.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are crossing the street in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 25.78125, "loss": 4.446720123291016 }, { "id": "identity_320698", "image": "A-OKVQA/images/447179.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of job does the person in black hold?\nChoices:\n(A) dairy\n(B) religious\n(C) factory\n(D) sports" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 9.7578125, "loss": 4.830268383026123 }, { "id": "identity_113974", "image": "PMC-VQA/images/PMC7577462_pone.0240845.g001_279272.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of other lesions or abnormalities in the image?" }, { "from": "gpt", "value": "The answer is No other obvious lesions" } ], "clip_score": 25.578125, "loss": 4.5785393714904785 }, { "id": "identity_186929", "image": "A-OKVQA/images/541764.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.4375, "loss": 4.133020877838135 }, { "id": "identity_312338", "image": "FigureQA/images/24783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Salmon the lower value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.875, "loss": 4.003578186035156 }, { "id": "identity_320861", "image": "PMC-VQA/images/PMC8121193_FIG2_440377.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where does the left thoracic mass originate in the mediastinum?\n\nChoices:\n(A) Anterior Mediastinum\n(B) Posterior Mediastinum\n(C) Middle Mediastinum\n(D) Superior Mediastinum" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.09375, "loss": 4.168981552124023 }, { "id": "identity_292805", "image": "TextVQA/images/bd104367a292f20a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name for a smaller version of something?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 16.109375, "loss": 4.339380264282227 }, { "id": "identity_611", "image": "ScienceQA/images/6286.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) Jamaica\n(B) Haiti\n(C) the Dominican Republic\n(D) Dominica" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.984375, "loss": 4.151088714599609 }, { "id": "identity_306167", "image": "TQA/images/question_images/dna_6760.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term for a connected sugar, phosphate group, and protein?\nChoices:\n(A) hydrogen bond\n(B) deoxyribose\n(C) nucleotide\n(D) sugar-phosphate backbone" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.21875, "loss": 4.042535781860352 }, { "id": "identity_315854", "image": "TextVQA/images/2b9618e6ba8d7421.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the license plate number on the black car on the side?" }, { "from": "gpt", "value": "The answer is la56 vvm" } ], "clip_score": 18.046875, "loss": 4.109523296356201 }, { "id": "identity_290483", "image": "A-OKVQA/images/259717.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for cooking on a boat according to the given choices?\nChoices:\n(A) no room\n(B) tastes better\n(C) it's home\n(D) feed fish" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.453125, "loss": 4.320129871368408 }, { "id": "identity_287318", "image": "A-OKVQA/images/107389.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the person engaged in?\nChoices:\n(A) returning\n(B) star gazing\n(C) sun spotting\n(D) serving" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 6.6171875, "loss": 4.5050950050354 }, { "id": "identity_10199", "image": "PMC-VQA/images/PMC4312452_Fig4_354171.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the right image? \nChoices:\n(A) Exterior of ventral valve ; \n(B) Interior of dorsal valve ; \n(C) Interior of ventral valve ; \n(D) Exterior of dorsal valve. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.796875, "loss": 4.091860771179199 }, { "id": "identity_201242", "image": "VQA2.0/images/474550.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flavors of doughnuts are listed on the menu board?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 29.34375, "loss": 4.093553066253662 }, { "id": "identity_26300", "image": "Super-CLEVR/images/superCLEVR_new_001910.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The small aeroplane has what color?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 18.046875, "loss": 4.67936897277832 }, { "id": "identity_2859", "image": "FigureQA/images/16477.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Chocolate the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.171875, "loss": 4.015590190887451 }, { "id": "identity_303218", "image": "Super-CLEVR/images/superCLEVR_new_000244.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a shiny motorbike of the same color as this?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 23.046875, "loss": 4.771024227142334 }, { "id": "identity_155167", "image": "AI2D/images/4842.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What feature is common to both the composite volcano and the caldera volcano?" }, { "from": "gpt", "value": "The answer is Ash" } ], "clip_score": 33.125, "loss": 4.31350564956665 }, { "id": "identity_67708", "image": "TQA/images/abc_question_images/parts_ocean_floor_17240.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does label E represent in this oceanic diagram?" }, { "from": "gpt", "value": "The answer is Ocean depth" } ], "clip_score": 29.3125, "loss": 4.012111186981201 }, { "id": "identity_164395", "image": "GeoQA+/images/1692.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the 4x4 square grid shown in the image, with each small square having a side length of 1, if point A is a vertex of the square and point D is the foot of the perpendicular from A to BC, what is the slope of line segment AC?" }, { "from": "gpt", "value": "The answer is Undefined" } ], "clip_score": 35.46875, "loss": 4.158606052398682 }, { "id": "identity_216104", "image": "PMC-VQA/images/PMC4606887_Fig1_433713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the brain in image (a)?" }, { "from": "gpt", "value": "The answer is Coronal orientation" } ], "clip_score": 32.78125, "loss": 4.217609405517578 }, { "id": "identity_207734", "image": "A-OKVQA/images/580414.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many traffic lights are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.9375, "loss": 4.2759528160095215 }, { "id": "identity_274449", "image": "AI2D/images/3698.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the correct label for the organ responsible for expelling waste from the body?\nChoices:\n(A) Q\n(B) O\n(C) R\n(D) C" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.609375, "loss": 4.730306625366211 }, { "id": "identity_213091", "image": "VQA2.0/images/139658.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people can be seen in the background of the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 16.828125, "loss": 4.604282855987549 }, { "id": "identity_32250", "image": "A-OKVQA/images/242592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the colorful machines called?\nChoices:\n(A) garbage bins\n(B) mailboxes\n(C) vending machines\n(D) storage lockers" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.59375, "loss": 4.516768455505371 }, { "id": "identity_291489", "image": "ScienceQA/images/16558.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What common property do these two objects share among the following choices: (A) sticky or (B) bendable?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.484375, "loss": 4.057295322418213 }, { "id": "identity_331400", "image": "PMC-VQA/images/PMC4846987_f5_24859.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the images in the top row of this figure depict?\nChoices:\n(A) Segmentations of a brain tumor \n(B) Segmentations at three different time points \n(C) FLAIR images at three different time points \n(D) Overlays of the segmentations of a single time point" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.296875, "loss": 4.335659027099609 }, { "id": "identity_38648", "image": "A-OKVQA/images/194756.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What will be parked in front of the people?\nChoices:\n(A) boats\n(B) nothing\n(C) yachts\n(D) ships" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.796875, "loss": 4.182060241699219 }, { "id": "identity_325700", "image": "A-OKVQA/images/31019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are the traffic cones placed where they are?\nChoices:\n(A) directions\n(B) road closure\n(C) art\n(D) construction" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.265625, "loss": 4.135357856750488 }, { "id": "identity_2175", "image": "PMC-VQA/images/PMC8929746_f2_225133.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what part of the gastric tissue is the linear pattern seen? \nChoices:\n(A) muscle layer \n(B) serosa \n(C) stroma \n(D) submucosa " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.9375, "loss": 4.812483310699463 }, { "id": "identity_13745", "image": "FigureQA/images/86125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Orange Red have the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.375, "loss": 4.502983093261719 }, { "id": "identity_276938", "image": "TQA/images/question_images/skin_cross_section_7584.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Up to which layer of the skin does the hair shaft extend?\nChoices:\n(A) Dermis\n(B) Epidermis\n(C) Sebaceous Gland\n(D) Fatty Tissue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.125, "loss": 4.583926200866699 }, { "id": "identity_25070", "image": "PMC-VQA/images/PMC4725395_f3_464536.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the calcifications? \nChoices:\n(A) at the supraspinatus tendon insertion \n(B) at the biceps tendon insertion \n(C) at the infraspinatus tendon insertion \n(D) at the triceps tendon insertion " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.28125, "loss": 4.388581275939941 }, { "id": "identity_18703", "image": "AI2D/images/3279.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is A in the diagram?\nChoices:\n(A) mitochondrion\n(B) cell wall\n(C) cell membrane\n(D) cytoplasm" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.296875, "loss": 4.3480682373046875 }, { "id": "identity_269904", "image": "IconQA/images/14651.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do all the snowmen have enough carrot noses?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 37.53125, "loss": 4.176564693450928 }, { "id": "identity_309387", "image": "VQA-AS/images/8658.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which person is the child?" }, { "from": "gpt", "value": "The answer is girl" } ], "clip_score": 20.6875, "loss": 4.0858893394470215 }, { "id": "identity_313485", "image": "ScienceQA/images/6515.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Trenton\n(B) Boston\n(C) Cambridge\n(D) Charleston" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.828125, "loss": 4.080556392669678 }, { "id": "identity_271421", "image": "VizWiz/images/VizWiz_train_00011040.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do you volunteer?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 12.2421875, "loss": 4.387035846710205 }, { "id": "identity_271462", "image": "ScienceQA/images/4591.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being pointed out in the highlighted area?\nChoices:\n(A) Tuvalu\n(B) Fiji\n(C) Tonga\n(D) Kiribati" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.453125, "loss": 4.100266456604004 }, { "id": "identity_12945", "image": "ScienceQA/images/15022.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these two objects have in common?\nChoices:\n(A) slippery\n(B) bendable" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.25, "loss": 4.520022392272949 }, { "id": "identity_316186", "image": "FigureQA/images/28513.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Bubblegum the larger portion?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.375, "loss": 4.641100883483887 }, { "id": "identity_8745", "image": "PMC-VQA/images/PMC5973067_Figure1_319824.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the giant tumor?\nChoices:\n(A) Liver \n(B) Lungs \n(C) Brain \n(D) Kidneys" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.171875, "loss": 4.223657131195068 }, { "id": "identity_229914", "image": "IconQA/images/77318.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lines of symmetry does this picture have, and what are their orientations?" }, { "from": "gpt", "value": "The answer is One vertical line of symmetry" } ], "clip_score": 23.875, "loss": 4.095357894897461 }, { "id": "identity_3621", "image": "DVQA/images/bar_train_00161010.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the third bar from the left?" }, { "from": "gpt", "value": "The answer is stock" } ], "clip_score": 22.875, "loss": 4.168131351470947 }, { "id": "identity_22516", "image": "A-OKVQA/images/73445.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How is the method of locomotion here powered?\nChoices:\n(A) gasoline\n(B) hay\n(C) oil\n(D) coal" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.546875, "loss": 4.260470390319824 }, { "id": "identity_334265", "image": "FigureQA/images/24549.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.96875, "loss": 4.818890571594238 }, { "id": "identity_13259", "image": "VizWiz/images/VizWiz_train_00006758.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is your name?" }, { "from": "gpt", "value": "The answer is autumn" } ], "clip_score": 10.7890625, "loss": 4.118711948394775 }, { "id": "identity_2627", "image": "VQA-RAD/images/synpic23803.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which side is the cardiac border more obscured?" }, { "from": "gpt", "value": "The answer is Right" } ], "clip_score": 23.375, "loss": 4.072896480560303 }, { "id": "identity_12267", "image": "TextVQA/images/3e569f2f4c818ed6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the first word on the left page?" }, { "from": "gpt", "value": "The answer is upon" } ], "clip_score": 17.109375, "loss": 4.713057518005371 }, { "id": "identity_265194", "image": "FigureQA/images/79142.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Sky Blue or Slate?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.765625, "loss": 4.539465427398682 }, { "id": "identity_282828", "image": "DVQA/images/bar_train_00089131.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store is represented by the color indianred?" }, { "from": "gpt", "value": "The answer is teeth" } ], "clip_score": 22.90625, "loss": 5.072927474975586 }, { "id": "identity_129178", "image": "TextVQA/images/362b694bf08d174d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the chairs on the stage?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 17.578125, "loss": 4.293354511260986 }, { "id": "identity_34956", "image": "A-OKVQA/images/51396.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the rack on the far left used for?\nChoices:\n(A) skis\n(B) hats\n(C) coats\n(D) goggles" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.140625, "loss": 4.554592609405518 }, { "id": "identity_309265", "image": "PMC-VQA/images/PMC4065664_fig4_300318.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the kidney is depicted in the image?\nChoices:\n(A) Bowman's capsule\n(B) Distal convoluted tubule\n(C) Proximal convoluted tubule\n(D) Loop of Henle" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.953125, "loss": 4.078546524047852 }, { "id": "identity_331470", "image": "A-OKVQA/images/457011.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the individual connected to who is about to be pulled by the horse?\nChoices:\n(A) sled\n(B) snowmobile\n(C) snowboard\n(D) skis" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.6875, "loss": 4.009904384613037 }, { "id": "identity_264984", "image": "IconQA/images/24586.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the time displayed on the clock?\nChoices:\n(A) 9:00\n(B) 4:00" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.421875, "loss": 4.056960105895996 }, { "id": "identity_24809", "image": "Super-CLEVR/images/superCLEVR_new_001446.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the number of yellow objects left of the matte sedan greater than the number of brown cruisers behind the large brown object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 27.40625, "loss": 5.126243591308594 }, { "id": "identity_264969", "image": "VizWiz/images/VizWiz_train_00001572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which brand of pocket knife is this?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 28.734375, "loss": 4.163174152374268 }, { "id": "identity_239365", "image": "VQA2.0/images/160848.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full name of the boat, and how many other boats are visible in the background?" }, { "from": "gpt", "value": "The answer is Lady Joanne III, 5 boats" } ], "clip_score": 25.625, "loss": 4.156891345977783 }, { "id": "identity_66080", "image": "AI2D/images/2670.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many solstices are indicated in this diagram?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 32.96875, "loss": 4.459157466888428 }, { "id": "identity_331841", "image": "TabMWP/images/9507.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table displays a function. Is it linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.21875, "loss": 4.299306869506836 }, { "id": "identity_272929", "image": "IconQA/images/79798.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine if the number of hearts is even or odd.\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.203125, "loss": 4.1183648109436035 }, { "id": "identity_32275", "image": "ScienceQA/images/20657.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Maryland?\nChoices:\n(A) Jefferson City\n(B) Albany\n(C) Annapolis\n(D) Dover" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.28125, "loss": 4.213189125061035 }, { "id": "identity_325991", "image": "PlotQA/images/124494.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of this group of bars?" }, { "from": "gpt", "value": "The answer is Bahrain" } ], "clip_score": 19.078125, "loss": 4.265985488891602 }, { "id": "identity_190366", "image": "A-OKVQA/images/190805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of green vegetables can be seen in the image?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 24.28125, "loss": 5.080568790435791 }, { "id": "identity_208734", "image": "PMC-VQA/images/PMC3920997_got033-F5_266347.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is indicated by the white arrowheads in the image?" }, { "from": "gpt", "value": "The answer is Blood vessel with thrombus" } ], "clip_score": 26.53125, "loss": 4.524239540100098 }, { "id": "identity_198597", "image": "A-OKVQA/images/87096.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bunches of bananas are hanging in the foreground?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 28.703125, "loss": 4.2184672355651855 }, { "id": "identity_316495", "image": "IconQA/images/67953.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a barn. What is the second image?\nChoices:\n(A) tractor\n(B) barn\n(C) sun" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.21875, "loss": 4.106822967529297 }, { "id": "identity_294922", "image": "PMC-VQA/images/PMC7896198_FIG2_371727.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the axial view? \nChoices:\n(A) Free air in the abdomen.\n(B) Stent in the anterior rectal wall.\n(C) Both A and B.\n(D) None of the above." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.484375, "loss": 4.195801734924316 }, { "id": "identity_328330", "image": "IconQA/images/90418.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image shows a bear. What is the ninth image?\nChoices:\n(A) train\n(B) plane\n(C) bear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.03125, "loss": 4.146249294281006 }, { "id": "identity_235024", "image": "VQA2.0/images/324650.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the man doing, and what is the color of his shorts?" }, { "from": "gpt", "value": "The answer is Surfing; red" } ], "clip_score": 25.5625, "loss": 4.13477897644043 }, { "id": "identity_291411", "image": "IconQA/images/40998.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture comes after the first picture of a paw in the sequence?\nChoices:\n(A) house\n(B) dog\n(C) paw" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.890625, "loss": 4.4095540046691895 }, { "id": "identity_17138", "image": "PMC-VQA/images/PMC8079624_F3_429499.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of imaging was used for localization? \nChoices:\n(A) MRI \n(B) Ultrasound \n(C) CT \n(D) X-Ray " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.421875, "loss": 4.10239315032959 }, { "id": "identity_304985", "image": "VizWiz/images/VizWiz_train_00004976.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who makes this item? Where can it be found?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 18.328125, "loss": 4.007775783538818 }, { "id": "identity_220341", "image": "TextVQA/images/01be054d92db60bf.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand name of the whiteboard marker?" }, { "from": "gpt", "value": "The answer is Pentel" } ], "clip_score": 32.125, "loss": 4.745909690856934 }, { "id": "identity_322301", "image": "VQA2.0/images/221674.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is behind them?" }, { "from": "gpt", "value": "The answer is mountains" } ], "clip_score": 15.21875, "loss": 4.0333147048950195 }, { "id": "identity_108440", "image": "PMC-VQA/images/PMC9281467_FIG5_340226.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the individual's shirt in the image?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 22.84375, "loss": 4.315897464752197 }, { "id": "identity_7111", "image": "VQA2.0/images/140690.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are most of the stadium seats empty?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.15625, "loss": 4.221129417419434 }, { "id": "identity_319199", "image": "PMC-VQA/images/PMC8379116_Fig3_38406.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this show?\nChoices:\n(A) MRI scan of an ankle\n(B) Coronal reformats of CT examinations of the knee\n(C) Coronal reformats of CT examinations of five hips\n(D) PET Scan of the brain" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.53125, "loss": 4.03895378112793 }, { "id": "identity_38723", "image": "A-OKVQA/images/305858.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The face of the namesake of this bridge is on which American dollar bill?\nChoices:\n(A) $20\n(B) $100\n(C) $50\n(D) $5" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.171875, "loss": 4.093195915222168 }, { "id": "identity_330042", "image": "DocVQA/images/llln0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the sender?" }, { "from": "gpt", "value": "The answer is RJRTI R&D" } ], "clip_score": 21.859375, "loss": 4.4408345222473145 }, { "id": "identity_203694", "image": "PMC-VQA/images/PMC8069390_diagnostics-11-00686-f002_425897.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any labels or annotations present on the image that provide additional information?" }, { "from": "gpt", "value": "The answer is Only a letter label \"B" } ], "clip_score": 24.921875, "loss": 4.825282573699951 }, { "id": "identity_25456", "image": "PMC-VQA/images/PMC7181551_Fig1_158446.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what part of the brain is the calcification observed? \nChoices:\n(A) the frontal lobe \n(B) the temporal lobe \n(C) the cerebellum \n(D) the white matter " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.765625, "loss": 4.748598575592041 }, { "id": "identity_9385", "image": "FigureQA/images/73184.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Light Green have the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.734375, "loss": 4.405900001525879 }, { "id": "identity_193253", "image": "VQA-AS/images/10831.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the woman wearing on her feet?" }, { "from": "gpt", "value": "The answer is White shoes" } ], "clip_score": 13.9765625, "loss": 4.250338554382324 }, { "id": "identity_284750", "image": "PMC-VQA/images/PMC3820302_fig12_241670.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From where did the image originate?\nOptions:\n(A) National Parks Service\n(B) The White House\n(C) National Institutes of Health/Department of Health and Human Services\n(D) NASA" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.015625, "loss": 4.337164402008057 }, { "id": "identity_117385", "image": "VizWiz/images/VizWiz_train_00001637.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the hat worn by the character building the snowman?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 24.015625, "loss": 4.117140293121338 }, { "id": "identity_25132", "image": "PMC-VQA/images/PMC8024434_Fig6_410859.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which region of the brain is showing hypointense signal on T1-weighted imaging? \nChoices:\n(A) Corpus Callosum \n(B) Medial cerebellum \n(C) Lateral cerebellum \n(D) None of the above. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.515625, "loss": 4.378443241119385 }, { "id": "identity_323762", "image": "IconQA/images/40221.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this shape?\nChoices:\n(A) flip\n(B) turn\n(C) slide" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.25, "loss": 4.461945533752441 }, { "id": "identity_220844", "image": "VQA2.0/images/196170.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible drinking glasses are on the table?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.734375, "loss": 4.625554084777832 }, { "id": "identity_37884", "image": "PMC-VQA/images/PMC5542187_F3_199626.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of microscopy was used to acquire the images?\nChoices:\n(A) TEM \n(B) SEM \n(C) Confocal \n(D) Brightfield " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.0, "loss": 4.148353576660156 }, { "id": "identity_295305", "image": "Super-CLEVR/images/superCLEVR_new_001906.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the car that is made of the same material as the small brown object?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 21.75, "loss": 4.886512279510498 }, { "id": "identity_318846", "image": "PMC-VQA/images/PMC7723371_f2_316772.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the MRI indicate in the area of the artery on the left?\nChoices:\n(A) Hypersignal\n(B) Iso signal\n(C) Hyposignal\n(D) None of the above." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.515625, "loss": 4.176046848297119 }, { "id": "identity_310377", "image": "VQA-AS/images/14407.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is here?" }, { "from": "gpt", "value": "The answer is pillows" } ], "clip_score": 20.265625, "loss": 4.067870616912842 }, { "id": "identity_7625", "image": "PMC-VQA/images/PMC5627507_f0020_224057.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the image (D) showing? \nChoices:\n(A) A low-magnification view of a sample. \n(B) A high-magnification view of a sample. \n(C) A view of the entire sample. \n(D) A view of the top surface of the sample. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.28125, "loss": 4.1605095863342285 }, { "id": "identity_313787", "image": "TextVQA/images/631571cd72d6d688.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who makes the whisky in this box?" }, { "from": "gpt", "value": "The answer is suntory" } ], "clip_score": 27.796875, "loss": 4.217092037200928 }, { "id": "identity_318835", "image": "A-OKVQA/images/567342.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is seen in abundance inside these silver round large cans?\nChoices:\n(A) meats\n(B) condiments\n(C) main dish\n(D) side dish" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.375, "loss": 4.456597328186035 }, { "id": "identity_135750", "image": "MapQA/images/map_7366.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color representing the percentage range of centers in Illinois?" }, { "from": "gpt", "value": "The answer is Light brown" } ], "clip_score": 30.71875, "loss": 4.30202054977417 }, { "id": "identity_182544", "image": "VQA2.0/images/219859.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the balloons in the background?" }, { "from": "gpt", "value": "The answer is Blue and white" } ], "clip_score": 16.171875, "loss": 4.005136489868164 }, { "id": "identity_321535", "image": "PMC-VQA/images/PMC6519979_FIG2_480536.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What technique was used to capture this image?\nChoices:\n(A) X-ray imaging \n(B) Computed Tomography (CT) \n(C) Magnetic Resonance Imaging (MRI) \n(D) Positron Emission Tomography (PET)" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.375, "loss": 4.126429557800293 }, { "id": "identity_263927", "image": "IconQA/images/12738.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is it possible to draw a square using this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.359375, "loss": 4.045055389404297 }, { "id": "identity_266687", "image": "VQA-AS/images/14736.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object is nearest to the potted plant?" }, { "from": "gpt", "value": "The answer is window" } ], "clip_score": 24.203125, "loss": 5.519702434539795 }, { "id": "identity_306192", "image": "Super-CLEVR/images/superCLEVR_new_001453.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the other big thing that has the same shape as the big cyan matte object?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 19.390625, "loss": 4.1259355545043945 }, { "id": "identity_18389", "image": "A-OKVQA/images/345822.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the occupation of the man holding a camera?\nChoices:\n(A) actor\n(B) athlete\n(C) film director\n(D) reporter" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.671875, "loss": 4.388721466064453 }, { "id": "identity_261565", "image": "A-OKVQA/images/16403.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the individuals at the back waiting for?\nChoices:\n(A) cruise boat\n(B) bus\n(C) cab\n(D) van" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.71875, "loss": 4.164218425750732 }, { "id": "identity_303232", "image": "PMC-VQA/images/PMC6452502_ccr32032-fig-0001_462275.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the yellow arrow in this image?\nChoices:\n(A) Normal tonsil tissue\n(B) A tumor\n(C) Peritonsillar abscess\n(D) Swollen lymph node" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.15625, "loss": 4.5555195808410645 }, { "id": "identity_116764", "image": "VQA-AS/images/5305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows are visible in the room?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.0625, "loss": 4.335402011871338 }, { "id": "identity_20716", "image": "TextVQA/images/5b3e87b396dc6ec4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what color is the remote?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 24.203125, "loss": 4.473898887634277 }, { "id": "identity_34344", "image": "DVQA/images/bar_train_00150451.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 21.296875, "loss": 4.3674726486206055 }, { "id": "identity_292457", "image": "A-OKVQA/images/484273.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color of the flowers depicted in the painting to the left of the woman?\nChoices:\n(A) yellow\n(B) red\n(C) green\n(D) blue" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.09375, "loss": 4.166903495788574 }, { "id": "identity_154071", "image": "ScienceQA/images/311.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of fastening mechanism does the belt have?" }, { "from": "gpt", "value": "The answer is Frame-style buckle with prong" } ], "clip_score": 30.8125, "loss": 4.027377128601074 }, { "id": "identity_130279", "image": "A-OKVQA/images/439837.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible cabinet doors are there?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.390625, "loss": 4.584689617156982 }, { "id": "identity_6958", "image": "A-OKVQA/images/520043.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What should the drivers do in this situation?\nChoices:\n(A) hurry up\n(B) be patient\n(C) press horn\n(D) call police" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.609375, "loss": 4.483129024505615 }, { "id": "identity_204442", "image": "TextVQA/images/9d0d88bbbbf4572e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many blue plastic stools are in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 19.59375, "loss": 4.505138874053955 }, { "id": "identity_63992", "image": "AI2D/images/3470.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many legs are visible in the image of the bee's anatomy?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 33.90625, "loss": 4.465470790863037 }, { "id": "identity_298784", "image": "AI2D/images/2992.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the structure labeled as A in the diagram. Is it:\n(A) pons\n(B) hypothalamus\n(C) cerebrum\n(D) medulla" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.40625, "loss": 4.386709690093994 }, { "id": "identity_32200", "image": "IconQA/images/64822.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of ovals even or odd?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.34375, "loss": 4.049750328063965 }, { "id": "identity_127750", "image": "TextVQA/images/91bfa051c468e72c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the dominant color of the court?" }, { "from": "gpt", "value": "The answer is Light brown" } ], "clip_score": 20.1875, "loss": 4.073747158050537 }, { "id": "identity_25946", "image": "PMC-VQA/images/PMC7260710_F1_181000.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging area depicted in the image? \nChoices:\n(A) 50 µm \n(B) 100 µm \n(C) 200 µm \n(D) 500 µm " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.40625, "loss": 4.34498929977417 }, { "id": "identity_13072", "image": "TextVQA/images/cf80e87381e2fc8e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the admission to hallowfest 2007?" }, { "from": "gpt", "value": "The answer is samsung" } ], "clip_score": 14.1875, "loss": 4.989452362060547 }, { "id": "identity_296448", "image": "ScienceQA/images/13436.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Kentucky?\nChoices:\n(A) Portland\n(B) Lexington\n(C) Frankfort\n(D) Kansas City" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.1875, "loss": 4.078287124633789 }, { "id": "identity_327952", "image": "Super-CLEVR/images/superCLEVR_new_000195.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer of these rubber tandem bikes than these small cars?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 24.90625, "loss": 4.966978549957275 }, { "id": "identity_37534", "image": "PMC-VQA/images/PMC8395179_ijms-22-08547-f004_43923.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the white arrow in the image indicate?\nChoices:\n(A) Marker for Type II AECs \n(B) Cell Proliferation \n(C) Cell Death \n(D) Inflammation" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.171875, "loss": 4.727999210357666 }, { "id": "identity_289249", "image": "PMC-VQA/images/PMC6940314_Fig3_93321.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being depicted in the image for patient 2?\nChoices:\n(A) CT scan\n(B) Fusion of DWI and 68Ga-PSMA-PET scans\n(C) 68Ga-PSMA-PET scan exclusively\n(D) DWI scan exclusively" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.421875, "loss": 4.402237415313721 }, { "id": "identity_317118", "image": "PMC-VQA/images/PMC7423083_F2_233126.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the new lesion located?\nChoices:\n(A) Left side of the pons \n(B) Right side of the pons \n(C) Centrum semiovale \n(D) Cerebellum" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.859375, "loss": 4.795406818389893 }, { "id": "identity_108709", "image": "TextVQA/images/25d8358103eb68ad.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bottle cap next to the water bottle?" }, { "from": "gpt", "value": "The answer is Gold" } ], "clip_score": 16.6875, "loss": 4.002720832824707 }, { "id": "identity_14274", "image": "FigureQA/images/89059.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Forest Green the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.75, "loss": 4.198343753814697 }, { "id": "identity_329591", "image": "PMC-VQA/images/PMC5343592_f17_136822.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being stained in these images?\nChoices:\n(A) Hippocampus\n(B) Amygdala\n(C) Thalamus\n(D) Cerebellum" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.203125, "loss": 4.287770748138428 }, { "id": "identity_280419", "image": "ScienceQA/images/14126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the material used to make this towel?\nChoices:\n(A) porcelain\n(B) cotton" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.265625, "loss": 4.372788906097412 }, { "id": "identity_21508", "image": "PMC-VQA/images/PMC2844751_F0004_60300.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the direction of the tract shown in the image?\nChoices:\n(A) Anterior to posterior \n(B) Lateral to medial \n(C) Posterior to anterior \n(D) Medial to lateral " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.125, "loss": 4.56378698348999 }, { "id": "identity_10451", "image": "PMC-VQA/images/PMC6487366_fig0020_472455.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the metallic clips seen in the image? \nChoices:\n(A) White arrows \n(B) Black arrowheads \n(C) Gray arrows \n(D) Silver arrowheads " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.90625, "loss": 4.029447078704834 }, { "id": "identity_297660", "image": "A-OKVQA/images/530030.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the object described as a white swirly thing?\nChoices:\n(A) marshmallow\n(B) piglet\n(C) ice cream\n(D) cloud" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.6640625, "loss": 4.1788763999938965 }, { "id": "identity_13018", "image": "Super-CLEVR/images/superCLEVR_new_000813.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the articulated bus have the same size as the yellow car?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 24.515625, "loss": 5.1764235496521 }, { "id": "identity_117976", "image": "VQA2.0/images/268533.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the candle on the right side of the image?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 18.546875, "loss": 4.348954200744629 }, { "id": "identity_113878", "image": "Super-CLEVR/images/superCLEVR_new_000875.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly above the yellow airplane?" }, { "from": "gpt", "value": "The answer is A bus" } ], "clip_score": 22.53125, "loss": 4.3033270835876465 }, { "id": "identity_295781", "image": "VQA2.0/images/390345.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the gender of the individual in the photo?" }, { "from": "gpt", "value": "The answer is man" } ], "clip_score": 18.34375, "loss": 5.11160135269165 }, { "id": "identity_317019", "image": "AI2D/images/2972.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which area in the diagram indicates the golgi?\nChoices:\n(A) A\n(B) F\n(C) B\n(D) D" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.828125, "loss": 4.040411472320557 }, { "id": "identity_289466", "image": "ScienceQA/images/10792.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repulsive force between them?\nChoices:\n(A) attract\n(B) repel" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.5625, "loss": 4.011032581329346 }, { "id": "identity_200705", "image": "A-OKVQA/images/348027.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible in the outfield?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.28125, "loss": 4.697080135345459 }, { "id": "identity_298560", "image": "PMC-VQA/images/PMC6126002_Fig3_364099.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to capture the image?\nOptions:\n(A) Magnetic resonance imaging\n(B) Computed tomography scan\n(C) Positron emission tomography\n(D) X-ray" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.359375, "loss": 4.276099681854248 }, { "id": "identity_329576", "image": "DVQA/images/bar_train_00181430.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the lowest accuracy for any of these datasets?" }, { "from": "gpt", "value": "The answer is output" } ], "clip_score": 34.5625, "loss": 4.315633296966553 }, { "id": "identity_115556", "image": "TextVQA/images/58ba95a1f717233f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrians can be seen on the crosswalk?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.40625, "loss": 4.504452705383301 }, { "id": "identity_216634", "image": "PMC-VQA/images/PMC7410909_Fig6_229959.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arrows are present in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 16.828125, "loss": 4.386298179626465 }, { "id": "identity_101650", "image": "IconQA/images/105822.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each section of the shape represents a day of the week, which section would represent Wednesday?" }, { "from": "gpt", "value": "The answer is The fourth section" } ], "clip_score": 29.875, "loss": 4.072425842285156 }, { "id": "identity_307049", "image": "TextVQA/images/8abb73b754c7986c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the store on this side?" }, { "from": "gpt", "value": "The answer is lanchonte" } ], "clip_score": 15.6796875, "loss": 5.260767459869385 }, { "id": "identity_330601", "image": "DVQA/images/bar_train_00093829.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of this bar?" }, { "from": "gpt", "value": "The answer is phone" } ], "clip_score": 19.875, "loss": 4.381924629211426 }, { "id": "identity_190660", "image": "Super-CLEVR/images/superCLEVR_new_001923.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bicycle located to the left of the red car?" }, { "from": "gpt", "value": "The answer is Cyan" } ], "clip_score": 28.875, "loss": 4.666747093200684 }, { "id": "identity_308095", "image": "DVQA/images/bar_train_00112747.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the third bar from the left in each set?" }, { "from": "gpt", "value": "The answer is drop" } ], "clip_score": 16.078125, "loss": 4.98007869720459 }, { "id": "identity_198698", "image": "PMC-VQA/images/PMC5529383_F1_196164.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label in the top right corner indicating?" }, { "from": "gpt", "value": "The answer is Head end of the patient" } ], "clip_score": 26.078125, "loss": 4.794288635253906 }, { "id": "identity_226972", "image": "A-OKVQA/images/454813.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the possible implications of the agreement being signed in the image, considering the formal setting and the presence of multiple individuals?" }, { "from": "gpt", "value": "The answer is New policies, business ventures, or international cooperation" } ], "clip_score": 24.53125, "loss": 4.03126859664917 }, { "id": "identity_266849", "image": "AI2D/images/3712.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: After leaving the rectum, where does waste go?\nChoices:\n(A) stomach\n(B) anal canal\n(C) diaphragm\n(D) liver" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.703125, "loss": 4.31862735748291 }, { "id": "identity_211991", "image": "A-OKVQA/images/217989.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pendant lights are hanging above the kitchen island?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.015625, "loss": 4.206808090209961 }, { "id": "identity_216366", "image": "TextVQA/images/6a20cf4be12a6116.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pieces of pottery have a visible handle on the leftmost shelf?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 30.328125, "loss": 4.724373817443848 }, { "id": "identity_111744", "image": "VQA2.0/images/546940.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many umbrellas can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.96875, "loss": 4.32658576965332 }, { "id": "identity_291909", "image": "A-OKVQA/images/79869.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the individual who utilizes this area enjoy collecting? \n(A) key chains\n(B) trolls\n(C) cleaning supplies\n(D) laptops" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.65625, "loss": 4.431297302246094 }, { "id": "identity_29950", "image": "PMC-VQA/images/PMC6878051_ccr32467-fig-0001_74091.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which anatomical structure is labeled as AA in the image? \nChoices:\n(A) Subclavian vein \n(B) Subclavian artery \n(C) Brachial vein \n(D) Brachial artery " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.109375, "loss": 4.521411895751953 }, { "id": "identity_272227", "image": "A-OKVQA/images/247909.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose are the stuffed animals being showcased? \n(A) as trophies\n(B) for appreciation\n(C) for sale\n(D) as art" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.4375, "loss": 4.0636491775512695 }, { "id": "identity_335752", "image": "AI2D/images/4221.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the rock cycle shown, what is illustrated by this point?\nChoices:\n(A) xtallization\n(B) lithification\n(C) metamorphisis\n(D) melting" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.4375, "loss": 4.949900150299072 }, { "id": "identity_24420", "image": "DocVQA/images/zjlk0226_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the Editor for The Chronicle?" }, { "from": "gpt", "value": "The answer is M. Pat Schmidt" } ], "clip_score": 20.484375, "loss": 4.078965663909912 }, { "id": "identity_324533", "image": "TextVQA/images/30096e7e939f4fc1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of blues is mentioned on this sign?" }, { "from": "gpt", "value": "The answer is speed blitz" } ], "clip_score": 15.578125, "loss": 5.799819469451904 }, { "id": "identity_134111", "image": "DocVQA/images/syvg0024_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many presentations are scheduled before the lunch break?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 19.0625, "loss": 4.361966133117676 }, { "id": "identity_186967", "image": "PMC-VQA/images/PMC4716828_F1_462831.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the brain in this image?" }, { "from": "gpt", "value": "The answer is Axial orientation" } ], "clip_score": 32.59375, "loss": 4.016019344329834 }, { "id": "identity_272845", "image": "ScienceQA/images/15915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the specific humidity level measured in the highlighted area shown?\nChoices:\n(A) 16 grams of water vapor per kilogram of air\n(B) 11 grams of water vapor per kilogram of air\n(C) 21 grams of water vapor per kilogram of air" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.125, "loss": 4.086525917053223 }, { "id": "identity_315458", "image": "AI2D/images/3917.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part is the convolution of a specific lobe that is bounded in front by a certain sulcus?\nChoices:\n(A) precentral gyrus\n(B) postcentral gyrus\n(C) central sulcus\n(D) lateral sulcus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.765625, "loss": 4.23324728012085 }, { "id": "identity_329418", "image": "A-OKVQA/images/571746.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people next to the train doing?\nChoices:\n(A) sledding\n(B) running\n(C) skiing\n(D) rolling" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.4375, "loss": 4.456827640533447 }, { "id": "identity_187800", "image": "PMC-VQA/images/PMC7602118_diagnostics-10-00812-f001_288154.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many screws are inserted in the bone on the left side of the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 30.390625, "loss": 4.925091743469238 }, { "id": "identity_269999", "image": "PMC-VQA/images/PMC9176240_f0006_304508.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of microscope was utilized to capture the images: \n(A) Light microscope \n(B) X-ray microscope \n(C) Fluorescence microscope \n(D) Electron microscope" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.1875, "loss": 4.441679954528809 }, { "id": "identity_264857", "image": "PMC-VQA/images/PMC7140673_cells-09-00730-f002_147347.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options is depicted in red?\n(A) Macrophages\n(B) CD163 staining\n(C) IRF5 expression\n(D) None of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.53125, "loss": 4.496056079864502 }, { "id": "identity_23708", "image": "PMC-VQA/images/PMC3533984_F4_176067.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the SEM images? \nChoices:\n(A) Cells \n(B) Carbon Nanofibers (CNFs) \n(C) Fe Catalysts \n(D) Electrons " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.015625, "loss": 4.3500895500183105 }, { "id": "identity_4524", "image": "ScienceQA/images/15365.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During this time, thermal energy was transferred from () to ().\nChoices:\n(A) the surroundings . . . each refrigerator\n(B) each refrigerator . . . the surroundings" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.40625, "loss": 4.080794334411621 }, { "id": "identity_183510", "image": "A-OKVQA/images/26164.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct logos can be seen on the wall behind the snowboarder?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 32.71875, "loss": 4.144012451171875 }, { "id": "identity_127535", "image": "VQA2.0/images/416978.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the umbrella closest to the person in the foreground?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 23.5, "loss": 4.643149375915527 }, { "id": "identity_53307", "image": "DocVQA/images/yzbd0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the discussion leader for the II Session - P.M. on the fourth day?" }, { "from": "gpt", "value": "The answer is Not mentioned" } ], "clip_score": 22.390625, "loss": 4.705430030822754 }, { "id": "identity_25821", "image": "A-OKVQA/images/304158.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of vehicle is the yellow thing?\nChoices:\n(A) tour bus\n(B) truck\n(C) school bus\n(D) train" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.109375, "loss": 4.6377787590026855 }, { "id": "identity_264732", "image": "TQA/images/abc_question_images/photosynthesis_17481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label that represents carbon dioxide from the following choices:\n(A) F\n(B) C\n(C) J\n(D) S" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.5625, "loss": 4.384627342224121 }, { "id": "identity_311200", "image": "TextVQA/images/45d9b08098b714cc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of this book?" }, { "from": "gpt", "value": "The answer is snow crash" } ], "clip_score": 32.53125, "loss": 4.035068511962891 }, { "id": "identity_277852", "image": "A-OKVQA/images/19624.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color of the drink inside the cup shown on the top left of the billboard?\nChoices:\n(A) pink\n(B) red\n(C) blue\n(D) green" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.796875, "loss": 4.038661479949951 }, { "id": "identity_271798", "image": "FigureQA/images/83076.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Tan's area under the curve the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.734375, "loss": 4.174144744873047 }, { "id": "identity_327086", "image": "PMC-VQA/images/PMC4018937_F2_287976.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the perspective of the image?\nChoices:\n(A) Anterior \n(B) Lateral \n(C) Posterior \n(D) Dorsal" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.625, "loss": 4.703488349914551 }, { "id": "identity_322037", "image": "FigureQA/images/80362.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Web Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.90625, "loss": 4.122504234313965 }, { "id": "identity_263988", "image": "Super-CLEVR/images/superCLEVR_new_001296.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the small blue object and the metal item in front of the large shiny utility bike of the same shape?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 27.1875, "loss": 5.454718589782715 }, { "id": "identity_57505", "image": "PlotQA/images/54854.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries have a percentage of food exports less than 10% in 1962?" }, { "from": "gpt", "value": "The answer is Two countries" } ], "clip_score": 30.828125, "loss": 4.51176118850708 }, { "id": "identity_311664", "image": "DVQA/images/bar_train_00087474.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which method has the highest total accuracy across all datasets?" }, { "from": "gpt", "value": "The answer is scope" } ], "clip_score": 30.40625, "loss": 4.306302070617676 }, { "id": "identity_318367", "image": "TextVQA/images/5ecd7c8ef530ac52.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of books are being promoted?" }, { "from": "gpt", "value": "The answer is new" } ], "clip_score": 22.765625, "loss": 4.110291481018066 }, { "id": "identity_282998", "image": "A-OKVQA/images/245598.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the boy in red wearing a glove?\nChoices:\n(A) fashion\n(B) health\n(C) warmth\n(D) to catch" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 12.796875, "loss": 4.574132442474365 }, { "id": "identity_3118", "image": "PMC-VQA/images/PMC7149892_Fig3_149735.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the subject of the image captured? \nChoices:\n(A) virus \n(B) bacteria \n(C) plant \n(D) animal " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.625, "loss": 5.496457576751709 }, { "id": "identity_251376", "image": "A-OKVQA/images/327713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles are visible in the image, and what type of station is in this area?" }, { "from": "gpt", "value": "The answer is Four vehicles, gas station" } ], "clip_score": 21.5, "loss": 4.377015590667725 }, { "id": "identity_198644", "image": "TextVQA/images/0ab462028eb0810e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many books on the top shelf have a predominantly blue spine?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 26.828125, "loss": 4.612269878387451 }, { "id": "identity_128396", "image": "A-OKVQA/images/38572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object on the wall to the right of the trainer?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 13.59375, "loss": 4.107921600341797 }, { "id": "identity_15944", "image": "IconQA/images/33266.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you select a marble without looking, which color are you less likely to pick?\nChoices:\n(A) black\n(B) light blue\n(C) neither; black and light blue are equally likely" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.515625, "loss": 4.662719249725342 }, { "id": "identity_135520", "image": "DocVQA/images/pkbh0023_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of transmission used?" }, { "from": "gpt", "value": "The answer is STD" } ], "clip_score": 21.03125, "loss": 4.384378433227539 }, { "id": "identity_274937", "image": "A-OKVQA/images/140291.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the frisbee being held in the right hand of the man in the background? Is it:\n(A) white\n(B) blue\n(C) red\n(D) yellow" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.90625, "loss": 4.067752838134766 }, { "id": "identity_186875", "image": "VizWiz/images/VizWiz_train_00019559.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What word is written on the tag attached to the hammer?" }, { "from": "gpt", "value": "The answer is VOS" } ], "clip_score": 18.953125, "loss": 5.238895893096924 }, { "id": "identity_23085", "image": "FigureQA/images/5207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Aqua the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.96875, "loss": 4.4008097648620605 }, { "id": "identity_280953", "image": "ScienceQA/images/19997.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) North Carolina\n(B) Georgia\n(C) Virginia\n(D) South Carolina" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.3125, "loss": 4.434415340423584 }, { "id": "identity_1084", "image": "AI2D/images/4047.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which zone is at the center of the sun?\nChoices:\n(A) convection\n(B) radiative\n(C) nuclear\n(D) productive" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.5625, "loss": 4.1916351318359375 }, { "id": "identity_220163", "image": "TextVQA/images/142076607dbdfa59.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What airline operates this aircraft?" }, { "from": "gpt", "value": "The answer is Air China" } ], "clip_score": 26.625, "loss": 4.596789836883545 }, { "id": "identity_112074", "image": "VQA2.0/images/535278.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible personal photographs are there on the wall?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.484375, "loss": 4.8456854820251465 }, { "id": "identity_200715", "image": "PMC-VQA/images/PMC5160011_Fig1_105798.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the general shape of the object surrounding the blurred area?" }, { "from": "gpt", "value": "The answer is Helmet shape" } ], "clip_score": 19.265625, "loss": 4.491316795349121 }, { "id": "identity_6298", "image": "A-OKVQA/images/311076.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who captains the team of the jersey in red?\nChoices:\n(A) mario iginla\n(B) jonathan toes\n(C) lionel messi\n(D) bill reid" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.078125, "loss": 4.304725646972656 }, { "id": "identity_15346", "image": "AI2D/images/3938.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What body part does label a refer to?\nChoices:\n(A) Heart\n(B) Small intestines\n(C) Stomach\n(D) Liver" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.53125, "loss": 4.914341449737549 }, { "id": "identity_263829", "image": "PMC-VQA/images/PMC7959875_FIG2_391530.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the computed tomography image? Is it:\n(A) Lungs\n(B) Abdomen\n(C) Heart\n(D) Kidneys" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.9375, "loss": 5.194989204406738 }, { "id": "identity_303192", "image": "Super-CLEVR/images/superCLEVR_new_001717.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the metallic bicycle that is the same size as the dirtbike?" }, { "from": "gpt", "value": "The answer is green" } ], "clip_score": 23.328125, "loss": 4.37369441986084 }, { "id": "identity_9862", "image": "ScienceQA/images/14953.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Iowa?\nChoices:\n(A) Des Moines\n(B) Jefferson City\n(C) Minneapolis\n(D) Davenport" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.3125, "loss": 4.250128746032715 }, { "id": "identity_322214", "image": "A-OKVQA/images/122350.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of food is in this?\nChoices:\n(A) pita\n(B) tortilla\n(C) laffa\n(D) pie" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.265625, "loss": 4.076137065887451 }, { "id": "identity_25030", "image": "VQA2.0/images/225104.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the spectators leaning against?" }, { "from": "gpt", "value": "The answer is wall" } ], "clip_score": 18.890625, "loss": 4.177865982055664 }, { "id": "identity_327249", "image": "ScienceQA/images/11303.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Cambridge\n(B) Harrisburg\n(C) Hartford\n(D) Atlanta" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.8125, "loss": 4.890480995178223 }, { "id": "identity_22026", "image": "A-OKVQA/images/181084.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the police monitoring?\nChoices:\n(A) accident\n(B) balloon sale\n(C) riot\n(D) parade" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.40625, "loss": 4.195700645446777 }, { "id": "identity_325641", "image": "FigureQA/images/26154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.015625, "loss": 4.033252716064453 }, { "id": "identity_23552", "image": "DVQA/images/bar_train_00068408.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the seventh bar from the left?" }, { "from": "gpt", "value": "The answer is plot" } ], "clip_score": 22.609375, "loss": 4.957663059234619 }, { "id": "identity_282467", "image": "VQA2.0/images/232250.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the chairs?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 18.78125, "loss": 4.211350917816162 }, { "id": "identity_184107", "image": "VQA-AS/images/15721.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bushes are in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.84375, "loss": 4.9034881591796875 }, { "id": "identity_9347", "image": "AI2D/images/301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Grass is classified as what?\nChoices:\n(A) Consumer\n(B) Producer\n(C) Decomposer\n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.96875, "loss": 4.241456508636475 }, { "id": "identity_117199", "image": "VQA2.0/images/542974.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are visible in the carriage?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.890625, "loss": 4.137681007385254 }, { "id": "identity_265480", "image": "AI2D/images/4146.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term used to describe a sweet crystalline or powdered substance?\nChoices:\n(A) soil\n(B) powder\n(C) sugar\n(D) salt" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.5, "loss": 4.012803077697754 }, { "id": "identity_195805", "image": "VQA2.0/images/356664.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible sections does the train car have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.953125, "loss": 4.404234886169434 }, { "id": "identity_297438", "image": "PMC-VQA/images/PMC5738007_F1_255092.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which medical imaging technique is utilized in this scenario?\nOptions:\n(A) Ultrasound\n(B) X-ray\n(C) MRI\n(D) PET scan" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.0, "loss": 4.787317276000977 }, { "id": "identity_20014", "image": "IconQA/images/85752.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you select a marble without looking, how likely is it that you will pick a black one?\nChoices:\n(A) unlikely\n(B) probable\n(C) certain\n(D) impossible" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.328125, "loss": 4.463367938995361 }, { "id": "identity_305853", "image": "FigureQA/images/61050.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.078125, "loss": 4.707015514373779 }, { "id": "identity_328289", "image": "TextVQA/images/9d5a46fdb218e3d9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What genre is in the top left corner?" }, { "from": "gpt", "value": "The answer is romance" } ], "clip_score": 15.1875, "loss": 4.017642974853516 }, { "id": "identity_283904", "image": "A-OKVQA/images/97667.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the most commonly used leavening agent in this scenario? Was it:\n(A) none\n(B) salt\n(C) yeast\n(D) vegemite" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.4375, "loss": 4.615143775939941 }, { "id": "identity_2710", "image": "GEOS/images/062.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, if x < 90*\\degree then which of the following must be true?\nChoices:\n(A) l is parallel to m\n(B) l is perpendicular to m\n(C) y < 90 degrees\n(D) y = 90 degrees\n(E) y > 90 degrees" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.34375, "loss": 4.221040725708008 }, { "id": "identity_35596", "image": "VQA2.0/images/186233.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pillars are on the building?" }, { "from": "gpt", "value": "The answer is cannot tell" } ], "clip_score": 17.0625, "loss": 4.718924045562744 }, { "id": "identity_4618", "image": "ScienceQA/images/1140.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Rhode Island?\nChoices:\n(A) Providence\n(B) Albany\n(C) Newport\n(D) Augusta" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.9375, "loss": 4.143955230712891 }, { "id": "identity_1991", "image": "ScienceQA/images/6672.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Select the fish below.\nChoices:\n(A) salmon\n(B) bison" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.578125, "loss": 4.301553726196289 }, { "id": "identity_142991", "image": "DocVQA/images/hxyc0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the smallest allocation for Other expenses?" }, { "from": "gpt", "value": "The answer is RDA and Fish (tie)" } ], "clip_score": 22.9375, "loss": 4.645064830780029 }, { "id": "identity_2329", "image": "ScienceQA/images/17916.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the text, how does a sloth's fur help protect it?\nChoices:\n(A) A sloth's fur protects its important organs.\n(B) A sloth's fur helps it hide from predators.\n(C) A sloth's fur helps it cling to tree branches." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.9375, "loss": 4.298039436340332 }, { "id": "identity_272147", "image": "PMC-VQA/images/PMC5874961_acm20315-fig-0012_295162.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, what is represented by the brown line?\nChoices:\n(A) Two beam directions.\n(B) Isocenter plane.\n(C) Outline of PTV.\n(D) Outlines of right and left lungs." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.3125, "loss": 4.6376118659973145 }, { "id": "identity_263556", "image": "ScienceQA/images/14377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which question is most suitable for Jason's experiment to answer?\nChoices:\n(A) Will steel nails rust faster when submerged in a large volume of liquid compared to a small volume?\n(B) Will steel nails rust faster in water compared to vinegar?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.84375, "loss": 4.48415470123291 }, { "id": "identity_261949", "image": "Super-CLEVR/images/superCLEVR_new_000374.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other objects that are the same size as the gray shiny car?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 26.515625, "loss": 5.29005241394043 }, { "id": "identity_29957", "image": "PMC-VQA/images/PMC7560692_fig0010_274447.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the image? \nChoices:\n(A) Show double-label staining of Aβ and Iba1 \n(B) Show double-label staining of HAP1 and NeuN \n(C) Display the structure of neurons \n(D) Show a comparison between NeuN and Iba1. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.40625, "loss": 4.634584903717041 }, { "id": "identity_29417", "image": "PMC-VQA/images/PMC6462967_children-06-00043-f010_465086.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the abnormality shown in the left lateral ventricle? \nChoices:\n(A) dilatation \n(B) hypertrophy \n(C) hypotrophy \n(D) atrophy " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.34375, "loss": 4.322290897369385 }, { "id": "identity_72052", "image": "ScienceQA/images/10243.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the country located to the northwest of the highlighted country?" }, { "from": "gpt", "value": "The answer is Beige" } ], "clip_score": 28.796875, "loss": 4.535423755645752 }, { "id": "identity_290898", "image": "PMC-VQA/images/PMC5382903_f3_150221.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image of ALCAPA, which arteries are indicated by the white arrows?\nChoices:\n(A) Left coronary artery\n(B) Right coronary artery\n(C) Both left and right coronary artery\n(D) None of the above options." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.796875, "loss": 4.190211772918701 }, { "id": "identity_295338", "image": "VizWiz/images/VizWiz_train_00003865.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what is shown in this image?" }, { "from": "gpt", "value": "The answer is medical equipment" } ], "clip_score": 29.71875, "loss": 4.0581278800964355 }, { "id": "identity_187589", "image": "PMC-VQA/images/PMC7523872_F1_262811.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the abnormality indicated by the arrow located on the left or right side of the image?" }, { "from": "gpt", "value": "The answer is Right side" } ], "clip_score": 21.65625, "loss": 4.054336071014404 }, { "id": "identity_243644", "image": "PMC-VQA/images/PMC3116541_F0003_99168.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which area of the optic disc does the medial PCA supply blood to, and what is the primary function of this area in visual processing?" }, { "from": "gpt", "value": "The answer is Transmits visual information from the temporal visual field" } ], "clip_score": 27.484375, "loss": 4.142612457275391 }, { "id": "identity_16705", "image": "ScienceQA/images/1371.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Georgia?\nChoices:\n(A) Boise\n(B) Oklahoma City\n(C) Atlanta\n(D) Savannah" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.59375, "loss": 4.199074745178223 }, { "id": "identity_5366", "image": "FigureQA/images/20778.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Tomato the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.109375, "loss": 4.445455074310303 }, { "id": "identity_32559", "image": "PMC-VQA/images/PMC3376883_F3_141804.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the PET scan image show in areas that received radiotherapy? \nChoices:\n(A) inflammation \n(B) hot lesions \n(C) cold lesions \n(D) no change " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.25, "loss": 5.285921573638916 }, { "id": "identity_10072", "image": "ScienceQA/images/15236.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Florida?\nChoices:\n(A) Tallahassee\n(B) Orlando\n(C) Baton Rouge\n(D) Tampa" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.703125, "loss": 4.105281829833984 }, { "id": "identity_36926", "image": "PMC-VQA/images/PMC7891795_FIG1_370421.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the evidence of sinus disease in the CT scan? \nChoices:\n(A) Right frontal sinus \n(B) Left sphenoid sinus \n(C) Right sphenoid sinus \n(D) Left maxillary sinus " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.25, "loss": 4.4267778396606445 }, { "id": "identity_204127", "image": "A-OKVQA/images/107054.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of barrier separates the giraffes from the open area?" }, { "from": "gpt", "value": "The answer is A low brick wall" } ], "clip_score": 24.3125, "loss": 4.2899603843688965 }, { "id": "identity_282312", "image": "A-OKVQA/images/166477.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the man engaging in with his hands raised in the air?\nChoices:\n(A) fighting\n(B) directing traffic\n(C) swimming\n(D) tricks" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.828125, "loss": 4.469044208526611 }, { "id": "identity_321989", "image": "A-OKVQA/images/417218.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who uses the area shown here?\nChoices:\n(A) activists\n(B) diners\n(C) merchants\n(D) vagabonds" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.96875, "loss": 4.546870231628418 }, { "id": "identity_255565", "image": "A-OKVQA/images/174601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Considering the environment and the position of the green garment, what are the possible reasons for it being hung there, and which is the most likely reason based on the context of the image?" }, { "from": "gpt", "value": "The answer is drying it" } ], "clip_score": 18.140625, "loss": 6.120485782623291 }, { "id": "identity_20838", "image": "ScienceQA/images/17320.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Minnesota?\nChoices:\n(A) Saint Paul\n(B) Minneapolis\n(C) Montgomery\n(D) New York City" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.796875, "loss": 4.2716169357299805 }, { "id": "identity_328951", "image": "PMC-VQA/images/PMC3131277_pone-0021297-g001_101560.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the central image?\nChoices:\n(A) ROI of a reconstructed grey-level image\n(B) Customized linear filtering\n(C) Customized non-linear filtering\n(D) Segmentation" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.9375, "loss": 5.180733680725098 }, { "id": "identity_112693", "image": "VQA2.0/images/283884.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible cables are above the train?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.9375, "loss": 4.455630302429199 }, { "id": "identity_216066", "image": "A-OKVQA/images/365456.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible on the court?" }, { "from": "gpt", "value": "The answer is Two players" } ], "clip_score": 22.203125, "loss": 4.481418132781982 }, { "id": "identity_330754", "image": "PMC-VQA/images/PMC4150498_fig1_316889.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the state of the left femoral head when admitted?\nChoices:\n(A) Normal\n(B) Swollen\n(C) Collapsed\n(D) Enlarged" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.296875, "loss": 4.210759162902832 }, { "id": "identity_75644", "image": "ScienceQA/images/12087.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image are typically found in a laboratory setting?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 14.5703125, "loss": 4.945218086242676 }, { "id": "identity_204405", "image": "A-OKVQA/images/254177.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many buttons are visible on the man's suit jacket?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.34375, "loss": 4.359500885009766 }, { "id": "identity_281796", "image": "Super-CLEVR/images/superCLEVR_new_000526.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more large cars than small blue metal regular cars?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 17.890625, "loss": 4.917206764221191 }, { "id": "identity_329907", "image": "PMC-VQA/images/PMC2553761_F5_28181.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color of the fluorescence in the merged image? \nChoices:\n(A) Magenta \n(B) Green \n(C) Yellow \n(D) Blue" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.546875, "loss": 4.666593074798584 }, { "id": "identity_208486", "image": "A-OKVQA/images/469781.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many traffic cones are visible around the utility truck?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.96875, "loss": 4.616564750671387 }, { "id": "identity_127580", "image": "VQA2.0/images/509749.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many surfboards are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 26.9375, "loss": 4.310000419616699 }, { "id": "identity_129112", "image": "VQA2.0/images/432328.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the fence in the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 26.484375, "loss": 4.226614952087402 }, { "id": "identity_61498", "image": "DocVQA/images/jtnh0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full name of the person with the unpublished phone number?" }, { "from": "gpt", "value": "The answer is William Wyman" } ], "clip_score": 22.0625, "loss": 4.393594741821289 }, { "id": "identity_199951", "image": "PMC-VQA/images/PMC5579201_Fig6_210197.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct structures labeled with \"DA\" can be observed in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.546875, "loss": 4.441932678222656 }, { "id": "identity_326987", "image": "PMC-VQA/images/PMC7916940_life-11-00136-f007_378526.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of cells is indicated by the perinuclear cytoplasm in these images?\nChoices:\n(A) Schwann cells\n(B) Oligodendrocytes\n(C) Astrocytes\n(D) Ependymal cells" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.515625, "loss": 4.150318145751953 }, { "id": "identity_282404", "image": "A-OKVQA/images/213592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What objects are being held by the men on horseback in their hands?\nChoices:\n(A) brooms\n(B) mallets\n(C) clubs\n(D) bats" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.859375, "loss": 4.131355285644531 }, { "id": "identity_132245", "image": "Super-CLEVR/images/superCLEVR_new_000703.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image have a matte finish?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.015625, "loss": 4.665815830230713 }, { "id": "identity_6032", "image": "ScienceQA/images/8866.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this food web, which organism contains matter that eventually moves to the mushroom?\nChoices:\n(A) Arctic fox\n(B) collared lemming\n(C) barren-ground caribou\n(D) bear sedge" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.1875, "loss": 4.546910285949707 }, { "id": "identity_37367", "image": "PMC-VQA/images/PMC6951026_Fig5_97110.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the abnormal vessels originating from?\nChoices:\n(A) Left internal thoracic artery (LITA) \n(B) Right subclavian artery (RSA) \n(C) Lateral thoracic artery (LTA) \n(D) Axillary artery " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.234375, "loss": 4.197096824645996 }, { "id": "identity_8398", "image": "IconQA/images/43413.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a fish. Which picture is second?\nChoices:\n(A) turtle\n(B) fish\n(C) sub" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.84375, "loss": 4.379755973815918 }, { "id": "identity_328284", "image": "A-OKVQA/images/579739.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time of day is shown in the image?\nChoices:\n(A) morning\n(B) evening\n(C) afternoon\n(D) night" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.578125, "loss": 4.436675071716309 }, { "id": "identity_5887", "image": "PMC-VQA/images/PMC6028832_Fig5_336555.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of image was used to identify white matter abnormalities? \nChoices:\n(A) Axial T1-weighted \n(B) Coronal T2-weighted \n(C) Sagittal T2-weighted \n(D) Axial FLAIR image " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.8125, "loss": 4.19185733795166 }, { "id": "identity_277541", "image": "AI2D/images/3899.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term for the cambium that develops between the vascular bundles?\nChoices:\n(A) B\n(B) A\n(C) C\n(D) D" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.15625, "loss": 4.0234222412109375 }, { "id": "identity_194732", "image": "A-OKVQA/images/244032.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the vehicles in the background?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 16.203125, "loss": 4.733071327209473 }, { "id": "identity_11390", "image": "AI2D/images/3754.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is known as oral cavity ?\nChoices:\n(A) Tongue\n(B) sinus\n(C) Mouth\n(D) nasal cavity" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.21875, "loss": 4.122784614562988 }, { "id": "identity_26254", "image": "A-OKVQA/images/254714.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who was the other friend besides the butcher of the person whose name appears before the word market?\nChoices:\n(A) landscaper\n(B) candlestick maker\n(C) plumber\n(D) gardener" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.40625, "loss": 4.047063827514648 }, { "id": "identity_196261", "image": "PMC-VQA/images/PMC7328697_FIG1_202923.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the blue arrow pointing to in the image?" }, { "from": "gpt", "value": "The answer is Gallstones" } ], "clip_score": 26.203125, "loss": 4.192119121551514 }, { "id": "identity_189825", "image": "A-OKVQA/images/519784.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the overhead sign in the middle of the street?" }, { "from": "gpt", "value": "The answer is Vitagen" } ], "clip_score": 24.0, "loss": 4.081888198852539 }, { "id": "identity_313245", "image": "TextVQA/images/be80f8b750c60c6c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What words are on this shirt?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 18.546875, "loss": 4.4971089363098145 }, { "id": "identity_16205", "image": "Super-CLEVR/images/superCLEVR_new_001437.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more brown rubber fighters that are in front of the big regular bus than tiny brown matte fighters that are behind the big blue metal motorbike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 28.484375, "loss": 5.147614479064941 }, { "id": "identity_268055", "image": "DocVQA/images/trkm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: From whom is this letter?" }, { "from": "gpt", "value": "The answer is Theodore R. Vallance" } ], "clip_score": 25.6875, "loss": 4.650805950164795 }, { "id": "identity_322803", "image": "A-OKVQA/images/312926.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this person holding?\nChoices:\n(A) tray\n(B) soda\n(C) baton\n(D) child's hand" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.484375, "loss": 4.3137078285217285 }, { "id": "identity_3514", "image": "AI2D/images/3634.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shwn right above the ulnar artery?\nChoices:\n(A) radial artery\n(B) lliac artery\n(C) femoral vein\n(D) none of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.078125, "loss": 4.921550273895264 }, { "id": "identity_128208", "image": "A-OKVQA/images/183166.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.0, "loss": 4.544354438781738 }, { "id": "identity_296043", "image": "A-OKVQA/images/569747.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of damage has occurred in this street?\nChoices:\n(A) cracked ground\n(B) illegal construction\n(C) graffiti\n(D) arson" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.2109375, "loss": 4.29396915435791 }, { "id": "identity_20101", "image": "PMC-VQA/images/PMC7648378_Fig1_293222.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of view is used to show the cross-table view of the acetabular component? \nChoices:\n(A) anteroposterior view \n(B) oblique view \n(C) lateral view \n(D) ultrasound view " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.078125, "loss": 4.237304210662842 }, { "id": "identity_324316", "image": "A-OKVQA/images/187803.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is situated in the corner?\nChoices:\n(A) trash\n(B) couch\n(C) stairs\n(D) lamp" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.875, "loss": 4.349286079406738 }, { "id": "identity_16414", "image": "FigureQA/images/79752.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Yellow Green have the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.03125, "loss": 4.124710559844971 }, { "id": "identity_127265", "image": "Super-CLEVR/images/superCLEVR_new_001361.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the vehicles on the right side of the image?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 23.0625, "loss": 4.353447914123535 }, { "id": "identity_303629", "image": "ScienceQA/images/2066.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic describes this item?\nChoices:\n(A) sour\n(B) sticky" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.296875, "loss": 4.257415771484375 }, { "id": "identity_189628", "image": "VQA2.0/images/513980.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of road sign is directly above the motorcycle?" }, { "from": "gpt", "value": "The answer is Left curve warning" } ], "clip_score": 26.640625, "loss": 5.166618824005127 }, { "id": "identity_266683", "image": "VizWiz/images/VizWiz_train_00019928.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide information about the signery?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 13.1640625, "loss": 4.025967121124268 }, { "id": "identity_263803", "image": "PMC-VQA/images/PMC3418000_F3_149477.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the image shown?\nChoices:\n(A) a normal brain\n(B) a skull fracture\n(C) a rib cage injury\n(D) a spinal cord damage" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.671875, "loss": 4.604669094085693 }, { "id": "identity_302539", "image": "A-OKVQA/images/103393.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is to the left of this?\nChoices:\n(A) helicopter\n(B) bicycle\n(C) television\n(D) bear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.140625, "loss": 4.408735752105713 }, { "id": "identity_336716", "image": "TextVQA/images/4f491de9b8cb2f78.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the text in red indicate?" }, { "from": "gpt", "value": "The answer is what people in tourism should now about vieal and social media amrk. " } ], "clip_score": 16.671875, "loss": 4.901835918426514 }, { "id": "identity_280466", "image": "FigureQA/images/61501.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Green the color with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.359375, "loss": 4.154571056365967 }, { "id": "identity_24897", "image": "VQA2.0/images/208882.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of candy is in the red and green bag?" }, { "from": "gpt", "value": "The answer is cannot tell" } ], "clip_score": 9.1328125, "loss": 4.856438159942627 }, { "id": "identity_5687", "image": "TQA/images/abc_question_images/human_system_circulatory_11381.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label refers to the lung?\nChoices:\n(A) L\n(B) R\n(C) C\n(D) W" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.390625, "loss": 4.529026985168457 }, { "id": "identity_29034", "image": "PMC-VQA/images/PMC6889661_Fig1_77615.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the location of the largest lesion? \nChoices:\n(A) Right superior frontal gyrus \n(B) Left superior frontal gyrus \n(C) Left inferior frontal gyrus \n(D) Right inferior frontal gyrus " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.4375, "loss": 4.465184688568115 }, { "id": "identity_325902", "image": "Super-CLEVR/images/superCLEVR_new_001873.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the other object that is the same shape as the big gray metallic thing?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 16.703125, "loss": 4.0136566162109375 }, { "id": "identity_296080", "image": "ScienceQA/images/9075.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Illinois?\nChoices:\n(A) Springfield\n(B) Saint Paul\n(C) Cleveland\n(D) Topeka" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.34375, "loss": 4.066791534423828 }, { "id": "identity_76845", "image": "TQA/images/abc_question_images/parts_skeleton_17256.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bones are labeled in the lower limb?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 30.46875, "loss": 4.276576995849609 }, { "id": "identity_305431", "image": "A-OKVQA/images/64116.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person called who would take care of these animals?\nChoices:\n(A) zookeeper\n(B) breeder\n(C) manager\n(D) shepard" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.2578125, "loss": 4.197639465332031 }, { "id": "identity_299179", "image": "A-OKVQA/images/267042.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of label is affixed to the desk?\nChoices:\n(A) instructional\n(B) directional\n(C) brand\n(D) regulatory" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.1875, "loss": 4.284197807312012 }, { "id": "identity_207358", "image": "VQA2.0/images/97078.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many kites are visible in the sky?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 20.765625, "loss": 4.078850269317627 }, { "id": "identity_335350", "image": "A-OKVQA/images/406253.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What keeps the scooter upright when it's parked?\nChoices:\n(A) gearshift\n(B) kickstand\n(C) parking rack\n(D) brake pedal" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.25, "loss": 4.186619281768799 }, { "id": "identity_263487", "image": "PMC-VQA/images/PMC1803030_pone-0000276-g002_9682.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the values on the left side of the topographic plots indicate?\nChoices:\n(A) Age range \n(B) Gender distribution \n(C) Power distribution \n(D) Electrode position" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.40625, "loss": 4.798101425170898 }, { "id": "identity_17139", "image": "ScienceQA/images/17235.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which specific humidity level was measured within the outlined area shown?\nChoices:\n(A) 11 grams of water vapor per kilogram of air\n(B) 12 grams of water vapor per kilogram of air\n(C) 4 grams of water vapor per kilogram of air" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.078125, "loss": 4.077005863189697 }, { "id": "identity_15672", "image": "TextVQA/images/19703a54731b9656.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what does the sign above the coffee pot say?" }, { "from": "gpt", "value": "The answer is a" } ], "clip_score": 27.84375, "loss": 5.75143575668335 }, { "id": "identity_32959", "image": "A-OKVQA/images/204188.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this toilet located?\nChoices:\n(A) car\n(B) bathroom\n(C) kitchen\n(D) expo" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.5625, "loss": 4.124995231628418 }, { "id": "identity_17214", "image": "AI2D/images/4351.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fruit is represented by letter L?\nChoices:\n(A) banana\n(B) apricot\n(C) plum\n(D) lychee" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.1875, "loss": 4.782855987548828 }, { "id": "identity_136134", "image": "DVQA/images/bar_train_00033937.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item has the lowest total sales across all stores?" }, { "from": "gpt", "value": "The answer is air" } ], "clip_score": 27.546875, "loss": 4.584661483764648 }, { "id": "identity_61357", "image": "DVQA/images/bar_train_00000151.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset has the 'income' algorithm performed the worst on?" }, { "from": "gpt", "value": "The answer is crisis" } ], "clip_score": 29.234375, "loss": 4.851305961608887 }, { "id": "identity_158916", "image": "AI2D/images/1293.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stages of growth are depicted in this image?" }, { "from": "gpt", "value": "The answer is Four stages" } ], "clip_score": 29.671875, "loss": 4.355888843536377 }, { "id": "identity_223675", "image": "TextVQA/images/9996925c13fc2f3a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the company that makes the product advertised?" }, { "from": "gpt", "value": "The answer is S.Anargyros" } ], "clip_score": 13.7421875, "loss": 4.301621913909912 }, { "id": "identity_180110", "image": "IconQA/images/40998.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which picture is second from the right?" }, { "from": "gpt", "value": "The answer is A dog" } ], "clip_score": 24.328125, "loss": 4.643610000610352 }, { "id": "identity_20377", "image": "ScienceQA/images/15914.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which better describes the Scarborough Marsh ecosystem?\nChoices:\n(A) It has land that is covered with water during most of the year. It also has soil that is poor in nutrients.\n(B) It has soil that is rich in nutrients. It also has other water ecosystems nearby." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.8125, "loss": 4.564202785491943 }, { "id": "identity_288011", "image": "IconQA/images/65513.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the probability of selecting a black marble without looking?\nChoices:\n(A) impossible\n(B) probable\n(C) certain\n(D) unlikely" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.84375, "loss": 4.562169551849365 }, { "id": "identity_322106", "image": "PMC-VQA/images/PMC5915579_Fig6_305761.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the orientation of the axial planes in relation to the prostate in the images shown?\nChoices:\n(A) horizontal planes intersecting the prostate from top to bottom\n(B) vertical planes intersecting the prostate from front to back\n(C) oblique planes intersecting the prostate at an angle\n(D) circular planes encompassing the prostate from all sides" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.53125, "loss": 4.007890224456787 }, { "id": "identity_217980", "image": "A-OKVQA/images/437180.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the shirt of the person in the foreground?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 17.796875, "loss": 4.648880481719971 }, { "id": "identity_37763", "image": "A-OKVQA/images/172874.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the standing man doing with his arms?\nChoices:\n(A) lifting\n(B) hiding\n(C) folding\n(D) waving" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.9453125, "loss": 4.4855217933654785 }, { "id": "identity_7318", "image": "FigureQA/images/62147.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Purple the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.328125, "loss": 4.245859146118164 }, { "id": "identity_108932", "image": "VQA-AS/images/160.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many butterflies are in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.546875, "loss": 4.395025253295898 }, { "id": "identity_21320", "image": "PMC-VQA/images/PMC5964188_F3_317518.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the kidney is shown in the image? \nChoices:\n(A) Glomerulus and proximal contorted tubules \n(B) Collecting ducts \n(C) Loop of Henle \n(D) Distal convoluted tubules " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.65625, "loss": 4.024598121643066 }, { "id": "identity_278461", "image": "PMC-VQA/images/PMC4369826_Fig2_370506.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In ultrasonography, what kind of tumor is typically observed?\nChoices:\n(A) Predominantly cystic\n(B) Predominantly solid\n(C) Half cystic and half solid\n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.09375, "loss": 4.127486228942871 }, { "id": "identity_317558", "image": "A-OKVQA/images/69856.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of the state shown here?\nChoices:\n(A) manhattan\n(B) buffalo\n(C) albany\n(D) poughkeepsie" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.5859375, "loss": 4.295088768005371 }, { "id": "identity_20070", "image": "PMC-VQA/images/PMC4101959_fig6_306880.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the cause of the patient's trauma? \nChoices:\n(A) Left hemisphere injury \n(B) Acute left hemiplegia \n(C) Acute right hemiplegia \n(D) Occipital lobe damage " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.984375, "loss": 4.89910364151001 }, { "id": "identity_29372", "image": "VQA2.0/images/236593.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the bike parked?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 27.234375, "loss": 4.122250556945801 }, { "id": "identity_289515", "image": "DocVQA/images/rxhm0227_14.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the name of the executive secretary?" }, { "from": "gpt", "value": "The answer is channing H. Lushbough" } ], "clip_score": 16.84375, "loss": 4.731597423553467 }, { "id": "identity_133135", "image": "Super-CLEVR/images/superCLEVR_new_000608.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is closest to the bottom edge of the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 17.6875, "loss": 4.320531368255615 }, { "id": "identity_214822", "image": "TextVQA/images/2d7c6aae590d63f8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the outfield wall?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 28.046875, "loss": 4.07654333114624 }, { "id": "identity_37394", "image": "A-OKVQA/images/577835.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of truck is this?\nChoices:\n(A) model\n(B) ladder\n(C) commercial\n(D) passenger" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.890625, "loss": 4.26945686340332 }, { "id": "identity_211241", "image": "PMC-VQA/images/PMC8084928_Fig7_430849.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many images are displayed in the top row?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 17.03125, "loss": 4.593092918395996 }, { "id": "identity_37174", "image": "Super-CLEVR/images/superCLEVR_new_000992.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a large red object; are there any large matte objects on the left side of it?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 17.578125, "loss": 5.299043655395508 }, { "id": "identity_335664", "image": "A-OKVQA/images/382873.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the seat on top of the animal called?\nChoices:\n(A) saddle\n(B) gondola\n(C) howdah\n(D) chaise" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.515625, "loss": 4.2038702964782715 }, { "id": "identity_263950", "image": "AI2D/images/117.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the letter B represent in the following options: (A) North Pole, (B) Moon, (C) Equator, (D) South Pole?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.9375, "loss": 4.331268310546875 }, { "id": "identity_193727", "image": "Super-CLEVR/images/superCLEVR_new_001495.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have wheels?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.234375, "loss": 4.729423999786377 }, { "id": "identity_118432", "image": "TextVQA/images/f19424b9835199db.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many panels show the character dressed as Santa Claus?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.8125, "loss": 4.311647415161133 }, { "id": "identity_279212", "image": "A-OKVQA/images/67749.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the woman attempting to engage in?\nChoices:\n(A) cross street\n(B) board bus\n(C) board car\n(D) jog" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.75, "loss": 4.4826836585998535 }, { "id": "identity_272470", "image": "TextVQA/images/49ff319bac6d6611.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which brand of computers is contained in the white boxes?" }, { "from": "gpt", "value": "The answer is combo" } ], "clip_score": 20.84375, "loss": 4.6075825691223145 }, { "id": "identity_210931", "image": "A-OKVQA/images/214059.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many ground crew members can be seen in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 23.703125, "loss": 4.5180888175964355 }, { "id": "identity_328021", "image": "A-OKVQA/images/217636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is this surface made of?\nChoices:\n(A) asphalt\n(B) concrete\n(C) pavement\n(D) cobblestone" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.078125, "loss": 4.18612003326416 }, { "id": "identity_137691", "image": "MapQA/images/map_4270.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a percentage of senators between 24.0% and 24.7%?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 32.84375, "loss": 4.13306999206543 }, { "id": "identity_289169", "image": "PMC-VQA/images/PMC2811103_F1_55443.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the figure by selecting the correct option:\n(A) Brain MRI\n(B) Brain scan\n(C) Brain anatomy\n(D) Brain drawing" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.28125, "loss": 4.50832986831665 }, { "id": "identity_25106", "image": "ScienceQA/images/4286.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Nevada?\nChoices:\n(A) Carson City\n(B) Boise\n(C) Sacramento\n(D) Reno" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.5625, "loss": 4.124028205871582 }, { "id": "identity_301283", "image": "FigureQA/images/5622.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.328125, "loss": 4.1177263259887695 }, { "id": "identity_275351", "image": "VQA-RAD/images/synpic34515.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the colon the location of feces?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 18.765625, "loss": 4.099349498748779 }, { "id": "identity_306068", "image": "A-OKVQA/images/507966.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who provided these items?\nChoices:\n(A) beach owner\n(B) homeless people\n(C) salvation army\n(D) beach goers" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.171875, "loss": 4.538392066955566 }, { "id": "identity_262027", "image": "A-OKVQA/images/507889.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the type of area from the following options: stadium, port, beach, or backyard.\nChoices:\n(A) stadium\n(B) port\n(C) beach\n(D) backyard" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.015625, "loss": 4.2925920486450195 }, { "id": "identity_265906", "image": "DVQA/images/bar_train_00077353.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which element is represented by the lime color?" }, { "from": "gpt", "value": "The answer is dozen" } ], "clip_score": 20.265625, "loss": 6.0452775955200195 }, { "id": "identity_262386", "image": "TextVQA/images/17ce215d6f745b28.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the blue can?" }, { "from": "gpt", "value": "The answer is duster" } ], "clip_score": 13.90625, "loss": 4.79272985458374 }, { "id": "identity_30972", "image": "PMC-VQA/images/PMC4330230_Fig8_358741.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the arrows indicate in the image? \nChoices:\n(A) Nerves \n(B) Bones \n(C) Veins \n(D) Arteries " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.421875, "loss": 4.488143444061279 }, { "id": "identity_287787", "image": "IconQA/images/96968.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the probability of selecting a black marble without looking?\nChoices:\n(A) certain\n(B) probable\n(C) impossible\n(D) unlikely" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.359375, "loss": 4.1230926513671875 }, { "id": "identity_296772", "image": "AI2D/images/2387.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: When can the butterfly lay eggs? \nOptions:\n(A) Adult\n(B) Larvae\n(C) Pupa\n(D) Nymph" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.6875, "loss": 4.112133502960205 }, { "id": "identity_329827", "image": "TextVQA/images/2be1beb5b96dbe54.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of this?" }, { "from": "gpt", "value": "The answer is infinity" } ], "clip_score": 13.6875, "loss": 4.936210632324219 }, { "id": "identity_27922", "image": "PMC-VQA/images/PMC6037921_F3_339964.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the mass described in the caption? \nChoices:\n(A) Left anterior temporal lobe \n(B) Right anterior temporal lobe \n(C) Posterior temporal lobe \n(D) Frontal lobe. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.046875, "loss": 4.717370986938477 }, { "id": "identity_115732", "image": "VQA2.0/images/239831.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tail of the aircraft?" }, { "from": "gpt", "value": "The answer is Dark blue" } ], "clip_score": 23.03125, "loss": 4.3948235511779785 }, { "id": "identity_299365", "image": "A-OKVQA/images/483424.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of water body is known for holding a large number of rowboats? \n(A) river \n(B) lake \n(C) sea \n(D) ocean" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.015625, "loss": 4.211522102355957 }, { "id": "identity_309019", "image": "IconQA/images/99112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the count of shapes even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.78125, "loss": 4.111532688140869 }, { "id": "identity_43297", "image": "MapQA/images/map_9285.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color representing the highest percentage rate in the USA?" }, { "from": "gpt", "value": "The answer is Dark Red" } ], "clip_score": 32.15625, "loss": 4.002505779266357 }, { "id": "identity_278561", "image": "VizWiz/images/VizWiz_train_00005316.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Could you provide a description of the photo, including any text that is visible?" }, { "from": "gpt", "value": "The answer is flyer for matial arts classes" } ], "clip_score": 17.53125, "loss": 4.213925361633301 }, { "id": "identity_11288", "image": "DocVQA/images/mkvw0217_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which stage is the Project 'Totelle Combined Monograph'?" }, { "from": "gpt", "value": "The answer is Under development by DesignWrite based on parthenon draft" } ], "clip_score": 16.703125, "loss": 5.105294227600098 }, { "id": "identity_336906", "image": "A-OKVQA/images/17272.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of all these umbrellas?\nChoices:\n(A) stop rain\n(B) for hiding\n(C) for flying\n(D) deflect sunlight" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.0, "loss": 4.472704887390137 }, { "id": "identity_27059", "image": "TextVQA/images/dd281aa1b2204a96.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what brand is shown?" }, { "from": "gpt", "value": "The answer is aquintus" } ], "clip_score": 19.140625, "loss": 4.395674705505371 }, { "id": "identity_312201", "image": "PMC-VQA/images/PMC6833318_RSIF20190454F2_62785.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does zero represent on the x-axis of this graph?\nChoices:\n(A) the top of the stem \n(B) the left of the stem \n(C) the centre of the stem \n(D) the right of the stem" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 8.9765625, "loss": 4.281889915466309 }, { "id": "identity_278077", "image": "AI2D/images/2111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the primary producer in the given diagram:\n(A) fox\n(B) snake\n(C) rabbit\n(D) grass" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.65625, "loss": 4.145864009857178 }, { "id": "identity_35882", "image": "PMC-VQA/images/PMC4182018_fig3_324274.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the hypointensity shown in the image? \nChoices:\n(A) Calcification \n(B) Hemorrhage \n(C) Infection \n(D) Artefact " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.390625, "loss": 4.624614238739014 }, { "id": "identity_328543", "image": "DVQA/images/bar_train_00172652.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first stack of bars here?" }, { "from": "gpt", "value": "The answer is fence" } ], "clip_score": 27.03125, "loss": 4.309032440185547 }, { "id": "identity_249232", "image": "A-OKVQA/images/539299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the green nets and how do they relate to the visibility provided by the men's orange vests?" }, { "from": "gpt", "value": "The answer is To guide animals and ensure coordination among men" } ], "clip_score": 31.75, "loss": 4.656437397003174 }, { "id": "identity_290832", "image": "DVQA/images/bar_train_00166662.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In any given category, which object is the most favored?" }, { "from": "gpt", "value": "The answer is lack" } ], "clip_score": 23.90625, "loss": 4.694939613342285 }, { "id": "identity_266342", "image": "FigureQA/images/12189.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orchid the highest possible option?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.109375, "loss": 4.461228370666504 }, { "id": "identity_102305", "image": "CLEVR-Math/images/CLEVR_train_037448.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest cylinder?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 29.34375, "loss": 4.596926689147949 }, { "id": "identity_271052", "image": "FigureQA/images/65627.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Indigo the toughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.703125, "loss": 4.135123252868652 }, { "id": "identity_199708", "image": "A-OKVQA/images/528713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.1875, "loss": 4.860030174255371 }, { "id": "identity_290771", "image": "AI2D/images/2692.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What season is represented by the letter D in the diagram?\nChoices:\n(A) summer\n(B) autumn\n(C) spring\n(D) winter" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.15625, "loss": 4.427107810974121 }, { "id": "identity_19974", "image": "DocVQA/images/qhfd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the program director at KMOX Radio?" }, { "from": "gpt", "value": "The answer is James Butler" } ], "clip_score": 18.1875, "loss": 4.025533199310303 }, { "id": "identity_79888", "image": "Geometry3K/images/152.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the side labeled 2x - 5?" }, { "from": "gpt", "value": "The answer is The length is -265" } ], "clip_score": 30.359375, "loss": 4.3623223304748535 }, { "id": "identity_276264", "image": "FigureQA/images/2430.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater in shade, Web Maroon or Navy Blue?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.84375, "loss": 4.560708045959473 }, { "id": "identity_32449", "image": "Super-CLEVR/images/superCLEVR_new_001143.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a motorbike that is in front of the cyan rubber scooter; is its size the same as the cyan rubber object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 24.21875, "loss": 5.104348182678223 }, { "id": "identity_136784", "image": "MapQA/images/map_4764.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color representing the percentage range of fans in California?" }, { "from": "gpt", "value": "The answer is Light brown" } ], "clip_score": 34.0625, "loss": 4.44080114364624 }, { "id": "identity_268672", "image": "PMC-VQA/images/PMC8430342_fig1_55437.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the parameter being assessed in the M-mode long-axis images? Is it:\n(A) Left ventricular internal diameter during systole (LVIDs)\n(B) Left ventricular posterior wall thickness (LVPW)\n(C) Left ventricular internal diameter during diastole (LVIDd)\n(D) Fractional shortening (FS)" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.828125, "loss": 4.56264591217041 }, { "id": "identity_35265", "image": "ScienceQA/images/35.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of South Carolina?\nChoices:\n(A) Columbia\n(B) Montgomery\n(C) Charleston\n(D) Harrisburg" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.484375, "loss": 4.2589945793151855 }, { "id": "identity_30792", "image": "A-OKVQA/images/294814.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of animals are shown?\nChoices:\n(A) tiger\n(B) lion\n(C) horse\n(D) zebra" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.84375, "loss": 4.206182479858398 }, { "id": "identity_338452", "image": "IconQA/images/92511.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the shape of this?\nChoices:\n(A) circle\n(B) square" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.546875, "loss": 4.04923152923584 }, { "id": "identity_231824", "image": "Super-CLEVR/images/superCLEVR_new_000713.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of objects in the image, and how many of them are either brown or gray?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 15.1015625, "loss": 4.283580303192139 }, { "id": "identity_129746", "image": "TextVQA/images/075d182b04a459de.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many subdials are present on this watch face?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.28125, "loss": 4.1318254470825195 }, { "id": "identity_286713", "image": "TabMWP/images/28927.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.0, "loss": 4.495229244232178 }, { "id": "identity_191108", "image": "PMC-VQA/images/PMC3038968_F1_87161.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the density of the tumor compared to the surrounding tissues?" }, { "from": "gpt", "value": "The answer is Density description" } ], "clip_score": 23.703125, "loss": 4.061254501342773 }, { "id": "identity_301621", "image": "AI2D/images/1469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the bottom cavity in the diagram?\nChoices:\n(A) magma chamber\n(B) side vent\n(C) crater\n(D) none of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.875, "loss": 4.020288467407227 }, { "id": "identity_25593", "image": "A-OKVQA/images/392666.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What animal statues are sitting on the chair?\nChoices:\n(A) cat\n(B) mouse\n(C) bird\n(D) dog" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.6875, "loss": 4.157905101776123 }, { "id": "identity_332021", "image": "A-OKVQA/images/99681.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most important procedure in this environment?\nChoices:\n(A) lunch\n(B) schedule\n(C) timesheet\n(D) safety" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.2421875, "loss": 4.378972053527832 }, { "id": "identity_22673", "image": "A-OKVQA/images/435179.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would be the most useful material for adding to the boat in this image?\nChoices:\n(A) child\n(B) block\n(C) sticks\n(D) clothes" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.375, "loss": 4.633841037750244 }, { "id": "identity_290159", "image": "PMC-VQA/images/PMC4533796_Fig6_413971.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the specimen displayed in the image. Is it:\n(A) mice tumors\n(B) human tumors\n(C) mice brain tissue\n(D) human brain tissue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.34375, "loss": 4.584906578063965 }, { "id": "identity_243714", "image": "IconQA/images/27684.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the fifth picture after the first fish?" }, { "from": "gpt", "value": "The answer is sub" } ], "clip_score": 21.03125, "loss": 5.6245598793029785 }, { "id": "identity_94928", "image": "IconQA/images/35317.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more rectangles in the top row or in the bottom row?" }, { "from": "gpt", "value": "The answer is Equal" } ], "clip_score": 33.25, "loss": 4.0172553062438965 }, { "id": "identity_298149", "image": "PMC-VQA/images/PMC5394395_fig4_153636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the classification of the avulsion fracture?\nChoices:\n(A) Grade II\n(B) Grade III\n(C) Grade IV\n(D) Grade V" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.40625, "loss": 4.060439109802246 }, { "id": "identity_218928", "image": "TextVQA/images/e9e782b7dc704424.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the display system as indicated by the title?" }, { "from": "gpt", "value": "The answer is To show individuals' locations" } ], "clip_score": 30.09375, "loss": 4.7068610191345215 }, { "id": "identity_305689", "image": "Super-CLEVR/images/superCLEVR_new_000695.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer small green matte vehicles than other types?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 29.46875, "loss": 5.3543219566345215 }, { "id": "identity_300190", "image": "ChartQA/images/two_col_100701.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the amount go up or down?" }, { "from": "gpt", "value": "The answer is increasing" } ], "clip_score": 18.609375, "loss": 4.226223945617676 }, { "id": "identity_115210", "image": "TextVQA/images/3fbdd8ec7e440208.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of music entertainment is being advertised on the flyer?" }, { "from": "gpt", "value": "The answer is The best music from our DJs" } ], "clip_score": 19.453125, "loss": 4.325949668884277 }, { "id": "identity_299831", "image": "PMC-VQA/images/PMC7360306_F2_213981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the specific location of the new cavity on the left lower lobe? Is it in the posterior basal segment, medial segment, anterior basal segment, or superior segment?\nChoices:\n(A) posterior basal segment \n(B) medial segment \n(C) anterior basal segment \n(D) superior segment" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.890625, "loss": 4.496682643890381 }, { "id": "identity_24751", "image": "PMC-VQA/images/PMC7982529_F4_396784.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is causing the staining at the left caudothalamic notch? \nChoices:\n(A) Calcification \n(B) Hemorrhage \n(C) Edema \n(D) Atrophy " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.734375, "loss": 5.036074638366699 }, { "id": "identity_31732", "image": "A-OKVQA/images/374520.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent headquarters this airline company?\nChoices:\n(A) asia\n(B) europe\n(C) north america\n(D) africa" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.5, "loss": 4.207373142242432 }, { "id": "identity_315743", "image": "AI2D/images/3314.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the outermost part of this structure?\nChoices:\n(A) centriole\n(B) cell membrane\n(C) ribosome\n(D) mitochondria" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.421875, "loss": 4.2275519371032715 }, { "id": "identity_35307", "image": "A-OKVQA/images/236882.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the gas tank on the Harley bike in the center of the pack?\nChoices:\n(A) blue\n(B) yellow\n(C) red\n(D) gold" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.828125, "loss": 4.089627265930176 }, { "id": "identity_254068", "image": "A-OKVQA/images/469919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What evidence in the image suggests that the umbrellas are for sale, and how does the variety of colors contribute to their marketability?" }, { "from": "gpt", "value": "The answer is The market setting and display suggest they are for sale" } ], "clip_score": 34.1875, "loss": 4.1585373878479 }, { "id": "identity_189900", "image": "PMC-VQA/images/PMC4038955_F1_292990.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct brain images are presented in this figure?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.25, "loss": 4.117863655090332 }, { "id": "identity_229959", "image": "VQA2.0/images/539283.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary function of the feathers on a bird's breast, and how does this function benefit the bird in its natural habitat?" }, { "from": "gpt", "value": "The answer is Insulation and protection for survival and thriving" } ], "clip_score": 24.203125, "loss": 4.052361965179443 }, { "id": "identity_18845", "image": "DVQA/images/bar_train_00183508.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second group of bars from the left?" }, { "from": "gpt", "value": "The answer is dog" } ], "clip_score": 21.453125, "loss": 4.406264781951904 }, { "id": "identity_287955", "image": "VQA2.0/images/64619.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the glasses?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 10.40625, "loss": 4.218605041503906 }, { "id": "identity_32030", "image": "IconQA/images/73427.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a bucket. Which picture is second?\nChoices:\n(A) boat\n(B) crab\n(C) bucket" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.28125, "loss": 4.248032569885254 }, { "id": "identity_76240", "image": "ScienceQA/images/10687.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many rotors are visible on the drone in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.8125, "loss": 4.112884998321533 }, { "id": "identity_221674", "image": "A-OKVQA/images/118485.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tag attached to the plant?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 23.71875, "loss": 4.466696739196777 }, { "id": "identity_105003", "image": "Super-CLEVR/images/superCLEVR_new_000579.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the largest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Black and yellow" } ], "clip_score": 23.40625, "loss": 4.252011299133301 }, { "id": "identity_12236", "image": "A-OKVQA/images/114540.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the structure where the people are riding?\nChoices:\n(A) highway\n(B) gangplank\n(C) quay\n(D) overpass" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.328125, "loss": 4.270489692687988 }, { "id": "identity_116144", "image": "PMC-VQA/images/PMC4201736_Fig2_329091.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the dashed lines in the image?" }, { "from": "gpt", "value": "The answer is To measure lumbar lordosis angle" } ], "clip_score": 29.859375, "loss": 4.171417713165283 }, { "id": "identity_326727", "image": "A-OKVQA/images/158787.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the two people in the foreground carrying?\nChoices:\n(A) pumpkins\n(B) purses\n(C) eggs\n(D) cats" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.1171875, "loss": 4.5344438552856445 }, { "id": "identity_192871", "image": "TextVQA/images/6248e31cc896558e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the traffic light for the direction facing the camera?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 27.21875, "loss": 4.1979498863220215 }, { "id": "identity_213035", "image": "PMC-VQA/images/PMC6826538_genes-10-00762-f002_60464.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant shape of the objects in the main image?" }, { "from": "gpt", "value": "The answer is Circular" } ], "clip_score": 16.640625, "loss": 4.053123474121094 }, { "id": "identity_148353", "image": "DocVQA/images/ftmf0024_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What companies are mentioned in the footnote?" }, { "from": "gpt", "value": "The answer is RJR, Beckel Cowen, Ramhurst, Smith Johnson Anderson & Carr, and Bob Burnette" } ], "clip_score": 24.90625, "loss": 4.087153911590576 }, { "id": "identity_305777", "image": "ScienceQA/images/5869.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which creature is also capable of flying?\nChoices:\n(A) nilgai\n(B) gray-headed albatross" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.171875, "loss": 4.1548566818237305 }, { "id": "identity_28412", "image": "PMC-VQA/images/PMC5622315_F14_222461.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the abscess? \nChoices:\n(A) Left frontal \n(B) Right frontal \n(C) Occipital \n(D) Temporal " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.078125, "loss": 4.290499687194824 }, { "id": "identity_331564", "image": "PMC-VQA/images/PMC6971865_Fig3_103405.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique was used to detect this mass?\nChoices:\n(A) X-ray\n(B) CT scan\n(C) MRI\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.015625, "loss": 4.546050071716309 }, { "id": "identity_332637", "image": "DocVQA/images/pywf0055_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Connection ID mentioned in this report?" }, { "from": "gpt", "value": "The answer is HUA MEI" } ], "clip_score": 25.546875, "loss": 4.35014533996582 }, { "id": "identity_35935", "image": "A-OKVQA/images/20133.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The pink and white item will provide what?\nChoices:\n(A) power\n(B) music\n(C) games\n(D) cell service" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.40625, "loss": 4.353515148162842 }, { "id": "identity_287453", "image": "Super-CLEVR/images/superCLEVR_new_001295.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more tiny cars in front of the double bus than tiny blue matte regular cars?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 25.078125, "loss": 5.090889930725098 }, { "id": "identity_246811", "image": "PMC-VQA/images/PMC7427865_Fig1_234354.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary purpose of the medical procedure depicted in the image, and which organ is primarily targeted by this procedure?" }, { "from": "gpt", "value": "The answer is To block blood flow to the kidney" } ], "clip_score": 24.03125, "loss": 4.111467361450195 }, { "id": "identity_270772", "image": "VQA-AS/images/6034.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is she located on the table?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 18.765625, "loss": 4.080925941467285 }, { "id": "identity_315717", "image": "Super-CLEVR/images/superCLEVR_new_001751.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the purple thing and the big red object have the same shape?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 20.078125, "loss": 5.6253461837768555 }, { "id": "identity_26093", "image": "A-OKVQA/images/286930.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who is the player wearing red boots?\nChoices:\n(A) fielder\n(B) pitcher\n(C) catcher\n(D) goalie" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.34375, "loss": 4.336795806884766 }, { "id": "identity_297401", "image": "FigureQA/images/19039.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is darker, Rebecca Purple or Magenta?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.703125, "loss": 4.354762077331543 }, { "id": "identity_292634", "image": "PMC-VQA/images/PMC7757331_F3_325476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging modality was utilized to acquire the mentioned images? \nOptions:\n(A) X-ray\n(B) CT scan\n(C) MRI\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.984375, "loss": 4.142105579376221 }, { "id": "identity_252346", "image": "A-OKVQA/images/206975.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of transportation infrastructure can be identified in the image, and what are they?" }, { "from": "gpt", "value": "The answer is Two types - railway and road" } ], "clip_score": 27.234375, "loss": 4.688450336456299 }, { "id": "identity_278726", "image": "TQA/images/question_images/human_system_nervous_6173.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: With what is the pituitary gland connected?\nChoices:\n(A) ventricles\n(B) choroid plexus\n(C) hypothalamus\n(D) cerebrum" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.515625, "loss": 4.006339073181152 }, { "id": "identity_171275", "image": "IconQA/images/4870.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more triangles with their base at the bottom or at the top?" }, { "from": "gpt", "value": "The answer is The same number" } ], "clip_score": 26.359375, "loss": 4.3821492195129395 }, { "id": "identity_215163", "image": "TextVQA/images/d91417b34c914c9b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the district located at the top right corner of the floor plan?" }, { "from": "gpt", "value": "The answer is Counter District" } ], "clip_score": 23.375, "loss": 4.416846752166748 }, { "id": "identity_320237", "image": "PMC-VQA/images/PMC7719498_fig1_315933.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the X-ray indicate about the medial clear space?\n\nChoices:\n(A) Decreased\n(B) Increased\n(C) Normal\n(D) Irrelevant" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.421875, "loss": 4.48590612411499 }, { "id": "identity_321986", "image": "A-OKVQA/images/424907.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are they on the sidewalk?\nChoices:\n(A) lost\n(B) taking bus\n(C) taking cab\n(D) socializing" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.78125, "loss": 4.155389785766602 }, { "id": "identity_113781", "image": "VizWiz/images/VizWiz_train_00002208.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pillows can be seen on the couch?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 15.78125, "loss": 4.59506368637085 }, { "id": "identity_299442", "image": "A-OKVQA/images/177253.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which aisle of the grocery store is the man in the gray shirt currently shopping? \nChoices:\n(A) produce\n(B) wine\n(C) milk\n(D) meat" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.25, "loss": 4.3592658042907715 }, { "id": "identity_49978", "image": "DVQA/images/bar_train_00051904.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the lowest average value across all groups?" }, { "from": "gpt", "value": "The answer is humor" } ], "clip_score": 19.5625, "loss": 4.9985671043396 }, { "id": "identity_253059", "image": "Super-CLEVR/images/superCLEVR_new_001005.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image are the same material as the big gray motorbike, and what are their sizes?" }, { "from": "gpt", "value": "The answer is Two objects; large and small" } ], "clip_score": 25.625, "loss": 4.433789253234863 }, { "id": "identity_298213", "image": "PMC-VQA/images/PMC6500019_Fig1_475204.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the defining feature of the lesions?\nChoices:\n(A) Solid lesions with thick wall\n(B) Solid lesions with thin wall\n(C) Cystic lesions with thick wall\n(D) Cystic lesions with thin wall" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.984375, "loss": 4.431389808654785 }, { "id": "identity_116828", "image": "PMC-VQA/images/PMC6722553_nutrients-11-01890-f001_32822.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a scale bar present in the image, and if so, what is its color?" }, { "from": "gpt", "value": "The answer is Yes, white" } ], "clip_score": 15.9375, "loss": 4.249940395355225 }, { "id": "identity_270784", "image": "Super-CLEVR/images/superCLEVR_new_000234.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the big purple object located behind the small gray scooter?" }, { "from": "gpt", "value": "The answer is double bus" } ], "clip_score": 27.828125, "loss": 4.367368698120117 }, { "id": "identity_267375", "image": "PMC-VQA/images/PMC5700104_Fig2_243135.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the dip in the plot represent?\nChoices:\n(A) Increased prey growth\n(B) Decreased prey growth\n(C) Increased predator population\n(D) Decreased predator population" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.578125, "loss": 4.715494632720947 }, { "id": "identity_214080", "image": "Super-CLEVR/images/superCLEVR_new_001014.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is both in front of the blue thing and on the right side of the tiny tandem bike?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 25.75, "loss": 4.235796928405762 }, { "id": "identity_329970", "image": "PMC-VQA/images/PMC3038968_F1_87161.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the CT scan reveal about the tumor?\nChoices:\n(A) It is located in the upper abdomen\n(B) It is a small, even tumor\n(C) It is large and uneven\n(D) It is located in the back" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.5625, "loss": 4.275763511657715 }, { "id": "identity_68703", "image": "ScienceQA/images/489.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What state is marked with an orange dot on the map?" }, { "from": "gpt", "value": "The answer is Kansas" } ], "clip_score": 27.421875, "loss": 4.106952667236328 }, { "id": "identity_32578", "image": "PMC-VQA/images/PMC6000605_Figure1_328705.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the sublobe mass located? \nChoices:\n(A) Right thoracic cavity \n(B) Abdominal cavity \n(C) Left thoracic cavity \n(D) Pelvic cavity " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.015625, "loss": 4.402355194091797 }, { "id": "identity_11574", "image": "Super-CLEVR/images/superCLEVR_new_001375.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a matte dirtbike; is it the same size as the gray object that is to the left of the blue metal bicycle?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 27.484375, "loss": 4.689866542816162 }, { "id": "identity_284686", "image": "PMC-VQA/images/PMC7880958_Fig1_367093.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of scan was utilized to identify the misinterpretation? \nOptions:\n(A) CT scan\n(B) MRI\n(C) PET/CT scan\n(D) DWI" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.84375, "loss": 4.02560567855835 }, { "id": "identity_293490", "image": "TextVQA/images/7f6c3823509de616.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of food does this restaurant serve?" }, { "from": "gpt", "value": "The answer is italian " } ], "clip_score": 17.9375, "loss": 4.481832981109619 }, { "id": "identity_294343", "image": "A-OKVQA/images/274541.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which electronic devices are they utilizing? Options include cell phone, tablet, desktop computer, and laptop computer.\nChoices:\n(A) cell phone\n(B) tablet\n(C) desktop computer\n(D) laptop computer" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.515625, "loss": 4.348315238952637 }, { "id": "identity_33009", "image": "PMC-VQA/images/PMC6206383_Fig20_387543.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the abnormality shown in the CT angiogram images? \nChoices:\n(A) Lesion surrounding the left internal carotid artery \n(B) Lesion surrounding the right internal carotid artery \n(C) Lesion surrounding the left vertebral artery \n(D) Lesion surrounding the right vertebral artery. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.0625, "loss": 4.258253574371338 }, { "id": "identity_293468", "image": "FigureQA/images/12362.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Periwinkle or Green Yellow?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.59375, "loss": 4.569521903991699 }, { "id": "identity_334549", "image": "A-OKVQA/images/314616.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is occurring in the image?\nChoices:\n(A) thunderstorm\n(B) flooding\n(C) cow showering\n(D) raining" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.59375, "loss": 4.461800575256348 }, { "id": "identity_296718", "image": "TQA/images/abc_question_images/earth_poles_12649.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents the Arctic Circle in the given choices: N, W, E, or F?\nChoices:\n(A) N\n(B) W\n(C) E\n(D) F" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.65625, "loss": 4.261267185211182 }, { "id": "identity_319808", "image": "PMC-VQA/images/PMC2734940_fig2_44592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the central dot in this represent in the MRI image?\nChoices:\n(A) Calcifications \n(B) Scolex \n(C) Fluid \n(D) Tumor" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.546875, "loss": 5.271912574768066 }, { "id": "identity_256675", "image": "DocVQA/images/fpjh0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was Kermit W. Beal's occupation in 1955, and where was he employed at that time?" }, { "from": "gpt", "value": "The answer is Master mechanic in Findlay" } ], "clip_score": 26.25, "loss": 4.509268760681152 }, { "id": "identity_123781", "image": "TextVQA/images/8b12159c4e90edb2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What two-word phrase is written on the left side of the typewriter?" }, { "from": "gpt", "value": "The answer is TAB SET" } ], "clip_score": 32.21875, "loss": 4.011090278625488 }, { "id": "identity_154361", "image": "AI2D/images/1418.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many celestial bodies are involved in creating the event shown in the diagram?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 27.96875, "loss": 4.180380344390869 }, { "id": "identity_334797", "image": "A-OKVQA/images/409985.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person expressing with their hand gestures?\nChoices:\n(A) situation approval\n(B) angry\n(C) hello\n(D) hatred" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.9609375, "loss": 4.703574180603027 }, { "id": "identity_24846", "image": "A-OKVQA/images/159987.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is there a lot of here?\nChoices:\n(A) sand\n(B) dust\n(C) traffic\n(D) snow" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 8.9375, "loss": 4.441201210021973 }, { "id": "identity_11086", "image": "ScienceQA/images/20493.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which material is this coat made of?\nChoices:\n(A) wax\n(B) wool" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.96875, "loss": 4.144998073577881 }, { "id": "identity_279115", "image": "DVQA/images/bar_train_00017452.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the bars run horizontally?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 20.953125, "loss": 4.400506973266602 }, { "id": "identity_315169", "image": "VizWiz/images/VizWiz_train_00014879.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the code?" }, { "from": "gpt", "value": "The answer is nad6" } ], "clip_score": 25.359375, "loss": 4.061567306518555 }, { "id": "identity_299329", "image": "PMC-VQA/images/PMC6514081_Fig3_478496.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be observed in the chromosomes?\nChoices:\n(A) Ribosomes\n(B) Membranes\n(C) Chromatin\n(D) Dictyosomes" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.984375, "loss": 4.055632591247559 }, { "id": "identity_205441", "image": "A-OKVQA/images/403862.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels can be seen on the train's side?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 21.546875, "loss": 4.70385217666626 }, { "id": "identity_222884", "image": "TextVQA/images/3ae94d088fc0e04d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the theater advertised on the left side of the image?" }, { "from": "gpt", "value": "The answer is Downer Theater" } ], "clip_score": 20.328125, "loss": 4.757914066314697 }, { "id": "identity_119205", "image": "VQA2.0/images/344566.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity is taking place on the red surface?" }, { "from": "gpt", "value": "The answer is within ten words" } ], "clip_score": 13.8984375, "loss": 4.820748329162598 }, { "id": "identity_310517", "image": "Super-CLEVR/images/superCLEVR_new_000772.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the small vehicle that is the same color as the tiny rubber motorbike?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 26.234375, "loss": 4.2550859451293945 }, { "id": "identity_130469", "image": "VQA2.0/images/348790.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles can be seen in the image?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 21.125, "loss": 4.828034400939941 }, { "id": "identity_16156", "image": "DocVQA/images/gncb0054_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the final session of 'PROGRAM OVERVIEW' ?" }, { "from": "gpt", "value": "The answer is TIMING FOR DTS" } ], "clip_score": 24.546875, "loss": 4.199656963348389 }, { "id": "identity_57997", "image": "PlotQA/images/44816.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which area has the highest percentage of female teachers in secondary schools?" }, { "from": "gpt", "value": "The answer is High income" } ], "clip_score": 27.859375, "loss": 4.5996174812316895 }, { "id": "identity_333889", "image": "AI2D/images/921.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the oak trees were to disappear according to the food chain diagram, what would happen?\nChoices:\n(A) pine decreases\n(B) mouse decreases\n(C) mouse increases\n(D) kinglet increases" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.1875, "loss": 4.124314785003662 }, { "id": "identity_323818", "image": "TextVQA/images/20babd71404bfa95.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does this knob on the left control?" }, { "from": "gpt", "value": "The answer is feedback and rotate " } ], "clip_score": 20.03125, "loss": 4.887569427490234 }, { "id": "identity_297653", "image": "A-OKVQA/images/485483.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the meaning or representation of the number 84 among the following options: table number, store number, order number, or price?\nChoices:\n(A) table number\n(B) store number\n(C) order number\n(D) price" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.6171875, "loss": 4.0540080070495605 }, { "id": "identity_24614", "image": "PMC-VQA/images/PMC2894136_pntd-0000726-g005_67831.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the lung shows the abnormality?\nChoices:\n(A) Hilum \n(B) Pleura \n(C) Parenchyma \n(D) Bronchi " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.8125, "loss": 4.411826133728027 }, { "id": "identity_319811", "image": "DocVQA/images/npjy0046_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the information code listed?" }, { "from": "gpt", "value": "The answer is OK" } ], "clip_score": 20.109375, "loss": 4.312361717224121 }, { "id": "identity_19151", "image": "FigureQA/images/49042.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cornflower the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.140625, "loss": 4.3122406005859375 }, { "id": "identity_307926", "image": "ScienceQA/images/4936.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these organisms contains matter that was once part of this tree?\nChoices:\n(A) silver maple\n(B) black racer" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.0625, "loss": 5.132174015045166 }, { "id": "identity_150489", "image": "ScienceQA/images/7515.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries share the island on which the highlighted country is located?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 28.75, "loss": 4.216559886932373 }, { "id": "identity_29972", "image": "A-OKVQA/images/237464.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do most of the people at the beach hope for today weather wise?\nChoices:\n(A) wind\n(B) sleet\n(C) rain\n(D) snow" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.296875, "loss": 4.185168266296387 }, { "id": "identity_26232", "image": "PMC-VQA/images/PMC7796587_Fig7_339434.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the red fluorescence indicate in the images? \nChoices:\n(A) DNA expression \n(B) Nitrotyrosine expression \n(C) Mitochondrial expression \n(D) No expression. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.421875, "loss": 4.543816089630127 }, { "id": "identity_126708", "image": "VQA2.0/images/172370.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many poles is the person holding?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 14.0234375, "loss": 4.205887317657471 }, { "id": "identity_31038", "image": "FigureQA/images/47993.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Saddle Brown the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.59375, "loss": 4.463995933532715 }, { "id": "identity_335209", "image": "FigureQA/images/40735.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.9375, "loss": 4.087287902832031 }, { "id": "identity_31318", "image": "A-OKVQA/images/556406.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the zebras doing?\nChoices:\n(A) grazing\n(B) drinking\n(C) mating\n(D) searching" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.65625, "loss": 4.223959922790527 }, { "id": "identity_261897", "image": "DocVQA/images/nqlm0080_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the name of the brand?" }, { "from": "gpt", "value": "The answer is Taste" } ], "clip_score": 17.765625, "loss": 4.615444183349609 }, { "id": "identity_26284", "image": "PMC-VQA/images/PMC7523872_F1_262813.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the medical imaging technique used in this figure? \nChoices:\n(A) CT scan \n(B) X-rays \n(C) PET scan \n(D) MRI scan " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.546875, "loss": 4.105952262878418 }, { "id": "identity_292171", "image": "DocVQA/images/ftmf0024_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the title of the document?" }, { "from": "gpt", "value": "The answer is Prelaunch Smoker Base Program" } ], "clip_score": 18.03125, "loss": 5.979683876037598 }, { "id": "identity_31773", "image": "A-OKVQA/images/30699.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shielding the boy?\nChoices:\n(A) knights shield\n(B) umbrella\n(C) tree\n(D) backpack" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.9453125, "loss": 4.6148786544799805 }, { "id": "identity_33062", "image": "PMC-VQA/images/PMC6414492_acn3725-fig-0001_449424.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the MRI showing in the corpus callosum? \nChoices:\n(A) Normal structure \n(B) T1‐hyperintensities \n(C) Large cystic changes with surrounding hyperintense T2 signal abnormality \n(D) T2‐hypointensities " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.0, "loss": 4.037325859069824 }, { "id": "identity_34267", "image": "PMC-VQA/images/PMC4801364_pone.0151376.g002_13330.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the cause of the pathology seen in the biopsy? \nChoices:\n(A) Inclusion body myositis \n(B) Spinal Muscular Atrophy with Journalistic interest \n(C) Parkinson's Disease \n(D) Alzheimer's disease " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.140625, "loss": 4.314295768737793 }, { "id": "identity_324680", "image": "Super-CLEVR/images/superCLEVR_new_000679.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the material of the large object in front of the shiny blue thing the same as the brown thing in front of the bike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 23.109375, "loss": 4.937079906463623 }, { "id": "identity_1279", "image": "A-OKVQA/images/31135.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the side for the sandwich served at this restaurant?\nChoices:\n(A) fries\n(B) chips\n(C) mashed potato\n(D) corn" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.046875, "loss": 4.177203178405762 }, { "id": "identity_10004", "image": "ScienceQA/images/12272.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these cities is marked on the map?\nChoices:\n(A) New Orleans\n(B) Houston\n(C) Dallas\n(D) Atlanta" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.796875, "loss": 4.137993335723877 }, { "id": "identity_291654", "image": "PMC-VQA/images/PMC7519687_Fig3_261740.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the arrow mentioned in the caption?\nChoices:\n(A) Green\n(B) Blue\n(C) Red\n(D) Yellow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.265625, "loss": 4.532120227813721 }, { "id": "identity_312692", "image": "FigureQA/images/2548.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.28125, "loss": 4.455258369445801 }, { "id": "identity_157536", "image": "TQA/images/question_images/human_system_nervous_6179.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lobes are labeled on the left side of the brain in this diagram?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 34.25, "loss": 4.364901542663574 }, { "id": "identity_311622", "image": "IconQA/images/51712.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a bucket. Which image is third?\nChoices:\n(A) crab\n(B) bucket\n(C) boat" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.96875, "loss": 4.189631938934326 }, { "id": "identity_322457", "image": "PMC-VQA/images/PMC6617077_jcm-08-00771-f001_6055.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the surface condition of the PEEK samples shown in the images?\n\nChoices:\n(A) Treated\n(B) Untreated\n(C) Painted\n(D) Coated" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.34375, "loss": 4.0898919105529785 }, { "id": "identity_294229", "image": "ScienceQA/images/19628.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) Massachusetts\n(B) Connecticut\n(C) Pennsylvania\n(D) New Hampshire" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.03125, "loss": 5.125421047210693 }, { "id": "identity_282230", "image": "PMC-VQA/images/PMC6086413_pone.0201430.g004_352601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the subject matter of the image shown?\nChoices:\n(A) Fossilized remains\n(B) Heated specimens\n(C) Rehydrated cells\n(D) Microscopic organisms" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.3125, "loss": 4.34205436706543 }, { "id": "identity_271949", "image": "Super-CLEVR/images/superCLEVR_new_000933.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What size is the rubber thing that matches the color of the small metal thing?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 20.46875, "loss": 4.133069038391113 }, { "id": "identity_327932", "image": "DocVQA/images/kkxw0023_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the agency involved?" }, { "from": "gpt", "value": "The answer is long haymes carr lintas" } ], "clip_score": 21.765625, "loss": 6.030641078948975 }, { "id": "identity_285599", "image": "AI2D/images/962.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the letter I represent in the figure?\nChoices:\n(A) redbud\n(B) bluebud\n(C) whiteoak\n(D) none of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.5625, "loss": 4.327561855316162 }, { "id": "identity_20886", "image": "ScienceQA/images/1561.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Florida\n(B) Indiana\n(C) Connecticut\n(D) New Jersey" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.484375, "loss": 4.548500061035156 }, { "id": "identity_216604", "image": "VizWiz/images/VizWiz_train_00019896.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the text on the Sprite cans?" }, { "from": "gpt", "value": "The answer is Green and yellow" } ], "clip_score": 30.25, "loss": 4.096395969390869 }, { "id": "identity_24691", "image": "A-OKVQA/images/511814.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the lettering at the side of the large mall building?\nChoices:\n(A) white\n(B) pink\n(C) golden\n(D) green" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.2890625, "loss": 4.263274192810059 }, { "id": "identity_45131", "image": "DocVQA/images/yscw0217_234.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the decision point in the diagram?" }, { "from": "gpt", "value": "The answer is 1 week reaction period" } ], "clip_score": 25.640625, "loss": 5.146177768707275 }, { "id": "identity_8795", "image": "VQA-AS/images/12174.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the tree?" }, { "from": "gpt", "value": "The answer is right" } ], "clip_score": 20.40625, "loss": 4.411684036254883 }, { "id": "identity_119102", "image": "PMC-VQA/images/PMC8550605_Fig37_97814.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the texture of the clouds in the image?" }, { "from": "gpt", "value": "The answer is Wispy" } ], "clip_score": 20.390625, "loss": 4.271796703338623 }, { "id": "identity_38368", "image": "A-OKVQA/images/258249.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Transparent umbrella is used only from protecting?\nChoices:\n(A) rain\n(B) uv\n(C) sun\n(D) wind" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.984375, "loss": 4.397896766662598 }, { "id": "identity_23492", "image": "A-OKVQA/images/522461.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people sitting in the snow doing?\nChoices:\n(A) waiting\n(B) sledding\n(C) gambling\n(D) skiing" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.5625, "loss": 4.275693416595459 }, { "id": "identity_8649", "image": "FigureQA/images/92874.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Forest Green the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.96875, "loss": 4.085839748382568 }, { "id": "identity_335429", "image": "TQA/images/abc_question_images/earth_poles_12649.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label represents the arctic circle?\nChoices:\n(A) N\n(B) W\n(C) E\n(D) F" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.234375, "loss": 4.058064937591553 }, { "id": "identity_65221", "image": "AI2D/images/3745.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many organs are visible in this diagram?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 31.9375, "loss": 4.528955459594727 }, { "id": "identity_286889", "image": "VQA2.0/images/447108.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the reason for the horse being in the pasture?" }, { "from": "gpt", "value": "The answer is train" } ], "clip_score": 23.953125, "loss": 5.18754243850708 }, { "id": "identity_295565", "image": "PMC-VQA/images/PMC5659859_FI170328cr-2_232191.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible in the X-ray image?\nChoices:\n(A) A healthy lung \n(B) Abnormal fluid accumulation around the heart \n(C) Distended bowel loops in right hemithorax \n(D) A tumor in the chest" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.265625, "loss": 4.047022819519043 }, { "id": "identity_206550", "image": "A-OKVQA/images/48974.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of footwear is the person second from the right wearing?" }, { "from": "gpt", "value": "The answer is Converse" } ], "clip_score": 19.953125, "loss": 4.279082775115967 }, { "id": "identity_319537", "image": "PMC-VQA/images/PMC7555047_diagnostics-10-00645-f013_271682.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of nerve is involved in this circular structure?\nChoices:\n(A) medial sural cutaneous nerve \n(B) radial nerve \n(C) lateral sural cutaneous nerve \n(D) femoral nerve" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.921875, "loss": 4.074316501617432 }, { "id": "identity_199463", "image": "PMC-VQA/images/PMC7558323_nanomaterials-10-01870-f0A1_273128.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many elements are identified in the EDX map?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.765625, "loss": 4.230343818664551 }, { "id": "identity_278888", "image": "A-OKVQA/images/457725.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is the officer currently riding?\nChoices:\n(A) parade route\n(B) Brazil\n(C) fire\n(D) perpetrator capture" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.203125, "loss": 4.679844856262207 }, { "id": "identity_262031", "image": "ScienceQA/images/13440.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Connecticut?\nChoices:\n(A) Juneau\n(B) Hartford\n(C) Des Moines\n(D) Bridgeport" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.53125, "loss": 4.204222202301025 }, { "id": "identity_194799", "image": "A-OKVQA/images/448851.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of device is on the left side of the image, next to the calculator?" }, { "from": "gpt", "value": "The answer is A phone" } ], "clip_score": 21.359375, "loss": 4.470160007476807 }, { "id": "identity_277262", "image": "Super-CLEVR/images/superCLEVR_new_001546.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the color of the road bike on the right of the small green dirtbike the same as the metal motorbike?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 26.625, "loss": 5.1427903175354 }, { "id": "identity_10468", "image": "A-OKVQA/images/323851.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the dog have to do to keep from falling?\nChoices:\n(A) hold button\n(B) nothing\n(C) keep balance\n(D) grab rope" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.0390625, "loss": 4.513687610626221 }, { "id": "identity_131533", "image": "VQA2.0/images/353276.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bridge in the background?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 16.375, "loss": 4.0854268074035645 }, { "id": "identity_70740", "image": "ScienceQA/images/1653.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which region is the largest in terms of area covered?" }, { "from": "gpt", "value": "The answer is The bottom right region" } ], "clip_score": 23.375, "loss": 4.408300876617432 }, { "id": "identity_337212", "image": "ScienceQA/images/15743.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of relationship is formed when one fish eats parasites off another fish?\nChoices:\n(A) commensal\n(B) mutualistic\n(C) parasitic" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.984375, "loss": 4.459481239318848 }, { "id": "identity_207552", "image": "A-OKVQA/images/327713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lanes of traffic are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.796875, "loss": 4.556509494781494 }, { "id": "identity_334578", "image": "PMC-VQA/images/PMC6267895_Fig3_404443.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this CT image show?\nChoices:\n(A) The biopsy needle\n(B) A right ilium lesion\n(C) A left ilium lesion\n(D) An abdomen lesion" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.453125, "loss": 4.227254390716553 }, { "id": "identity_3677", "image": "A-OKVQA/images/345020.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would be one main reason a police would be riding this type of motorcycle?\nChoices:\n(A) functionality\n(B) easy access\n(C) looks\n(D) speed" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.640625, "loss": 4.273697853088379 }, { "id": "identity_17818", "image": "A-OKVQA/images/201519.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are these people laying here?\nChoices:\n(A) hungry\n(B) hiding\n(C) tired\n(D) good weather" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.03125, "loss": 4.045221328735352 }, { "id": "identity_335753", "image": "A-OKVQA/images/249882.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What might be their job?\nChoices:\n(A) captain\n(B) fishermen\n(C) farmer\n(D) pirate" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.5625, "loss": 4.465296745300293 }, { "id": "identity_277738", "image": "ScienceQA/images/7364.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property does this object possess?\nChoices:\n(A) slippery\n(B) sticky" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.4375, "loss": 4.109824180603027 }, { "id": "identity_319913", "image": "Super-CLEVR/images/superCLEVR_new_001937.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the small rubber vehicles have the same color?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 26.9375, "loss": 4.981513023376465 }, { "id": "identity_287269", "image": "A-OKVQA/images/18370.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the cars being parked on the grass?\nChoices:\n(A) to show\n(B) to fix\n(C) to clean\n(D) to paint" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.7265625, "loss": 4.4585981369018555 }, { "id": "identity_38669", "image": "A-OKVQA/images/390658.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are there blue lights on the trees?\nChoices:\n(A) for racing\n(B) for climbing\n(C) for holiday\n(D) to signal" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.15625, "loss": 4.307885646820068 }, { "id": "identity_186730", "image": "A-OKVQA/images/39592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many zebras are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 29.734375, "loss": 4.1578779220581055 }, { "id": "identity_261821", "image": "TQA/images/question_images/parts_neuron_7207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which structure is found at the end of a cell, connecting it to another cell?\nChoices:\n(A) Synapse\n(B) Node of Ranvier\n(C) Axon\n(D) Oligodendrocyte" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.484375, "loss": 4.392785549163818 }, { "id": "identity_290127", "image": "A-OKVQA/images/404388.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the tool on the right that is red and black?\nChoices:\n(A) hammer\n(B) vice grip\n(C) screwdriver\n(D) pipe wrench" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.28125, "loss": 4.601177215576172 }, { "id": "identity_22748", "image": "Super-CLEVR/images/superCLEVR_new_001177.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The metallic bus has what color?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 24.34375, "loss": 4.3601837158203125 }, { "id": "identity_334297", "image": "PMC-VQA/images/PMC4385022_Fig5_374884.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of radiograph was taken for this patient?\nChoices:\n(A) Bitewing radiograph\n(B) Periapical radiograph\n(C) Panoramic radiograph\n(D) Occlusal radiograph" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.234375, "loss": 4.260636329650879 }, { "id": "identity_213591", "image": "VQA2.0/images/448139.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many horses can be seen in the foreground of the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.09375, "loss": 4.153586387634277 }, { "id": "identity_12006", "image": "PMC-VQA/images/PMC5816341_F2_277074.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the black arrows in the image? \nChoices:\n(A) high magnification \n(B) low magnification \n(C) visibility of CMC coating \n(D) absence of CMC coating " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.578125, "loss": 5.005239963531494 }, { "id": "identity_8050", "image": "ScienceQA/images/1910.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Washington?\nChoices:\n(A) Orlando\n(B) Olympia\n(C) Denver\n(D) Spokane" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.09375, "loss": 4.319577693939209 }, { "id": "identity_34210", "image": "VQA-AS/images/8709.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the sun low in the sky?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 19.28125, "loss": 4.007627487182617 }, { "id": "identity_129573", "image": "A-OKVQA/images/61997.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the umbrella in the background?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 20.765625, "loss": 4.113742828369141 }, { "id": "identity_259153", "image": "A-OKVQA/images/514563.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the name of the Irish pub to the left of the traffic light and describe the color of the awning above its entrance" }, { "from": "gpt", "value": "The answer is Emmit's, brown" } ], "clip_score": 20.703125, "loss": 4.493279457092285 }, { "id": "identity_10062", "image": "AI2D/images/3690.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the part represented by C?\nChoices:\n(A) none of the above\n(B) stomach\n(C) appendix\n(D) salivary glands" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.6875, "loss": 5.107935905456543 }, { "id": "identity_253136", "image": "A-OKVQA/images/333978.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of beverages are stored in the refrigerator, and which one is in the green case on the right?" }, { "from": "gpt", "value": "The answer is six, Schweppes" } ], "clip_score": 28.109375, "loss": 5.050253391265869 }, { "id": "identity_291269", "image": "TextVQA/images/14860b06ba90af45.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which brand does the watch belong to?" }, { "from": "gpt", "value": "The answer is breecia" } ], "clip_score": 26.515625, "loss": 5.708348274230957 }, { "id": "identity_287082", "image": "DVQA/images/bar_train_00015144.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of the plug greater than the victim's value?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 23.421875, "loss": 4.124546527862549 }, { "id": "identity_309776", "image": "DocVQA/images/ksjw0081_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the location?" }, { "from": "gpt", "value": "The answer is SD" } ], "clip_score": 16.078125, "loss": 5.674735069274902 }, { "id": "identity_278470", "image": "AI2D/images/1089.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, what does B represent?\nChoices:\n(A) blade\n(B) vein\n(C) tree\n(D) midrib" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.40625, "loss": 4.355519771575928 }, { "id": "identity_195754", "image": "Super-CLEVR/images/superCLEVR_new_001308.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object directly behind the large cyan rubber chopper?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 21.578125, "loss": 4.2551164627075195 }, { "id": "identity_337705", "image": "AI2D/images/3619.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is present in the centre here?\nChoices:\n(A) lamellar body\n(B) air space\n(C) alveolar fluid\n(D) tubular myelin" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.359375, "loss": 4.5320234298706055 }, { "id": "identity_266175", "image": "FigureQA/images/3430.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater in intensity, Firebrick or Tomato?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.15625, "loss": 4.642971515655518 }, { "id": "identity_272885", "image": "FigureQA/images/40093.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the median with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.9375, "loss": 4.0485734939575195 }, { "id": "identity_34063", "image": "PMC-VQA/images/PMC5815779_F1_276828.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the diagnostic test used to identify pleural effusion? \nChoices:\n(A) MRI \n(B) CT scan \n(C) X-ray \n(D) Ultrasound " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.484375, "loss": 4.329507350921631 }, { "id": "identity_1294", "image": "FigureQA/images/60803.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dim Gray greater than Dark Khaki?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.078125, "loss": 4.010688304901123 }, { "id": "identity_299643", "image": "AI2D/images/637.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option represents the lowest strata in the diagram: volcanoes, asthenosphere, oceanic crust, or none of the above?\nChoices:\n(A) volcanoes\n(B) none of the above\n(C) asthenosphere\n(D) oceanic crust" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.3125, "loss": 4.290987968444824 }, { "id": "identity_31873", "image": "A-OKVQA/images/580613.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the word out refer to in her case?\nChoices:\n(A) energy\n(B) personality\n(C) location\n(D) sexual orientation" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.3671875, "loss": 4.217095851898193 }, { "id": "identity_4913", "image": "FigureQA/images/30958.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Violet Red the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.78125, "loss": 4.371365547180176 }, { "id": "identity_327616", "image": "A-OKVQA/images/364590.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this person getting ready for?\nChoices:\n(A) sale\n(B) parade\n(C) bathing contest\n(D) dinner" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.1953125, "loss": 4.47189474105835 }, { "id": "identity_163", "image": "PMC-VQA/images/PMC3466286_pone-0046731-g003_159476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the bright-field image in the inlet demonstrate? \nChoices:\n(A) Density representation of cells \n(B) Labeling of individual cells \n(C) Location of TGF-β1 signal \n(D) Accumulation of autoradiography grains " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.328125, "loss": 4.725347995758057 }, { "id": "identity_293934", "image": "A-OKVQA/images/363100.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most probable item sold by this establishment? Wine, tires, video games, or gwent cards?\nChoices:\n(A) wine\n(B) tires\n(C) video games\n(D) gwent cards" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.25, "loss": 4.679324626922607 }, { "id": "identity_317183", "image": "Super-CLEVR/images/superCLEVR_new_000998.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there anything big and green in front of the shiny bicycle that is on the right side of the small brown minivan?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 29.21875, "loss": 5.265350818634033 }, { "id": "identity_223958", "image": "VQA2.0/images/396137.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What evidence in the image indicates that the bus/train is in motion, and how can you determine the direction of motion?" }, { "from": "gpt", "value": "The answer is Blurred background outside window indicates forward motion" } ], "clip_score": 22.8125, "loss": 4.030844211578369 }, { "id": "identity_156321", "image": "AI2D/images/1360.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arrows indicate the direction of movement for the soil in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.734375, "loss": 4.248305320739746 }, { "id": "identity_284080", "image": "PMC-VQA/images/PMC6214924_Fig4_390574.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the red numbers in the image?\nChoices:\n(A) Total heart cells quantified by FACS \n(B) Percent of RFP+ quantified by FACS \n(C) Percent of NC-Cms relative to total cardiomyocytes quantified by FACS \n(D) Total cardiomyocytes quantified by FACS" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.9375, "loss": 4.287274360656738 }, { "id": "identity_299713", "image": "PMC-VQA/images/PMC6234543_Fig4_395885.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of fracture did the patient sustain?\nChoices:\n(A) Femur fracture\n(B) Clavicle fracture\n(C) Sacral fracture\n(D) Tibia fracture" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.765625, "loss": 4.011252403259277 }, { "id": "identity_22448", "image": "A-OKVQA/images/287737.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the man doing with the red machine?\nChoices:\n(A) making cider\n(B) coring/peeling apples\n(C) cutting vegetables\n(D) making juice" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.453125, "loss": 4.419894218444824 }, { "id": "identity_32332", "image": "PMC-VQA/images/PMC4842197_Fig5_23684.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the tracer used in the images? \nChoices:\n(A) 152Tb-DOTANOC \n(B) PET/CT \n(C) AR42J tumor-bearing mouse \n(D) 25 MBq " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.65625, "loss": 4.0790300369262695 }, { "id": "identity_276365", "image": "DocVQA/images/jggn0226_14.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At which hospital was the surgery performed?" }, { "from": "gpt", "value": "The answer is SFH" } ], "clip_score": 22.484375, "loss": 4.222933292388916 }, { "id": "identity_298041", "image": "A-OKVQA/images/402967.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of structure can be found at this location? \nOptions:\n(A) cafe\n(B) pagoda\n(C) pavilion\n(D) barn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.0625, "loss": 4.2782135009765625 }, { "id": "identity_297561", "image": "A-OKVQA/images/550529.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which locations is it legal to ride this type of bike?\nChoices:\n(A) sidewalk\n(B) city streets\n(C) off road\n(D) around neighborhoods" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.234375, "loss": 4.674722194671631 }, { "id": "identity_33213", "image": "PMC-VQA/images/PMC4596504_Fig2_430724.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the appearance of the liver in the MRI? \nChoices:\n(A) Smooth contour \n(B) Nodular contour \n(C) Irregular contour \n(D) Flat contour " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.3125, "loss": 4.205300807952881 }, { "id": "identity_263044", "image": "PMC-VQA/images/PMC3510424_Fig4_169660.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the empty spaces signify?\nChoices:\n(A) Normal structure\n(B) Abnormal structure\n(C) Infection\n(D) Lack of fluid." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.34375, "loss": 4.389006614685059 }, { "id": "identity_301153", "image": "ScienceQA/images/6644.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option is a bird?\nChoices:\n(A) clownfish\n(B) Surinam horned frog\n(C) tiger shark\n(D) brown pelican" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.6875, "loss": 4.249908447265625 }, { "id": "identity_261829", "image": "A-OKVQA/images/12666.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: To which destination is the van likely headed? \nChoices:\n(A) residential areas\n(B) insurance offices\n(C) educational institutions\n(D) tourist attractions" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.3125, "loss": 4.0030388832092285 }, { "id": "identity_216048", "image": "A-OKVQA/images/550055.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many airplanes have a blue tail?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 25.671875, "loss": 4.519101142883301 }, { "id": "identity_65337", "image": "TQA/images/abc_question_images/photosynthesis_11262.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What might the letter T indicate in the context of plant biology?" }, { "from": "gpt", "value": "The answer is Fruit or Seed Pod" } ], "clip_score": 35.96875, "loss": 4.148494720458984 }, { "id": "identity_29490", "image": "A-OKVQA/images/305901.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this person getting ready to do?\nChoices:\n(A) make call\n(B) change channel\n(C) eat dinner\n(D) check temperature" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.5625, "loss": 4.421530723571777 }, { "id": "identity_37726", "image": "A-OKVQA/images/292543.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the man doing with the beans?\nChoices:\n(A) selling them\n(B) cooking\n(C) eating them\n(D) counting" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.625, "loss": 4.692318916320801 }, { "id": "identity_296992", "image": "A-OKVQA/images/124102.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location would you typically find this type of traffic light? \nChoices:\n(A) highway\n(B) country road\n(C) crosswalk\n(D) near lake" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.3046875, "loss": 4.343810081481934 }, { "id": "identity_312539", "image": "Super-CLEVR/images/superCLEVR_new_000070.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the tiny car left of a rubber road bike in front of a tiny yellow motorbike?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 25.578125, "loss": 4.741536617279053 }, { "id": "identity_110849", "image": "TextVQA/images/03a514f0ec49375a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many buttons are visible on the device?" }, { "from": "gpt", "value": "The answer is Ten" } ], "clip_score": 19.140625, "loss": 4.384515762329102 }, { "id": "identity_332136", "image": "PMC-VQA/images/PMC4515920_Fig4_409644.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the arrows indicate in these images?\nChoices:\n(A) Optic Nerve Enlargement \n(B) Hyperintensity in the Hippocampus \n(C) Cerebellar lesions \n(D) Lesions in the Occipital Lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.0, "loss": 4.656191349029541 }, { "id": "identity_214639", "image": "VQA2.0/images/130163.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the couch on the left side of the image?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 26.640625, "loss": 4.0458173751831055 }, { "id": "identity_207598", "image": "A-OKVQA/images/526801.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many books are on the top shelf above the desk?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 24.453125, "loss": 4.423121452331543 }, { "id": "identity_309871", "image": "FigureQA/images/42469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Brown?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.796875, "loss": 4.207920551300049 }, { "id": "identity_289689", "image": "PMC-VQA/images/PMC7302052_f0010_194713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the coronal view, what color is the body contour displayed as? Is it:\n(A) Red\n(B) Blue\n(C) Green\n(D) Yellow" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.546875, "loss": 4.634946346282959 }, { "id": "identity_128567", "image": "VQA2.0/images/289968.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bolts are visible on the sign?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.109375, "loss": 4.372440338134766 }, { "id": "identity_42918", "image": "ChartQA/images/multi_col_100851.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What category has the highest percentage of respondents who spent less money?" }, { "from": "gpt", "value": "The answer is General personal spending" } ], "clip_score": 30.265625, "loss": 4.424448013305664 }, { "id": "identity_262956", "image": "ScienceQA/images/14899.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What property does this object match?\nChoices:\n(A) sour\n(B) salty" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.890625, "loss": 4.311766147613525 }, { "id": "identity_231942", "image": "PMC-VQA/images/PMC8580671_fig2_107722.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the distinguishing features of the \"bone in bone\" appearance in the X-ray, and how do they compare to the other options provided?" }, { "from": "gpt", "value": "The answer is Denser bone structure within normal bone, resembling smaller bone inside" } ], "clip_score": 30.9375, "loss": 4.037458896636963 }, { "id": "identity_66992", "image": "AI2D/images/3081.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many ribosomes are depicted in this cross-section of an animal cell?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 36.65625, "loss": 4.67140007019043 }, { "id": "identity_15762", "image": "VQA-RAD/images/synpic20375.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What side is the lesion on?" }, { "from": "gpt", "value": "The answer is Right" } ], "clip_score": 25.578125, "loss": 4.151474952697754 }, { "id": "identity_17702", "image": "FigureQA/images/52532.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cornflower the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.015625, "loss": 4.038390159606934 }, { "id": "identity_280880", "image": "ScienceQA/images/3473.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the following organisms, which one contains matter that was previously part of the lichen?\nChoices:\n(A) grizzly bear\n(B) snowy owl\n(C) bilberry\n(D) brown lemming" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.0, "loss": 4.115993022918701 }, { "id": "identity_214414", "image": "TextVQA/images/1ea26b47ed2dded0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What products are advertised under the Sterling brand in the image?" }, { "from": "gpt", "value": "The answer is Bay Rum Lotion" } ], "clip_score": 26.203125, "loss": 4.07700777053833 }, { "id": "identity_291963", "image": "PMC-VQA/images/PMC6142763_fig2_368840.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the ultrasound images?\nChoices:\n(A) Posteriorly located and contracted cervix and lower uterine segment. \n(B) Anteriorly located and stretched cervix and lower uterine segment. \n(C) Laterally located and expanded cervix and lower uterine segment. \n(D) Anteriorly located and contracted cervix and upper uterine segment." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.1875, "loss": 4.466150283813477 }, { "id": "identity_311834", "image": "A-OKVQA/images/356147.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people most probably trying to do to the elephants?\nChoices:\n(A) wash\n(B) train\n(C) play\n(D) feed" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.390625, "loss": 4.415831089019775 }, { "id": "identity_250128", "image": "A-OKVQA/images/576780.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the visible signs and products, which continent is this fruit stand most likely located in, and what specific clues lead to this conclusion?" }, { "from": "gpt", "value": "The answer is Asia" } ], "clip_score": 33.3125, "loss": 4.370823383331299 }, { "id": "identity_297446", "image": "PMC-VQA/images/PMC6731561_Fig7_34899.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the red dotted line in the context provided?\nChoices:\n(A) the line of symmetry in the cartilage surface \n(B) the maximum stress level that can be reached without damage \n(C) the boundary between cartilage and subchondral bone \n(D) the measurement range of stress relaxation optical coherence straingraphy (SR-OCSA)" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.34375, "loss": 4.472962379455566 }, { "id": "identity_23081", "image": "VQA-AS/images/13001.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which two pictures look like a sunset?" }, { "from": "gpt", "value": "The answer is top" } ], "clip_score": 23.828125, "loss": 4.031435966491699 }, { "id": "identity_29723", "image": "A-OKVQA/images/406139.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the woman holding an umbrella?\nChoices:\n(A) block sun\n(B) staying dry\n(C) block wind\n(D) to dance" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.390625, "loss": 4.336687088012695 }, { "id": "identity_261571", "image": "IconQA/images/37479.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the first picture is a paw, which picture is the seventh one in the sequence?\nChoices:\n(A) dog\n(B) house\n(C) paw" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.265625, "loss": 4.433706760406494 }, { "id": "identity_261577", "image": "FigureQA/images/64445.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Olive Drab the smoothest option?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.140625, "loss": 4.286492824554443 }, { "id": "identity_274992", "image": "TextVQA/images/3b3bc758010e3ef9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the hood of the car located to the left of the orange truck with flames?" }, { "from": "gpt", "value": "The answer is stutz" } ], "clip_score": 22.046875, "loss": 4.004871845245361 }, { "id": "identity_225199", "image": "TQA/images/question_images/sun_layers_6310.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many layers are depicted in the image, and what is the sum of the number of letters in the names of all the layers?" }, { "from": "gpt", "value": "The answer is 80" } ], "clip_score": 19.421875, "loss": 4.20747709274292 }, { "id": "identity_293913", "image": "PMC-VQA/images/PMC7823255_animals-11-00063-f002_348387.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to capture the image: CT scan, MRI, B-mode, or X-Ray?\nChoices:\n(A) CT scan \n(B) MRI \n(C) B-mode \n(D) X-Ray" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.1875, "loss": 4.5452494621276855 }, { "id": "identity_336401", "image": "TQA/images/abc_question_images/earth_moon_phases_11257.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the letter R signify?\nChoices:\n(A) CRESCENT MOON\n(B) QUARTER MOON\n(C) WAXING CRESCENT\n(D) GIBBOUS MOON" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.8125, "loss": 5.160269260406494 }, { "id": "identity_31474", "image": "PMC-VQA/images/PMC6843538_microorganisms-07-00397-f005_65528.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What process is happening in the image? \nChoices:\n(A) Photosynthesis \n(B) Mitosis \n(C) Cellular respiration \n(D) None of the above " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.03125, "loss": 4.496320724487305 }, { "id": "identity_10642", "image": "AI2D/images/305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the cougar had more competition over elk, what would most likely happen?\nChoices:\n(A) Cougars decrease\n(B) Cougars increase\n(C) Trees decrease\n(D) Eagles decrease" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.734375, "loss": 4.121496677398682 }, { "id": "identity_324281", "image": "DocVQA/images/xqfl0228_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who does the Operating Committee report to?" }, { "from": "gpt", "value": "The answer is the board chair" } ], "clip_score": 31.734375, "loss": 4.054478645324707 }, { "id": "identity_223542", "image": "VQA-AS/images/7606.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.375, "loss": 4.361825466156006 }, { "id": "identity_282103", "image": "PMC-VQA/images/PMC7429820_Fig1_234904.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the vertical position of the text represent in the figure?\nChoices:\n(A) Frequency\n(B) Amplitude\n(C) Time\n(D) None of the above." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.1015625, "loss": 4.811013698577881 }, { "id": "identity_208228", "image": "VizWiz/images/VizWiz_train_00003042.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many knobs are on the front of the grill?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 32.34375, "loss": 4.726768970489502 }, { "id": "identity_9101", "image": "TabMWP/images/18569.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Use the data to answer the question below. Is the following statement about our solar system true or false? Saturn's volume is more than 10,000 times as large as Mercury's.\nChoices:\n(A) true\n(B) false" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.28125, "loss": 4.032902717590332 }, { "id": "identity_36975", "image": "A-OKVQA/images/183100.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where would these tools be found?\nChoices:\n(A) kitchen\n(B) store\n(C) trunk\n(D) street" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.390625, "loss": 4.249022483825684 }, { "id": "identity_3688", "image": "A-OKVQA/images/498425.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity are the group in the street engaged in?\nChoices:\n(A) protesting\n(B) dancing\n(C) voting\n(D) gaming" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.734375, "loss": 4.027369022369385 }, { "id": "identity_277962", "image": "ScienceQA/images/1371.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Georgia?\nChoices:\n(A) Boise\n(B) Oklahoma City\n(C) Atlanta\n(D) Savannah" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.78125, "loss": 4.173598289489746 }, { "id": "identity_10895", "image": "IconQA/images/38312.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there enough lids for every cup?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.21875, "loss": 4.235512733459473 }, { "id": "identity_299356", "image": "A-OKVQA/images/467753.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is yellow tape being used in this location?\nChoices:\n(A) party\n(B) backhoe digging\n(C) repossession\n(D) crime scene" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.9375, "loss": 4.332273483276367 }, { "id": "identity_124062", "image": "VQA2.0/images/178484.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the wall to the left of the door?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 20.6875, "loss": 4.851010799407959 }, { "id": "identity_73532", "image": "TQA/images/question_images/faults_1746.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many types of faults are depicted in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 37.375, "loss": 4.628221035003662 }, { "id": "identity_65199", "image": "TQA/images/abc_question_images/faults_11717.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many faults in the image are a result of horizontal movement?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 30.90625, "loss": 4.322630405426025 }, { "id": "identity_324582", "image": "PMC-VQA/images/PMC5307799_Fig1_127460.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the orientation of the larvae in this image?\nChoices:\n(A) Anterior to the base,\n(B) Posterior to the base,\n(C) Anterior to the top,\n(D) Posterior to the top" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.28125, "loss": 4.752579689025879 }, { "id": "identity_276561", "image": "ScienceQA/images/15565.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which animal's skin is more effectively adapted as a warning sign to deter predators?\nChoices:\n(A) impala\n(B) crown-of-thorns sea star" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.71875, "loss": 4.101903915405273 }, { "id": "identity_337176", "image": "PMC-VQA/images/PMC6925378_FIG2_88688.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the preoperative MRI?\nChoices:\n(A) Left sacroiliac joint effusion with edema. \n(B) Right sacroiliac joint effusion with edema. \n(C) Lumbar spine herniation. \n(D) Head trauma." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.515625, "loss": 4.236992835998535 }, { "id": "identity_26631", "image": "DocVQA/images/pnfm0227_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the designation of Dr. Ray E. Brown ?" }, { "from": "gpt", "value": "The answer is associate professor" } ], "clip_score": 18.84375, "loss": 4.543449401855469 }, { "id": "identity_248210", "image": "DocVQA/images/fsmc0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the purpose of the January meeting of the Council, and what were the main conclusions drawn from the December 10, 1965, meeting?" }, { "from": "gpt", "value": "The answer is Discuss Council communication, responsibility, direction; Council not achieving potential" } ], "clip_score": 27.0625, "loss": 4.666632175445557 }, { "id": "identity_320113", "image": "A-OKVQA/images/243970.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of food is shown here?\nChoices:\n(A) pescatarian\n(B) vegan\n(C) meat\n(D) vegetarian" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.453125, "loss": 4.3547234535217285 }, { "id": "identity_69584", "image": "AI2D/images/2606.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What phase of the Moon is depicted in the image?" }, { "from": "gpt", "value": "The answer is First quarter phase" } ], "clip_score": 26.765625, "loss": 4.37684440612793 }, { "id": "identity_330665", "image": "PMC-VQA/images/PMC4353421_fig1_365270.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the lesion found on the computed tomography?\nChoices:\n(A) Upper lobe bronchus\n(B) Middle lobe bronchus\n(C) Lower lobe bronchus\n(D) No description provided" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.828125, "loss": 4.01883602142334 }, { "id": "identity_267730", "image": "IconQA/images/88239.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the shape of this object:\nOptions:\n(A) cube\n(B) cone\n(C) sphere" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.703125, "loss": 4.215749263763428 }, { "id": "identity_271549", "image": "A-OKVQA/images/449634.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose are the overhead wires used? \nChoices:\n(A) to power trains\n(B) for internet\n(C) for phone lines\n(D) for electrical utility" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.890625, "loss": 4.065516471862793 }, { "id": "identity_301578", "image": "ScienceQA/images/1374.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Newark\n(B) Lansing\n(C) Boise\n(D) Missoula" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.171875, "loss": 4.271625518798828 }, { "id": "identity_329926", "image": "FigureQA/images/97096.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Light Sky Blue?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.21875, "loss": 4.094651222229004 }, { "id": "identity_218018", "image": "VQA2.0/images/524788.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cranes are visible in the skyline?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.296875, "loss": 4.3978962898254395 }, { "id": "identity_302847", "image": "FigureQA/images/16477.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Chocolate the lower median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.609375, "loss": 4.0033111572265625 }, { "id": "identity_280228", "image": "IconQA/images/10355.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture is the second one, given that the first picture is a submarine?\nChoices:\n(A) submarine\n(B) fish\n(C) turtle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.0, "loss": 4.091503620147705 }, { "id": "identity_313314", "image": "PMC-VQA/images/PMC8102820_f1_435049.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the brain exhibited patchy lesions?\nChoices:\n(A) Temporal lobe\n(B) Cerebellum\n(C) Medulla oblongata\n(D) Occipital lobe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.390625, "loss": 4.987321376800537 }, { "id": "identity_322394", "image": "ScienceQA/images/18774.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option below is a fish?\nChoices:\n(A) great white shark\n(B) kangaroo\n(C) tiger salamander\n(D) Amazon tree boa" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.90625, "loss": 4.170310020446777 }, { "id": "identity_17375", "image": "AI2D/images/3374.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the bottom-most part of the structure shown?\nChoices:\n(A) none of the above\n(B) sepals\n(C) pedicel\n(D) petal" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.96875, "loss": 4.212599277496338 }, { "id": "identity_324448", "image": "PMC-VQA/images/PMC7612751_F4_290695.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique used in this image?\nChoices:\n(A) positron emission tomography\n(B) computed tomography\n(C) magnetic resonance imaging\n(D) X-ray imaging" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.5625, "loss": 4.831772804260254 }, { "id": "identity_6424", "image": "TQA/images/question_images/seasons_1447.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What comes before the summer solstice?\nChoices:\n(A) Vernal Equinox\n(B) Winter Solstice\n(C) Autumnal Equinox\n(D) Winter Equinox" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.921875, "loss": 4.20542049407959 }, { "id": "identity_221424", "image": "A-OKVQA/images/517425.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats have visible registration numbers on their hulls?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.078125, "loss": 4.577852249145508 }, { "id": "identity_305586", "image": "ScienceQA/images/14100.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the fingers of this plant?\nChoices:\n(A) the bananas\n(B) the stems" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.875, "loss": 4.012110710144043 }, { "id": "identity_226636", "image": "DocVQA/images/szbn0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the significant new responsibilities and authorities given to the EPA under the Toxic Substances Control Act (TSCA) as mentioned in the text?" }, { "from": "gpt", "value": "The answer is Advance chemical safety, establish criteria for chemical risk evaluation" } ], "clip_score": 24.828125, "loss": 4.237545967102051 }, { "id": "identity_36042", "image": "TQA/images/question_images/parts_fish_2909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which fin is used for forward movement?\nChoices:\n(A) dorsal\n(B) caudal\n(C) pelvic\n(D) pectoral" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.234375, "loss": 4.118658542633057 }, { "id": "identity_333325", "image": "A-OKVQA/images/324428.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the large mechanical devices on the back of the boats do?\nChoices:\n(A) propel\n(B) hold bait\n(C) anchor\n(D) catch fish" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.03125, "loss": 4.620695114135742 }, { "id": "identity_131941", "image": "TextVQA/images/002a41de49c574f5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many American flags can be seen in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 15.328125, "loss": 4.29440450668335 }, { "id": "identity_275448", "image": "PMC-VQA/images/PMC7601856_materials-13-04498-f004_288003.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the SEM image in (d)?\nChoices:\n(A) CrFeNiMn alloy \n(B) SLM-2 \n(C) SE images of equiaxed grains \n(D) XY-plane" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.046875, "loss": 4.188699722290039 }, { "id": "identity_3841", "image": "A-OKVQA/images/125476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What design is on each arm?\nChoices:\n(A) cat\n(B) eagles\n(C) dog\n(D) bat" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 8.53125, "loss": 4.0966572761535645 }, { "id": "identity_338049", "image": "A-OKVQA/images/479466.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of product is being advertised by the pink box?\nChoices:\n(A) water\n(B) soda\n(C) coffee\n(D) gatorade" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.5859375, "loss": 4.191324234008789 }, { "id": "identity_145491", "image": "MapQA/images/map_7797.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have attempts within the range of 50971-62821?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 23.078125, "loss": 4.193129062652588 }, { "id": "identity_307836", "image": "Super-CLEVR/images/superCLEVR_new_000446.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the big vehicle and the bike made of the same material?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 22.953125, "loss": 5.4244842529296875 }, { "id": "identity_297671", "image": "A-OKVQA/images/437967.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of meat is served on the dish?\nOptions:\n(A) chicken\n(B) beef\n(C) pork chop\n(D) salmon" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.875, "loss": 4.078243732452393 }, { "id": "identity_260469", "image": "Geometry3K/images/1476.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the value of \\( x \\) and then determine the measure of \\( \\angle H \\)" }, { "from": "gpt", "value": "The answer is 97" } ], "clip_score": 31.84375, "loss": 4.542389869689941 }, { "id": "identity_47382", "image": "DocVQA/images/spnf0227_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the follow-up experience duration mentioned in the document?" }, { "from": "gpt", "value": "The answer is 12 years Biennial" } ], "clip_score": 18.125, "loss": 4.394449710845947 }, { "id": "identity_271190", "image": "PMC-VQA/images/PMC6968987_Fig1_102566.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which area in the image displays hyperintense signal, with the options being:\n(A) Anterior limb of internal capsule\n(B) Posterior limb of internal capsule\n(C) Corpus callosum\n(D) Thalamus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.859375, "loss": 4.2843918800354 }, { "id": "identity_285633", "image": "A-OKVQA/images/555020.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which fruit is located at the top of the scoop with the banana and oat?\nChoices:\n(A) cantaloupe\n(B) raspberry\n(C) blackberry\n(D) strawberry" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.796875, "loss": 4.019160747528076 }, { "id": "identity_328421", "image": "PMC-VQA/images/PMC4766641_Fig1_5811.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the asterisk indicate in the images?\nChoices:\n(A) stroma\n(B) adenocarcinoma\n(C) hyperplasia\n(D) grade III" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.265625, "loss": 4.378766059875488 }, { "id": "identity_322667", "image": "PMC-VQA/images/PMC4283670_ANNRHEUMDIS2014205839F3_348408.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which joint is affected in the left index finger of the patient?\nChoices:\n(A) Metacarpophalangeal joint \n(B) Distal interphalangeal joint \n(C) Proximal interphalangeal joint \n(D) Interphalangeal joint (not specified)" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.46875, "loss": 4.386972904205322 }, { "id": "identity_282131", "image": "A-OKVQA/images/244713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the function of the person wearing the white hat according to the given choices?\nChoices:\n(A) nothing\n(B) blender\n(C) forward movement\n(D) aerobic" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.3828125, "loss": 4.6443071365356445 }, { "id": "identity_262857", "image": "AI2D/images/3510.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the label of T in the diagram provided above?\nChoices:\n(A) Nucleus\nNucleus\n(B) Cytoplasm\n(C) Mitochondrion\n(D) Chloroplast" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.625, "loss": 4.001106262207031 }, { "id": "identity_263617", "image": "AI2D/images/473.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given food chain diagram, what term could be used to describe the tadpole? Is it a predator, energy source, prey, or producer?\nChoices:\n(A) predator\n(B) energy source\n(C) prey\n(D) producer" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.609375, "loss": 4.189169883728027 }, { "id": "identity_309874", "image": "PMC-VQA/images/PMC6360709_Fig1_431564.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which plane is used for the CT in this region?\nChoices:\n(A) Coronal plane \n(B) Sagittal plane \n(C) Axial plane \n(D) Transverse plane" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.5, "loss": 4.785414218902588 }, { "id": "identity_307873", "image": "AI2D/images/2973.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which region in the diagram represents this?\nChoices:\n(A) i\n(B) f\n(C) h\n(D) k" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.6171875, "loss": 4.289202690124512 }, { "id": "identity_262683", "image": "A-OKVQA/images/574074.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the primary component of this bird's diet?\nChoices:\n(A) swamp vegetation\n(B) insects\n(C) fish\n(D) grubs" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.96875, "loss": 4.547657012939453 }, { "id": "identity_154936", "image": "ScienceQA/images/6775.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What weather conditions can be inferred from the sky in the image?" }, { "from": "gpt", "value": "The answer is Fair weather" } ], "clip_score": 17.0625, "loss": 4.35342264175415 }, { "id": "identity_280929", "image": "FigureQA/images/55324.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Light Coral or Dark Slate?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.609375, "loss": 4.641931056976318 }, { "id": "identity_333273", "image": "A-OKVQA/images/165862.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of hat does the person wearing jeans have on their head?\nChoices:\n(A) fedora\n(B) bucket hat\n(C) safari hat\n(D) top hat" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.53125, "loss": 4.259861469268799 }, { "id": "identity_275286", "image": "FigureQA/images/86875.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly describes the smoothness of Blue? \n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.96875, "loss": 4.202309608459473 }, { "id": "identity_23887", "image": "PMC-VQA/images/PMC2592545_pone-0003928-g003_30793.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of MRI was used for this study? \nChoices:\n(A) T1-weighted MRI \n(B) T2-weighted MRI \n(C) CT scan \n(D) PET scan " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.390625, "loss": 4.490342617034912 }, { "id": "identity_131661", "image": "Super-CLEVR/images/superCLEVR_new_000624.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly behind the purple car?" }, { "from": "gpt", "value": "The answer is A train" } ], "clip_score": 30.875, "loss": 4.170173645019531 }, { "id": "identity_299539", "image": "A-OKVQA/images/124128.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the symbol had a line drawn through it? \nChoices:\n(A) stop\n(B) go forward\n(C) turn left\n(D) turn right" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.265625, "loss": 4.298151969909668 }, { "id": "identity_269546", "image": "FigureQA/images/97009.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Web Maroon the smoothest?\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.1875, "loss": 4.191711902618408 }, { "id": "identity_286211", "image": "PMC-VQA/images/PMC4964469_ijms-17-01093-f002_56711.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which component is the target of staining in the image?\nChoices:\n(A) CD3\n(B) Ki-67\n(C) CD45\n(D) CD20" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.5625, "loss": 4.522997856140137 }, { "id": "identity_211826", "image": "VQA2.0/images/168618.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many human figures are present around the clock?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 15.6953125, "loss": 4.371756076812744 }, { "id": "identity_27282", "image": "PlotQA/images/49330.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label or title of the Y-axis ?" }, { "from": "gpt", "value": "The answer is Year" } ], "clip_score": 19.640625, "loss": 4.673096656799316 }, { "id": "identity_321181", "image": "VizWiz/images/VizWiz_train_00009150.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is shown in the image?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 21.203125, "loss": 4.479940891265869 }, { "id": "identity_331610", "image": "A-OKVQA/images/142826.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this area located next to?\nChoices:\n(A) fountain\n(B) beach\n(C) stream\n(D) pool" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.8125, "loss": 4.3566789627075195 }, { "id": "identity_304797", "image": "Super-CLEVR/images/superCLEVR_new_001502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the regular bus next to the small green object and the regular bus beside the small green minivan made of the same material?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 27.75, "loss": 4.887948513031006 }, { "id": "identity_205584", "image": "VQA2.0/images/452198.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the guard booth in the image?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 23.390625, "loss": 4.0494818687438965 }, { "id": "identity_15023", "image": "A-OKVQA/images/136770.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of footwear is the person in the white shirt wearing?\nChoices:\n(A) adidas\n(B) nike\n(C) skechers\n(D) new balance" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.15625, "loss": 4.378903388977051 }, { "id": "identity_31521", "image": "PMC-VQA/images/PMC3228743_F1_117900.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of view was used in the ecocardiography? \nChoices:\n(A) Transesophageal view \n(B) Suprasternal view \n(C) Paraesternal view \n(D) Transthoracic view " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.484375, "loss": 4.3820037841796875 }, { "id": "identity_246108", "image": "Super-CLEVR/images/superCLEVR_new_000016.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cyan objects are there in the image, and are any of them located to the left of the red metallic car?" }, { "from": "gpt", "value": "The answer is one" } ], "clip_score": 30.828125, "loss": 4.5365800857543945 }, { "id": "identity_308566", "image": "ScienceQA/images/4636.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the highlighted colony?\nChoices:\n(A) Massachusetts\n(B) New York\n(C) New Jersey\n(D) Pennsylvania" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.53125, "loss": 4.974717617034912 }, { "id": "identity_258669", "image": "VQA-AS/images/14222.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many animals are visible in the image, and what is the boy riding?" }, { "from": "gpt", "value": "The answer is Two animals, moped" } ], "clip_score": 36.0625, "loss": 4.621030807495117 }, { "id": "identity_33790", "image": "A-OKVQA/images/362309.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what nation is this bus found?\nChoices:\n(A) dominica\n(B) philippines\n(C) india\n(D) thailand" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.015625, "loss": 4.244034767150879 }, { "id": "identity_310409", "image": "PMC-VQA/images/PMC7435725_materials-13-03400-f006_237018.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the left column?\nChoices:\n(A) Higher magnifications \n(B) Lower magnifications \n(C) Blends \n(D) None of the above." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.03125, "loss": 4.592855930328369 }, { "id": "identity_273785", "image": "FigureQA/images/62838.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Dark Blue or Tomato?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.078125, "loss": 4.7715349197387695 }, { "id": "identity_135546", "image": "DocVQA/images/mxvm0041_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first guest's name written on the receipt?" }, { "from": "gpt", "value": "The answer is Francisco Morales" } ], "clip_score": 26.765625, "loss": 4.252682209014893 }, { "id": "identity_269849", "image": "ScienceQA/images/16717.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Florida?\nChoices:\n(A) Tallahassee\n(B) Richmond\n(C) Orlando\n(D) Columbia" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.3125, "loss": 4.047605514526367 }, { "id": "identity_191014", "image": "PMC-VQA/images/PMC3466236_pone-0047230-g003_159344.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is labeled with the letter 'e' on the scale?" }, { "from": "gpt", "value": "The answer is White matter" } ], "clip_score": 34.375, "loss": 4.217479228973389 }, { "id": "identity_329669", "image": "A-OKVQA/images/284902.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the predominant color of the fruit?\nChoices:\n(A) red\n(B) yellow\n(C) green\n(D) orange" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.4921875, "loss": 4.2656378746032715 }, { "id": "identity_329644", "image": "A-OKVQA/images/142667.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these people doing?\nChoices:\n(A) eating breakfast\n(B) standing line\n(C) cleaning up\n(D) waiting buss" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.671875, "loss": 4.23122501373291 }, { "id": "identity_332457", "image": "ScienceQA/images/2323.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is shown?\nChoices:\n(A) Africa\n(B) Antarctica\n(C) South America\n(D) Europe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.953125, "loss": 4.020874977111816 }, { "id": "identity_307759", "image": "TQA/images/abc_question_images/human_system_eye_16132.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the lens?\nChoices:\n(A) D\n(B) W\n(C) S\n(D) V" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.0625, "loss": 4.393755912780762 }, { "id": "identity_188427", "image": "VizWiz/images/VizWiz_train_00004219.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many personal care items are on the sink?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 28.875, "loss": 4.444362640380859 }, { "id": "identity_229177", "image": "Super-CLEVR/images/superCLEVR_new_001453.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is the same shape as the big cyan matte object and is made of metal?" }, { "from": "gpt", "value": "The answer is gold" } ], "clip_score": 17.734375, "loss": 4.382964611053467 }, { "id": "identity_3331", "image": "Super-CLEVR/images/superCLEVR_new_001206.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the red bus have the same material as the purple cruiser that is in front of the green metallic tandem bike?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 25.1875, "loss": 5.004952907562256 }, { "id": "identity_264655", "image": "Super-CLEVR/images/superCLEVR_new_001929.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the color of the rubber object on the right side of the red shiny thing the same as the small metal scooter?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 20.578125, "loss": 5.2259440422058105 }, { "id": "identity_1491", "image": "A-OKVQA/images/114025.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why does the woman need string?\nChoices:\n(A) knit\n(B) fly kite\n(C) sew\n(D) tie ends" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.328125, "loss": 4.251664161682129 }, { "id": "identity_213156", "image": "A-OKVQA/images/190393.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the umbrella attached to the chair?" }, { "from": "gpt", "value": "The answer is Navy blue" } ], "clip_score": 21.859375, "loss": 4.090463638305664 }, { "id": "identity_332072", "image": "A-OKVQA/images/106794.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the individual in the carriage most likely searching for?\nChoices:\n(A) food\n(B) hay\n(C) predators\n(D) passengers" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.484375, "loss": 4.456441402435303 }, { "id": "identity_45337", "image": "DocVQA/images/lzgj0223_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size group classification of Hitkari Multifilters Ltd.?" }, { "from": "gpt", "value": "The answer is Decile 5" } ], "clip_score": 23.53125, "loss": 4.075710773468018 }, { "id": "identity_182469", "image": "A-OKVQA/images/242163.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many benches are visible in the image?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 21.1875, "loss": 4.242252349853516 }, { "id": "identity_333774", "image": "Super-CLEVR/images/superCLEVR_new_001995.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the purple object that is made of the same material as the small gray vehicle?" }, { "from": "gpt", "value": "The answer is utility bike" } ], "clip_score": 28.875, "loss": 5.111508369445801 }, { "id": "identity_298046", "image": "PMC-VQA/images/PMC8921193_Fig4_222649.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was discovered in the postoperative MRI scan?\nChoices:\n(A) Hematoma \n(B) Temporal lobe \n(C) Column cavity \n(D) Fluid-attenuated inversion recovery hyperintensity lesion" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.5625, "loss": 4.340239524841309 }, { "id": "identity_239529", "image": "TextVQA/images/329087659dd853b8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what are the three advertisements visible on the wall?" }, { "from": "gpt", "value": "The answer is broadlawgroup, See Taylor Art, guycooklaw.com" } ], "clip_score": 14.4921875, "loss": 5.576383113861084 }, { "id": "identity_319262", "image": "ScienceQA/images/12295.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these places is marked on the map?\nChoices:\n(A) Milwaukee\n(B) Chicago\n(C) St. Louis\n(D) Indianapolis" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.03125, "loss": 4.185131072998047 }, { "id": "identity_213923", "image": "A-OKVQA/images/251600.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.453125, "loss": 4.572005748748779 }, { "id": "identity_337515", "image": "DocVQA/images/pjbl0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of Dr. Edward F. Coyle?" }, { "from": "gpt", "value": "The answer is Director" } ], "clip_score": 14.78125, "loss": 4.199220657348633 }, { "id": "identity_26072", "image": "PMC-VQA/images/PMC3673874_F1_209269.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of MRI was used to produce the image?\nChoices:\n(A) Axial T1-weighted MRI \n(B) Axial T2-weighted MRI \n(C) Axial FLAIR-weighted MRI \n(D) Axial DWI-weighted MRI " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.765625, "loss": 4.015242576599121 }, { "id": "identity_285297", "image": "A-OKVQA/images/187852.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the common profession shared by many of the individuals depicted in the images shown here?\nChoices:\n(A) fire fighters\n(B) journalism\n(C) baby photographers\n(D) politicians" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.875, "loss": 4.598352909088135 }, { "id": "identity_215924", "image": "VQA2.0/images/62204.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the side of the monster truck?" }, { "from": "gpt", "value": "The answer is JUST" } ], "clip_score": 27.28125, "loss": 5.368040561676025 }, { "id": "identity_11030", "image": "VQA-AS/images/19535.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Why is the lady so surprised?" }, { "from": "gpt", "value": "The answer is girl has taken all books down and put her animals on bookcase" } ], "clip_score": 23.84375, "loss": 4.181568145751953 }, { "id": "identity_300191", "image": "FigureQA/images/54333.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Khaki the smallest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.15625, "loss": 4.1650919914245605 }, { "id": "identity_71013", "image": "ScienceQA/images/4359.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tubes are connected to the top of the equipment?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.65625, "loss": 4.228578567504883 }, { "id": "identity_74671", "image": "ScienceQA/images/3139.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries are highlighted in green on the map?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 24.671875, "loss": 4.149511814117432 }, { "id": "identity_53066", "image": "DocVQA/images/ljkm0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the current status of the project as of the date on the document?" }, { "from": "gpt", "value": "The answer is Collection done, terms entered, definitions being prepared" } ], "clip_score": 24.9375, "loss": 5.733681678771973 }, { "id": "identity_267756", "image": "A-OKVQA/images/208408.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following phrases is the most suitable choice?\nOptions:\n(A) airplane race\n(B) conflagration\n(C) downpour\n(D) earthquake" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.5546875, "loss": 5.320538520812988 }, { "id": "identity_12290", "image": "Super-CLEVR/images/superCLEVR_new_000109.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a big shiny object that is on the left side of the matte tandem bike; is its shape the same as the tiny gray shiny object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 24.421875, "loss": 5.026169776916504 }, { "id": "identity_331096", "image": "DocVQA/images/xpfm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the co-chairman of this panel?" }, { "from": "gpt", "value": "The answer is bill carey" } ], "clip_score": 24.96875, "loss": 4.22090482711792 }, { "id": "identity_106573", "image": "VQA2.0/images/131580.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many storage boxes can be seen in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 17.546875, "loss": 4.810579299926758 }, { "id": "identity_31896", "image": "PMC-VQA/images/PMC5806936_dentistry-04-00029-f006_273972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the green arrow showing in the image? \nChoices:\n(A) Oedema \n(B) Necrosis \n(C) Inflammation \n(D) Signal gain. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.265625, "loss": 5.4654107093811035 }, { "id": "identity_17143", "image": "FigureQA/images/93031.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Sienna have the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.84375, "loss": 4.0336103439331055 }, { "id": "identity_204267", "image": "A-OKVQA/images/136928.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many buildings can be seen in the background?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.046875, "loss": 4.251498222351074 }, { "id": "identity_120198", "image": "TextVQA/images/0c840974bf0844d9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrians are visible on the sidewalk to the right?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 26.984375, "loss": 4.767073154449463 }, { "id": "identity_284792", "image": "VQA2.0/images/167892.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the train in motion?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 20.3125, "loss": 4.0717034339904785 }, { "id": "identity_305968", "image": "VQA-AS/images/17623.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the person holding in their hand?" }, { "from": "gpt", "value": "The answer is wine" } ], "clip_score": 24.5, "loss": 4.017179012298584 }, { "id": "identity_335848", "image": "PMC-VQA/images/PMC4459984_pone.0128306.g007_394562.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the white arrow in the image indicate?\nChoices:\n(A) nucleus\n(B) cytoplasm\n(C) storage vesicles\n(D) extracellular matrix" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.109375, "loss": 4.965120792388916 }, { "id": "identity_53613", "image": "PlotQA/images/86928.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the X-axis?" }, { "from": "gpt", "value": "The answer is Years" } ], "clip_score": 18.640625, "loss": 4.167721271514893 }, { "id": "identity_133449", "image": "VizWiz/images/VizWiz_train_00006328.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the CD as suggested by the cover?" }, { "from": "gpt", "value": "The answer is Sound healing or health" } ], "clip_score": 23.625, "loss": 5.243192672729492 }, { "id": "identity_25400", "image": "A-OKVQA/images/101390.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of store is this?\nChoices:\n(A) beauty\n(B) shoe\n(C) pet\n(D) food" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.375, "loss": 4.018336772918701 }, { "id": "identity_261303", "image": "FigureQA/images/52911.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Slate a larger quantity than Dark Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.75, "loss": 4.038151264190674 }, { "id": "identity_20347", "image": "PMC-VQA/images/PMC6447645_Fig5_460853.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the sublime tubercle of the ulna located? \nChoices:\n(A) At the medial epicondyle \n(B) At the lateral epicondyle \n(C) Inferior aspect of the medial epicondyle \n(D) None of the above " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.6875, "loss": 4.2544145584106445 }, { "id": "identity_151585", "image": "AI2D/images/3098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the structure labeled 10, and what is its primary function?" }, { "from": "gpt", "value": "The answer is Cell membrane, controls substance movement" } ], "clip_score": 25.84375, "loss": 4.272494792938232 }, { "id": "identity_26732", "image": "IconQA/images/52464.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner more likely to land?\nChoices:\n(A) white\n(B) pink" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.65625, "loss": 4.642492294311523 }, { "id": "identity_332556", "image": "PMC-VQA/images/PMC9200539_fig6_311851.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is seen in the peritoneum of the individual?\nChoices:\n(A) cystic shadows without enhancement\n(B) multiple nodular shadows\n(C) uniform enhancement\n(D) extensive fluid density shadows" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.921875, "loss": 4.26077127456665 }, { "id": "identity_335223", "image": "IconQA/images/3493.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner more likely to land on?\nChoices:\n(A) white\n(B) blue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.1875, "loss": 4.041323661804199 }, { "id": "identity_310259", "image": "Super-CLEVR/images/superCLEVR_new_001754.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any small cyan metallic objects that have the same shape as this tiny cyan matte object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 21.140625, "loss": 5.197096347808838 }, { "id": "identity_26964", "image": "PMC-VQA/images/PMC7787946_f0005_335962.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the tracings shown in the image? \nChoices:\n(A) To compare the size of the thalamus between the two patients. \n(B) To compare the cognitive abilities of the two patients. \n(C) To show the difference in texture between the two parts of the thalamus. \n(D) To indicate the location of the thalamus in the brain. " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.453125, "loss": 4.428737640380859 }, { "id": "identity_329814", "image": "A-OKVQA/images/566941.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What piece of clothing is the elephant holding?\nChoices:\n(A) dress\n(B) hat\n(C) shoes\n(D) pants" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.390625, "loss": 4.186574459075928 }, { "id": "identity_298661", "image": "IconQA/images/34740.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Out of the three activities Carla did on Sunday evening, which one did she do last?\nChoices:\n(A) listened to music\n(B) practiced piano\n(C) went for a walk" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.1875, "loss": 4.065885066986084 }, { "id": "identity_11152", "image": "ScienceQA/images/16237.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Delaware?\nChoices:\n(A) Dover\n(B) Hartford\n(C) Georgetown\n(D) Wilmington" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.34375, "loss": 4.155433177947998 }, { "id": "identity_286253", "image": "A-OKVQA/images/287422.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of wheel is located behind the bench? \n(A) train wheels\n(B) chariot wheels\n(C) artillery wheels\n(D) wagon wheel" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.046875, "loss": 4.068272590637207 }, { "id": "identity_301727", "image": "ScienceQA/images/20233.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is accurate about hurricanes?\nChoices:\n(A) Hurricanes can be found only over ocean water.\n(B) Hurricanes are large spiral-shaped storms.\n(C) Hurricanes can be found only over land." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.015625, "loss": 4.019116401672363 }, { "id": "identity_276647", "image": "FigureQA/images/19362.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Violet Red not the minimum? \n\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.265625, "loss": 4.130087375640869 }, { "id": "identity_133877", "image": "TextVQA/images/954293b4781134d8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrian crossing lights are visible?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.046875, "loss": 4.351766586303711 }, { "id": "identity_120624", "image": "TextVQA/images/dac4ca4a4498d6dd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible in the background?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.109375, "loss": 4.207342624664307 }, { "id": "identity_321111", "image": "ScienceQA/images/972.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What state of matter is a paper clip?\nChoices:\n(A) a solid\n(B) a liquid\n(C) a gas" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.375, "loss": 4.351160526275635 }, { "id": "identity_326875", "image": "PMC-VQA/images/PMC8827142_Fig3_187216.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the area indicated by this arrow? \nChoices:\n(A) The ventricles \n(B) The brainstem \n(C) The meninges \n(D) The cerebellum." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.9375, "loss": 4.013485908508301 }, { "id": "identity_282863", "image": "A-OKVQA/images/509811.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: To whom do those laptops belong?\nChoices:\n(A) one individual\n(B) library\n(C) multiple individuals\n(D) non profit" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.4375, "loss": 4.981365203857422 }, { "id": "identity_9735", "image": "A-OKVQA/images/529041.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the red truck in the image?\nChoices:\n(A) health safety\n(B) distinguish fires\n(C) night partys\n(D) citizen transportation" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.4375, "loss": 5.240973472595215 }, { "id": "identity_301681", "image": "Super-CLEVR/images/superCLEVR_new_001985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the rubber thing that is the same size as the gray metallic object?" }, { "from": "gpt", "value": "The answer is truck" } ], "clip_score": 22.3125, "loss": 4.337855815887451 }, { "id": "identity_333648", "image": "PMC-VQA/images/PMC7379153_F7_219107.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the top section display? \nChoices:\n(A) Activity for untrustworthy faces in certain fields \n(B) Activity for trustworthy faces in some fields \n(C) Activity for trustworthy faces in various fields \n(D) Activity for neutral faces in various fields." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.828125, "loss": 4.446719169616699 }, { "id": "identity_277539", "image": "DVQA/images/bar_train_00153502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of bars with values exceeding 1?" }, { "from": "gpt", "value": "The answer is five" } ], "clip_score": 18.453125, "loss": 4.079098701477051 }, { "id": "identity_130830", "image": "A-OKVQA/images/306663.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many columns are in front of the building?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 18.046875, "loss": 4.481952667236328 }, { "id": "identity_26477", "image": "A-OKVQA/images/400123.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on the opposite wall from the sink?\nChoices:\n(A) picture\n(B) towel bar\n(C) shower\n(D) bathtub" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.859375, "loss": 4.460309028625488 }, { "id": "identity_3418", "image": "FigureQA/images/44297.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.21875, "loss": 4.412899017333984 }, { "id": "identity_11422", "image": "TextVQA/images/63b7d07254a2cbfc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is this parade float in support of?" }, { "from": "gpt", "value": "The answer is dogs for defense" } ], "clip_score": 30.78125, "loss": 4.301991939544678 }, { "id": "identity_301789", "image": "ScienceQA/images/6233.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of this colony?\nChoices:\n(A) Ohio\n(B) North Carolina\n(C) Massachusetts\n(D) Connecticut" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.109375, "loss": 4.578834056854248 }, { "id": "identity_308771", "image": "A-OKVQA/images/39040.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the individuals with umbrellas trying to avoid?\nChoices:\n(A) rain\n(B) sun\n(C) snow\n(D) wind" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.53125, "loss": 4.3920183181762695 }, { "id": "identity_199087", "image": "VQA-AS/images/16754.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many toys are visible on the floor?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.6875, "loss": 4.508753299713135 }, { "id": "identity_187702", "image": "A-OKVQA/images/386332.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of furniture is directly behind the pizza boxes?" }, { "from": "gpt", "value": "The answer is A chair" } ], "clip_score": 29.484375, "loss": 4.235058307647705 }, { "id": "identity_328693", "image": "TextVQA/images/e54c260f1bd6f6d1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is shown on the banner?" }, { "from": "gpt", "value": "The answer is telemoucto" } ], "clip_score": 16.59375, "loss": 4.748859882354736 }, { "id": "identity_282261", "image": "VQA-AS/images/5805.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the situation with the girl?" }, { "from": "gpt", "value": "The answer is crashed into log" } ], "clip_score": 25.015625, "loss": 4.143126010894775 }, { "id": "identity_112897", "image": "TextVQA/images/8774ea9366a4c286.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the pedestrian crossing signal?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 30.046875, "loss": 4.086431503295898 }, { "id": "identity_204015", "image": "VQA2.0/images/528408.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the teddy bear that is second from the left?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 25.953125, "loss": 4.256278991699219 }, { "id": "identity_209483", "image": "TextVQA/images/7d7e5a98c21af824.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the text written on the back of the jerseys?" }, { "from": "gpt", "value": "The answer is BIEDIG" } ], "clip_score": 18.109375, "loss": 4.174231052398682 }, { "id": "identity_9378", "image": "VizWiz/images/VizWiz_train_00016935.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: This. Thank you." }, { "from": "gpt", "value": "The answer is ticket" } ], "clip_score": 21.0, "loss": 4.403534889221191 }, { "id": "identity_187272", "image": "PMC-VQA/images/PMC4814887_phy212730-fig-0002_16870.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many images are displayed above the graph?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 18.234375, "loss": 4.295332431793213 }, { "id": "identity_212734", "image": "VQA-AS/images/11962.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tablecloth on the table?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 19.875, "loss": 4.060790061950684 }, { "id": "identity_261535", "image": "Super-CLEVR/images/superCLEVR_new_001457.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the bicycle, which is positioned behind the tiny matte biplane, made of?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 31.34375, "loss": 4.314184188842773 }, { "id": "identity_282022", "image": "PMC-VQA/images/PMC5265193_Fig8_118667.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of anomaly is not present in the hyperperfusion zone shown in the image?\nChoices:\n(A) FLAIR anomaly\n(B) DWI anomaly\n(C) Both FLAIR and DWI anomalies\n(D) None of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.375, "loss": 4.0572638511657715 }, { "id": "identity_16177", "image": "TQA/images/question_images/muscle_fiber_7090.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are muscle fibers made up of?\nChoices:\n(A) Skeletal muscle\n(B) Myelin\n(C) Myofibrils\n(D) Schwann cells" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.734375, "loss": 4.156389236450195 }, { "id": "identity_217481", "image": "A-OKVQA/images/29525.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.671875, "loss": 4.232676982879639 }, { "id": "identity_298648", "image": "A-OKVQA/images/577835.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What category does this truck belong to?\nChoices:\n(A) model\n(B) ladder\n(C) commercial\n(D) passenger" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.671875, "loss": 4.4059576988220215 }, { "id": "identity_289090", "image": "PMC-VQA/images/PMC5733951_fig1_254274.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this image, which artery is significantly narrowed?\nChoices:\n(A) Right internal carotid artery\n(B) Left internal carotid artery\n(C) Brachiocephalic artery\n(D) Femoral artery" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.53125, "loss": 4.269639492034912 }, { "id": "identity_6491", "image": "A-OKVQA/images/208408.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What phrase is most appropriate?\nChoices:\n(A) airplane race\n(B) conflagration\n(C) downpour\n(D) earthquake" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.6015625, "loss": 5.268195629119873 }, { "id": "identity_56699", "image": "DocVQA/images/yznh0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the purpose of the call to the Bureau of Parks and Recreation in Santa Fe?" }, { "from": "gpt", "value": "The answer is No info" } ], "clip_score": 21.25, "loss": 4.881148815155029 }, { "id": "identity_36487", "image": "FigureQA/images/73471.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Khaki have the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.578125, "loss": 4.669414520263672 }, { "id": "identity_130697", "image": "PMC-VQA/images/PMC9092047_F8_276679.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What part of the body is shown in the top image?" }, { "from": "gpt", "value": "The answer is Lower limb" } ], "clip_score": 20.484375, "loss": 4.024550914764404 }, { "id": "identity_21332", "image": "DVQA/images/bar_train_00176209.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the chart contain stacked bars?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 27.671875, "loss": 4.238978385925293 }, { "id": "identity_190715", "image": "A-OKVQA/images/221328.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the woman in the green saree carrying?" }, { "from": "gpt", "value": "The answer is A green bag" } ], "clip_score": 22.140625, "loss": 4.595273017883301 }, { "id": "identity_2891", "image": "PMC-VQA/images/PMC5636852_Fig1_226248.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does image d represent? \nChoices:\n(A) Preoperative growth \n(B) Postoperative CT scan \n(C) Tumor recurrence/regrowth \n(D) 15 weeks following treatment " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.96875, "loss": 4.185770511627197 }, { "id": "identity_31351", "image": "Super-CLEVR/images/superCLEVR_new_000865.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The purple matte car has what size?" }, { "from": "gpt", "value": "The answer is small" } ], "clip_score": 24.8125, "loss": 4.161266803741455 }, { "id": "identity_110322", "image": "VQA-AS/images/626.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows are visible in the room?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 21.25, "loss": 4.492323875427246 }, { "id": "identity_323363", "image": "A-OKVQA/images/383905.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are they relying on to move them?\nChoices:\n(A) elephants\n(B) people\n(C) car\n(D) motor" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.9296875, "loss": 4.024971961975098 }, { "id": "identity_276599", "image": "ScienceQA/images/15607.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In row A, which establishment is represented by the letter \"i\"?\nChoices:\n(A) the fast-food restaurant\n(B) the fire department\n(C) the grocery store\n(D) the theater" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.953125, "loss": 4.5890212059021 }, { "id": "identity_27332", "image": "AI2D/images/574.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The pistil is represented by what letter?\nChoices:\n(A) C\n(B) A\n(C) B\n(D) D" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.25, "loss": 4.063282012939453 }, { "id": "identity_282201", "image": "PMC-VQA/images/PMC4631946_F15_440502.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique is utilized in this study?\nOptions:\n(A) Magnetic Resonance Imaging (MRI)\n(B) Single Photon Emission Computed Tomography (SPECT)\n(C) Computed Tomography (CT)\n(D) Positron Emission Tomography (PET)" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.375, "loss": 4.748722076416016 }, { "id": "identity_111356", "image": "A-OKVQA/images/208372.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many horses are being led across the parking lot?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 26.6875, "loss": 4.62294864654541 }, { "id": "identity_289082", "image": "ScienceQA/images/19596.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of North Carolina?\n(A) Raleigh\n(B) Louisville\n(C) Little Rock\n(D) Charlotte" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.734375, "loss": 4.265372276306152 }, { "id": "identity_131166", "image": "TextVQA/images/e294293fec6c5350.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bottles are visible in the background?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.25, "loss": 4.316595077514648 }, { "id": "identity_199272", "image": "A-OKVQA/images/393664.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many light poles are visible in the background?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 13.3828125, "loss": 4.6387739181518555 }, { "id": "identity_223767", "image": "TextVQA/images/bf475d45b6ec7ef1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the jersey of the team on the field?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 19.25, "loss": 4.292172431945801 }, { "id": "identity_286258", "image": "Super-CLEVR/images/superCLEVR_new_000723.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the colors of the school bus and the small rubber car identical?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 18.84375, "loss": 5.367964744567871 }, { "id": "identity_34177", "image": "PMC-VQA/images/PMC4359878_fig1_367759.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the contrast-enhanced computed tomography reveal? \nChoices:\n(A) An atrial fibrillation \n(B) A pulmonary embolism \n(C) An aortic arch aneurysm surrounded by a hematoma \n(D) A kidney stone " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.65625, "loss": 4.263853073120117 }, { "id": "identity_338439", "image": "PMC-VQA/images/PMC6994512_Fig5_109337.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which direction is the lateral aspect of the fascia iliaca relative to this? \nChoices:\n(A) Superior \n(B) Inferior \n(C) Medial \n(D) Posterior" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.953125, "loss": 4.094335079193115 }, { "id": "identity_335988", "image": "TextVQA/images/b2f716a3390e71a9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the sleeves of the people in white?" }, { "from": "gpt", "value": "The answer is lg dios" } ], "clip_score": 16.8125, "loss": 4.032698631286621 }, { "id": "identity_185872", "image": "A-OKVQA/images/459824.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the tablecloth under the tray?" }, { "from": "gpt", "value": "The answer is white" } ], "clip_score": 14.1015625, "loss": 4.212851524353027 }, { "id": "identity_129409", "image": "A-OKVQA/images/50350.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many candles are on the shelf?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 15.8984375, "loss": 4.814133644104004 }, { "id": "identity_16785", "image": "A-OKVQA/images/225919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who placed this animal in the box?\nChoices:\n(A) hobo\n(B) dorothy\n(C) bike rider\n(D) wicked witch" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.640625, "loss": 4.1341047286987305 }, { "id": "identity_233901", "image": "A-OKVQA/images/117071.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you follow the green arrow's direction and continue straight until you reach the intersection, then turn left and walk for 200 meters, what notable landmark or building will you encounter first?" }, { "from": "gpt", "value": "The answer is A church" } ], "clip_score": 32.40625, "loss": 4.120994567871094 }, { "id": "identity_16435", "image": "FigureQA/images/61387.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Bubblegum the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.328125, "loss": 4.422472953796387 }, { "id": "identity_17543", "image": "DocVQA/images/qybv0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the program?" }, { "from": "gpt", "value": "The answer is ucla" } ], "clip_score": 28.484375, "loss": 4.502647399902344 }, { "id": "identity_280496", "image": "ScienceQA/images/2032.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What common property do these three objects share among the options of being flexible, opaque, or translucent?\nChoices:\n(A) flexible\n(B) opaque\n(C) translucent" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.1484375, "loss": 4.542548179626465 }, { "id": "identity_146772", "image": "DocVQA/images/zjpc0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person who signed the letter?" }, { "from": "gpt", "value": "The answer is David L. Carter" } ], "clip_score": 21.5, "loss": 4.027869701385498 }, { "id": "identity_21786", "image": "A-OKVQA/images/223580.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type food is more likely served here?\nChoices:\n(A) salads\n(B) filet mignon\n(C) hot dog\n(D) chile" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.4296875, "loss": 4.154324531555176 }, { "id": "identity_238228", "image": "UniGeo/images/2528.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,点E,F是正方形ABCD内的两个点,AB=13,AE=CF=5,BE=DF=12,求点E和点F的坐标以及线段EF的长。" }, { "from": "gpt", "value": "The answer is 7√{2}" } ], "clip_score": 29.65625, "loss": 4.231043815612793 }, { "id": "identity_136044", "image": "MapQA/images/map_6325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states are colored with the lightest shade?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 25.96875, "loss": 4.00073766708374 }, { "id": "identity_273696", "image": "TQA/images/abc_question_images/human_system_muscular_16156.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label that represents the triceps muscle:\n(A) X\n(B) R\n(C) U\n(D) K" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.609375, "loss": 4.850981712341309 }, { "id": "identity_188344", "image": "VQA2.0/images/513496.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the obstacle that the horse is jumping over?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 26.109375, "loss": 5.028483867645264 }, { "id": "identity_73163", "image": "AI2D/images/485.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many primary consumers can you identify in this desert food web?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 36.03125, "loss": 4.6313629150390625 }, { "id": "identity_288857", "image": "PMC-VQA/images/PMC2409170_fig3_23603.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of staining is depicted in the lower left corner? \nOptions:\n(A) PAS staining\n(B) H&E staining\n(C) Immunofluorescence staining\n(D) Giemsa staining" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.8125, "loss": 4.224709987640381 }, { "id": "identity_275140", "image": "A-OKVQA/images/459003.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What materials make up the falling material?\nChoices:\n(A) stone\n(B) brick\n(C) cloth\n(D) water" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 4.17578125, "loss": 4.180403709411621 }, { "id": "identity_303799", "image": "FigureQA/images/25525.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.703125, "loss": 4.724052429199219 }, { "id": "identity_281026", "image": "AI2D/images/3771.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is urine released?\nChoices:\n(A) A\n(B) I\n(C) E\n(D) G" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.703125, "loss": 4.050908088684082 }, { "id": "identity_50569", "image": "DocVQA/images/rpgn0226_90.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the product mentioned at the bottom of the document?" }, { "from": "gpt", "value": "The answer is Ultima MOM Total Hip System--IDE" } ], "clip_score": 23.75, "loss": 6.374772071838379 }, { "id": "identity_120819", "image": "TextVQA/images/00ac03de349a3c5b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any markings on the fruit other than the letters?" }, { "from": "gpt", "value": "The answer is Yes, small black lines" } ], "clip_score": 28.015625, "loss": 4.299184799194336 }, { "id": "identity_308208", "image": "A-OKVQA/images/207486.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the creature currently sitting?\nChoices:\n(A) street\n(B) park\n(C) boardwalk\n(D) beach" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.53125, "loss": 4.2373247146606445 }, { "id": "identity_121790", "image": "A-OKVQA/images/243071.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the text written on the side of the motorcycle closest to the camera?" }, { "from": "gpt", "value": "The answer is Raccoon Ink Side" } ], "clip_score": 25.609375, "loss": 4.949134826660156 }, { "id": "identity_278334", "image": "PMC-VQA/images/PMC7405101_ccr33163-fig-0004_227594.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In case 4, what abnormality is observed in the aorta?\nChoices:\n(A) An aneurysm\n(B) A tear\n(C) A dissection\n(D) It is normal" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.375, "loss": 4.635452747344971 }, { "id": "identity_326533", "image": "VizWiz/images/VizWiz_train_00003115.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is shown in this image?" }, { "from": "gpt", "value": "The answer is cup" } ], "clip_score": 24.765625, "loss": 4.409782409667969 }, { "id": "identity_332085", "image": "PMC-VQA/images/PMC6439010_Fig7_458886.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the radiographic finding indicative of a fungal ball in sinusitis?\nChoices:\n(A) Radiolucent area\n(B) Radiopacity in the middle\n(C) Calcified lesion\n(D) Empty space" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.96875, "loss": 4.59761905670166 }, { "id": "identity_166747", "image": "TabMWP/images/16236.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which class has the longest duration?" }, { "from": "gpt", "value": "The answer is History" } ], "clip_score": 25.71875, "loss": 4.2084856033325195 }, { "id": "identity_305287", "image": "AI2D/images/253.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the given diagram, a fox is a:\nChoices:\n(A) Predator\n(B) Prey\n(C) Decomposer\n(D) Producer" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.578125, "loss": 4.36148738861084 }, { "id": "identity_105331", "image": "TextVQA/images/e0e12dcdc5f44987.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the company advertised on the clock?" }, { "from": "gpt", "value": "The answer is URBE Publicidad" } ], "clip_score": 24.84375, "loss": 5.0296549797058105 }, { "id": "identity_286525", "image": "DVQA/images/bar_train_00138552.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the bars run horizontally?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.484375, "loss": 4.331486701965332 }, { "id": "identity_327119", "image": "PMC-VQA/images/PMC4638099_Fig1_442502.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the appearance of the white matter on this type of imaging?\nChoices:\n(A) Homogeneous\n(B) Hypointense\n(C) Heterogeneous\n(D) Hyperintense" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.59375, "loss": 4.031909942626953 }, { "id": "identity_299942", "image": "A-OKVQA/images/314515.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity are the people participating in at this gathering?\nChoices:\n(A) vacation\n(B) protest\n(C) cycling event\n(D) picnic" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.890625, "loss": 4.323628902435303 }, { "id": "identity_212687", "image": "TextVQA/images/0d0cdf59c4561f03.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the majority of the crowd's clothing?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 23.796875, "loss": 4.025784969329834 }, { "id": "identity_267230", "image": "AI2D/images/3642.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the layer located between the left atrium and the left coronary artery?\nChoices:\n(A) aorta\n(B) pulmonary artery\n(C) left pulmonary veins\n(D) descending aorta" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.71875, "loss": 4.47491455078125 }, { "id": "identity_319305", "image": "TQA/images/question_images/parts_worm_7311.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of creature is depicted in the image?\nChoices:\n(A) Leech\n(B) Fish\n(C) Crocodile\n(D) Bird" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.453125, "loss": 4.432621479034424 }, { "id": "identity_29303", "image": "PMC-VQA/images/PMC6326132_gf0100_420639.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the body did the imaging focus on?\nChoices:\n(A) Right upper limb \n(B) Left lower limb \n(C) Abdomen \n(D) Head " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.9375, "loss": 4.870346546173096 }, { "id": "identity_290691", "image": "PMC-VQA/images/PMC6360734_Fig1_431568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which chamber of the heart was the solid tumor located?\nChoices:\n(A) Right atrium\n(B) Left atrium\n(C) Right ventricle\n(D) Left ventricle." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.859375, "loss": 4.547222137451172 }, { "id": "identity_326356", "image": "PMC-VQA/images/PMC7570195_micromachines-11-00797-f012_277661.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of microscopy image is shown for this? \nChoices:\n(A) CT scan image \n(B) TEM image \n(C) Optical microscopy image \n(D) X-ray image" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.1875, "loss": 5.170663833618164 }, { "id": "identity_135492", "image": "DocVQA/images/znyj0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the subject of the fax cover sheet?" }, { "from": "gpt", "value": "The answer is Attached Letter" } ], "clip_score": 24.171875, "loss": 4.506704807281494 }, { "id": "identity_269851", "image": "IconQA/images/59147.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of rectangles an even or odd number?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.0625, "loss": 4.247575759887695 }, { "id": "identity_269675", "image": "AI2D/images/3311.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the long extending structure located at the base of the figure?\nChoices:\n(A) pill\n(B) ribosomes\n(C) flagella\n(D) none of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.359375, "loss": 4.189121246337891 }, { "id": "identity_5889", "image": "FigureQA/images/16020.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Red the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.265625, "loss": 4.03149938583374 }, { "id": "identity_335588", "image": "AI2D/images/3705.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the above diagram?\nChoices:\n(A) kidney\n(B) brain\n(C) urinary bladder\n(D) heart" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.25, "loss": 4.192887306213379 }, { "id": "identity_107015", "image": "TextVQA/images/aacff7cc99ea777a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many words are capitalized on the poster?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 15.21875, "loss": 4.754197597503662 }, { "id": "identity_332252", "image": "A-OKVQA/images/505933.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of place is shown here?\nChoices:\n(A) city\n(B) forest\n(C) ocean\n(D) desert" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.5625, "loss": 4.336878299713135 }, { "id": "identity_328715", "image": "TabMWP/images/27164.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is it linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.375, "loss": 4.222548484802246 }, { "id": "identity_13869", "image": "FigureQA/images/91965.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cadet Blue the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.1875, "loss": 4.1984381675720215 }, { "id": "identity_286615", "image": "PMC-VQA/images/PMC8374203_fig0002_37186.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be seen in the image provided?\nChoices:\n(A) Bacteria \n(B) Fungi \n(C) Viruses \n(D) Protozoa" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.765625, "loss": 4.55914831161499 }, { "id": "identity_327458", "image": "TQA/images/abc_question_images/parts_neuron_17224.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these letters indicates the part of the nerve cell that receives signals from other cells?\nChoices:\n(A) K\n(B) L\n(C) R\n(D) U" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.0625, "loss": 4.092164039611816 }, { "id": "identity_120750", "image": "A-OKVQA/images/64149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any duplicate books on the bookshelf?" }, { "from": "gpt", "value": "The answer is No duplicates visible" } ], "clip_score": 21.859375, "loss": 5.068776607513428 }, { "id": "identity_280808", "image": "Super-CLEVR/images/superCLEVR_new_001696.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer small yellow matte biplanes in front of the brown matte dirtbike compared to the number of large objects behind the small yellow matte airplane?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 31.328125, "loss": 5.062146186828613 }, { "id": "identity_276068", "image": "PMC-VQA/images/PMC7137947_RSOS192179F2_145974.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the images depict?\nChoices:\n(A) Various cell types.\n(B) The same sample at various preparation stages.\n(C) Different samples at the same preparation stage.\n(D) Anatomical structures of diverse organisms." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.4765625, "loss": 4.085697174072266 }, { "id": "identity_27919", "image": "PMC-VQA/images/PMC3504249_fig2_167977.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the gaseous necrosis seen in the computed tomography? \nChoices:\n(A) Along the vertebral column and spinal cord. \n(B) Around the lungs and thorax. \n(C) Along the iliopsoas and anterior abdominal wall. \n(D) Around the heart and mediastinum. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.46875, "loss": 4.223401069641113 }, { "id": "identity_4334", "image": "A-OKVQA/images/34597.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term for the maneuver the skaters will do next?\nChoices:\n(A) ollie\n(B) manual\n(C) dropping in\n(D) kick flip" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.203125, "loss": 4.342543125152588 }, { "id": "identity_330248", "image": "TabMWP/images/4130.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table displays a function. Is this function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.34375, "loss": 4.411660671234131 }, { "id": "identity_26753", "image": "A-OKVQA/images/158787.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the two women in the foreground carrying?\nChoices:\n(A) pumpkins\n(B) purses\n(C) eggs\n(D) cats" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.8671875, "loss": 4.491497039794922 }, { "id": "identity_303390", "image": "Super-CLEVR/images/superCLEVR_new_000529.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there anything else that is the same color as this small matte regular bus?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 27.59375, "loss": 4.970983982086182 }, { "id": "identity_286266", "image": "PMC-VQA/images/PMC5603110_fig1_216907.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where in the mandible is viable cortical bone located at the middle level? \n(A) Anterior margin \n(B) Lateral margin \n(C) Posterior margin \n(D) Medial margin" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.109375, "loss": 4.028401851654053 }, { "id": "identity_29184", "image": "IconQA/images/69597.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a plane. Which picture is second?\nChoices:\n(A) bear\n(B) train\n(C) plane" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.0625, "loss": 4.786988258361816 }, { "id": "identity_266577", "image": "AI2D/images/2236.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which stage in the life cycle does the letter E in the diagram correspond to? Is it (A) young larva, (B) eggs, (C) pupa in chrysalis, or (D) older larva?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.96875, "loss": 4.597317218780518 }, { "id": "identity_131405", "image": "PMC-VQA/images/PMC3902410_F1_261444.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the direction of blood flow in the visible coronary arteries?" }, { "from": "gpt", "value": "The answer is From larger to smaller branches" } ], "clip_score": 28.625, "loss": 4.066036701202393 }, { "id": "identity_320172", "image": "VQA2.0/images/514457.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the bear wearing?" }, { "from": "gpt", "value": "The answer is furr" } ], "clip_score": 16.421875, "loss": 5.915040493011475 }, { "id": "identity_131644", "image": "VizWiz/images/VizWiz_train_00009589.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many knives are visible in the knife block behind the bottle?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 24.671875, "loss": 4.5869269371032715 }, { "id": "identity_297382", "image": "IconQA/images/41930.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine if the number of squares is even or odd.\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.90625, "loss": 4.0243821144104 }, { "id": "identity_295147", "image": "PMC-VQA/images/PMC3917527_F1_265368.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging modality was utilized to capture the images?\nOptions:\n(A) CT scan\n(B) T1-weighted MRI\n(C) T2-weighted MRI\n(D) PET scan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.59375, "loss": 4.245447158813477 }, { "id": "identity_227785", "image": "ScienceQA/images/10485.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the capital of the state highlighted in the image, and then name the capital of the state directly south of it" }, { "from": "gpt", "value": "The answer is Dover" } ], "clip_score": 29.8125, "loss": 4.116867542266846 }, { "id": "identity_299703", "image": "A-OKVQA/images/424434.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which of the following is the person with the hat sitting?\n(A) quicksand\n(B) sand box\n(C) mud\n(D) boat" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.296875, "loss": 4.5369744300842285 }, { "id": "identity_237551", "image": "VizWiz/images/VizWiz_train_00015030.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the main components of this object and their functions?" }, { "from": "gpt", "value": "The answer is Lever, blades, pin, file and their respective functions" } ], "clip_score": 28.578125, "loss": 4.106601715087891 }, { "id": "identity_191076", "image": "A-OKVQA/images/24257.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the sign hanging from the ceiling on the right?" }, { "from": "gpt", "value": "The answer is Station Cleaning" } ], "clip_score": 19.53125, "loss": 4.112298011779785 }, { "id": "identity_12512", "image": "ScienceQA/images/8125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Alaska?\nChoices:\n(A) Seattle\n(B) Juneau\n(C) Saint Paul\n(D) Anchorage" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.34375, "loss": 4.3511786460876465 }, { "id": "identity_193592", "image": "PMC-VQA/images/PMC6353974_Fig3_429551.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can we determine the patient's orientation in this image?" }, { "from": "gpt", "value": "The answer is Yes, standard CT orientation" } ], "clip_score": 30.25, "loss": 5.113152503967285 }, { "id": "identity_334888", "image": "A-OKVQA/images/125275.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the fruit shown have a high level of?\nChoices:\n(A) magnesium\n(B) potassium\n(C) vitamin\n(D) vitamin b" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.90625, "loss": 4.2956461906433105 }, { "id": "identity_338215", "image": "A-OKVQA/images/496362.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are these individuals standing?\nChoices:\n(A) grass\n(B) street\n(C) trail\n(D) beach" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.9375, "loss": 4.369809150695801 }, { "id": "identity_4614", "image": "PMC-VQA/images/PMC5649724_F2_229375.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which lobe is affected by the lesion? \nChoices:\n(A) Left lobe \n(B) Right lobe \n(C) Middle lobe \n(D) None of the above. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.53125, "loss": 4.291203498840332 }, { "id": "identity_157600", "image": "AI2D/images/2105.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many levels of consumers are depicted in this food web?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.328125, "loss": 4.154043674468994 }, { "id": "identity_324137", "image": "A-OKVQA/images/308900.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what type of setting is this place located?\nChoices:\n(A) suburban\n(B) desert\n(C) urban\n(D) rural" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.953125, "loss": 4.5485944747924805 }, { "id": "identity_294337", "image": "FigureQA/images/38243.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Chocolate not the low median?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.7265625, "loss": 4.293671607971191 }, { "id": "identity_30920", "image": "DocVQA/images/ftmf0024_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the heading of the document?" }, { "from": "gpt", "value": "The answer is Prelaunch Smoker Base Program" } ], "clip_score": 19.640625, "loss": 6.279976844787598 }, { "id": "identity_326970", "image": "PMC-VQA/images/PMC7939179_F4_385961.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the green arrow in the image point to?\nChoices:\n(A) A fracture in the skull \n(B) A low-density shadow at frontal lobe \n(C) A high-density shadow at basal ganglia \n(D) A high-density shadow at cerebral cortex" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.515625, "loss": 4.602753639221191 }, { "id": "identity_68752", "image": "ScienceQA/images/15793.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals of the species are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.296875, "loss": 4.574887275695801 }, { "id": "identity_269860", "image": "PMC-VQA/images/PMC5540294_Fig4_198895.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in images c and d?\nChoices:\n(A) Normal detached cells\n(B) Ruptured detached cells\n(C) Merofusosomes\n(D) Absent merozoite formation" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.765625, "loss": 4.327033519744873 }, { "id": "identity_307699", "image": "VQA2.0/images/106635.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the ground?" }, { "from": "gpt", "value": "The answer is no dirt" } ], "clip_score": 11.15625, "loss": 4.885222434997559 }, { "id": "identity_306618", "image": "PMC-VQA/images/PMC8871003_diagnostics-12-00440-f001_203289.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: To where did the abscess extend?\nChoices:\n(A) right parietocolic wall\n(B) left parietocolic wall\n(C) rectum\n(D) bladder" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.953125, "loss": 4.445906162261963 }, { "id": "identity_2720", "image": "DVQA/images/bar_train_00007198.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What store does the red color represent?" }, { "from": "gpt", "value": "The answer is beam" } ], "clip_score": 26.984375, "loss": 6.13008451461792 }, { "id": "identity_292534", "image": "ScienceQA/images/9111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Iowa?\nChoices:\n(A) Lincoln\n(B) Des Moines\n(C) Cedar Rapids\n(D) Davenport" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.34375, "loss": 4.216269493103027 }, { "id": "identity_31689", "image": "A-OKVQA/images/341736.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which way is the one way arrow pointing?\nChoices:\n(A) up\n(B) down\n(C) left\n(D) right" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.015625, "loss": 4.348694324493408 }, { "id": "identity_118481", "image": "PMC-VQA/images/PMC8961915_Fig3_237822.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label \"C-3\" likely indicating in the image?" }, { "from": "gpt", "value": "The answer is Case number or classification" } ], "clip_score": 25.578125, "loss": 4.336752891540527 }, { "id": "identity_280627", "image": "TQA/images/abc_question_images/human_system_digestive_13687.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter on the diagram indicates the location where bile is produced?\nChoices:\n(A) M\n(B) P\n(C) J\n(D) U" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.6875, "loss": 4.191054344177246 }, { "id": "identity_19386", "image": "A-OKVQA/images/173759.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What country is the flag painted on the motorcycle above belong to?\nChoices:\n(A) united states\n(B) switzerland\n(C) great britain\n(D) mexico" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.9375, "loss": 4.068968296051025 }, { "id": "identity_288276", "image": "PMC-VQA/images/PMC3180278_pone-0025033-g001_110019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of MRI images are being utilized as a template?\nOptions:\n(A) 2D-SSP images\n(B) CT images\n(C) 3D-SSP images\n(D) T1-weighted MRI images" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.609375, "loss": 4.066033363342285 }, { "id": "identity_25329", "image": "IconQA/images/77021.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is purple?\nChoices:\n(A) 1/2\n(B) 1/3\n(C) 1/4\n(D) 1/5" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.1875, "loss": 4.0335164070129395 }, { "id": "identity_321125", "image": "ScienceQA/images/11631.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Nashville\n(B) Memphis\n(C) Richmond\n(D) Charleston" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.421875, "loss": 4.250364780426025 }, { "id": "identity_196524", "image": "A-OKVQA/images/17882.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vendor tent is visible in the image?" }, { "from": "gpt", "value": "The answer is Information" } ], "clip_score": 18.546875, "loss": 4.775922775268555 }, { "id": "identity_263322", "image": "TextVQA/images/25c5a8b20a59c38b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this a promotional advertisement?" }, { "from": "gpt", "value": "The answer is packers" } ], "clip_score": 15.328125, "loss": 5.846078872680664 }, { "id": "identity_326518", "image": "A-OKVQA/images/334320.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the animals doing with their bodies?\nChoices:\n(A) laying down\n(B) crawling\n(C) standing up\n(D) rolling around" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.890625, "loss": 4.369468688964844 }, { "id": "identity_319006", "image": "FigureQA/images/80425.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.75, "loss": 4.688419342041016 }, { "id": "identity_109516", "image": "VQA2.0/images/436046.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many red cars are parked on the street?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 20.71875, "loss": 4.632238864898682 }, { "id": "identity_33502", "image": "IconQA/images/1485.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner more likely to land?\nChoices:\n(A) white\n(B) pink" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.984375, "loss": 4.706221103668213 }, { "id": "identity_307302", "image": "FigureQA/images/62147.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the roughest line?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.6328125, "loss": 4.078973770141602 }, { "id": "identity_305179", "image": "TextVQA/images/3658ba6668c2255b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What letters are on this person's shirt?" }, { "from": "gpt", "value": "The answer is pe" } ], "clip_score": 15.3671875, "loss": 4.944438457489014 }, { "id": "identity_133159", "image": "A-OKVQA/images/577122.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of pattern is on the person's headwear?" }, { "from": "gpt", "value": "The answer is Tropical or leaf pattern" } ], "clip_score": 13.421875, "loss": 4.494958400726318 }, { "id": "identity_204395", "image": "TextVQA/images/d40ca05c8c584564.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different flavors of Pocky are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 24.421875, "loss": 4.265379905700684 }, { "id": "identity_130737", "image": "TextVQA/images/f1f3d3076611852c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of aircraft is visible in the image?" }, { "from": "gpt", "value": "The answer is Bush plane" } ], "clip_score": 20.984375, "loss": 4.081874370574951 }, { "id": "identity_1685", "image": "PMC-VQA/images/PMC7433362_Fig3_236391.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which side is the case located in? \nChoices:\n(A) Right hip \n(B) Left hip \n(C) Cannot be determined \n(D) Both hips " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.390625, "loss": 4.398975372314453 }, { "id": "identity_295924", "image": "PMC-VQA/images/PMC4403829_Fig1_379314.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the asterisk in the image signify? Is it:\n(A) Right prostate lobe\n(B) Urinary bladder\n(C) Left prostate lobe\n(D) Penis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.140625, "loss": 4.647548675537109 }, { "id": "identity_263085", "image": "ScienceQA/images/531.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the organism that serves as the secondary consumer in this food web:\n(A) zooplankton\n(B) phytoplankton\n(C) kelp\n(D) plainfin midshipman" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.4375, "loss": 4.020268440246582 }, { "id": "identity_145899", "image": "DocVQA/images/khyl0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the letter number mentioned at the top of the document?" }, { "from": "gpt", "value": "The answer is #5/576" } ], "clip_score": 20.75, "loss": 4.266083717346191 }, { "id": "identity_296796", "image": "A-OKVQA/images/245361.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which term is associated with this event: goal, homerun, surfs up, or balance beam?\nChoices:\n(A) goal\n(B) homerun\n(C) surfs up\n(D) balance beam" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.65625, "loss": 4.237714767456055 }, { "id": "identity_305622", "image": "PMC-VQA/images/PMC5066513_Fig14_84410.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the thickening most prominent in the retina?\nChoices:\n(A) Superiorly\n(B) Inferiorly\n(C) Nasally\n(D) Temporally" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.9375, "loss": 4.133393287658691 }, { "id": "identity_308392", "image": "AI2D/images/3311.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the long extending structure at the base of this?\nChoices:\n(A) pill\n(B) ribosomes\n(C) flagella\n(D) none of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.328125, "loss": 4.384528636932373 }, { "id": "identity_38527", "image": "PMC-VQA/images/PMC5457022_materials-09-00958-f009_174071.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the arrows in the image? \nChoices:\n(A) Untreated implants \n(B) Artificial material \n(C) Hard tissue \n(D) Soft tissue " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.09375, "loss": 4.568624019622803 }, { "id": "identity_28923", "image": "A-OKVQA/images/168903.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the symbols on the top yellow sign look like?\nChoices:\n(A) numbers\n(B) hieroglyphics\n(C) roman numerals\n(D) hanzi" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.390625, "loss": 4.101985931396484 }, { "id": "identity_217478", "image": "A-OKVQA/images/29525.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of construction is happening in the image?" }, { "from": "gpt", "value": "The answer is Subway construction" } ], "clip_score": 23.78125, "loss": 4.321904182434082 }, { "id": "identity_5101", "image": "ScienceQA/images/5276.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Tennessee?\nChoices:\n(A) Nashville\n(B) Knoxville\n(C) Boulder\n(D) Memphis" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.34375, "loss": 4.314998626708984 }, { "id": "identity_275067", "image": "A-OKVQA/images/79645.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose is the yellow part at the front of the train used?\nChoices:\n(A) bumper\n(B) passenger exit\n(C) decoration\n(D) emergency exit" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.0, "loss": 4.588719844818115 }, { "id": "identity_135552", "image": "PlotQA/images/41903.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does any category show a decrease in life expectancy from 1960 to 1962?" }, { "from": "gpt", "value": "The answer is No decrease observed" } ], "clip_score": 27.09375, "loss": 5.374893665313721 }, { "id": "identity_64504", "image": "ScienceQA/images/8237.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many highlighted regions are there in total?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 21.078125, "loss": 4.316522121429443 }, { "id": "identity_279953", "image": "Super-CLEVR/images/superCLEVR_new_001223.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the sizes of the cyan truck and the green fighter the same?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 27.609375, "loss": 5.499090671539307 }, { "id": "identity_25217", "image": "PMC-VQA/images/PMC6430261_Fig5_455714.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in panel (d) of the image? \nChoices:\n(A) Injected activity of the pig 13 \n(B) SUVmax of the pig 13 \n(C) Location of the osteomyelytic lesion in pig 13 \n(D) Size of the osteomyelytic lesion in pig 13 " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.609375, "loss": 4.515715599060059 }, { "id": "identity_325575", "image": "PMC-VQA/images/PMC5362666_Fig4_144476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the colour used to show the manually delineated tumour contours in this image?\nChoices:\n(A) Blue\n(B) White\n(C) Orange\n(D) Black" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.40625, "loss": 4.172144889831543 }, { "id": "identity_141406", "image": "FigureQA/images/44179.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which bar has the second highest value?" }, { "from": "gpt", "value": "The answer is Dark Gold" } ], "clip_score": 21.59375, "loss": 4.999509811401367 }, { "id": "identity_199045", "image": "VQA2.0/images/364361.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 18.5, "loss": 4.402120113372803 }, { "id": "identity_274211", "image": "PMC-VQA/images/PMC8128403_f3_443737.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of angiogram was conducted? \nOptions:\n(A) Coronary angiogram\n(B) Cerebral angiogram\n(C) Retrograde angiogram of the left common femoral artery\n(D) Retrograde angiogram of the right common femoral artery" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.578125, "loss": 4.188464641571045 }, { "id": "identity_114", "image": "AI2D/images/88.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does A represent in the diagram?\nChoices:\n(A) none of the above\n(B) night\n(C) year\n(D) day" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.59375, "loss": 4.106259346008301 }, { "id": "identity_308152", "image": "ScienceQA/images/8678.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What could the test show?\nChoices:\n(A) how much the drone weighed with the blade guards\n(B) if adding the blade guards made the drone fly poorly\n(C) if the blade guards would break in a crash" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.671875, "loss": 4.074243068695068 }, { "id": "identity_11324", "image": "Super-CLEVR/images/superCLEVR_new_001286.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the number of green objects in front of the big brown metallic thing less than the number of large purple cars that are behind the large rubber car?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 27.640625, "loss": 4.810013771057129 }, { "id": "identity_295356", "image": "Super-CLEVR/images/superCLEVR_new_001580.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more green minivans on the left side of the yellow aeroplane than small green motorbikes behind the small green motorbike?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 31.0625, "loss": 4.920994281768799 }, { "id": "identity_15110", "image": "IconQA/images/101935.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a tractor. Which picture is fourth?\nChoices:\n(A) sun\n(B) barn\n(C) tractor" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.546875, "loss": 4.089788436889648 }, { "id": "identity_274285", "image": "FigureQA/images/42910.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Blue or Light Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.6875, "loss": 4.54012393951416 }, { "id": "identity_13175", "image": "A-OKVQA/images/60992.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the company make whose name appears on the right side of the wall?\nChoices:\n(A) witcher potions\n(B) crafts\n(C) baseball bats\n(D) eyeglasses" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 6.2734375, "loss": 4.235605239868164 }, { "id": "identity_13528", "image": "VQA-RAD/images/synpic17145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which diaphragm is elevated?" }, { "from": "gpt", "value": "The answer is right" } ], "clip_score": 24.78125, "loss": 4.119744300842285 }, { "id": "identity_7634", "image": "AI2D/images/4223.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, what form of rock is shown at point B?\nChoices:\n(A) surface rock\n(B) metamorphic\n(C) igneous\n(D) sediment" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.125, "loss": 4.780104637145996 }, { "id": "identity_15975", "image": "A-OKVQA/images/472439.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who was the last person to have a perfect game from that position on the left?\nChoices:\n(A) mark mcgwire\n(B) felix hernandez\n(C) philip humber\n(D) matt cain" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.171875, "loss": 4.268757343292236 }, { "id": "identity_206336", "image": "TextVQA/images/47b78464e24b22f6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times does the brand name appear on the clock face?" }, { "from": "gpt", "value": "The answer is One large, multiple small" } ], "clip_score": 28.59375, "loss": 4.302886486053467 }, { "id": "identity_288677", "image": "A-OKVQA/images/172478.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: To which area of London does the bus travel? Is it:\n(A) south\n(B) west\n(C) central\n(D) north" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.828125, "loss": 4.228977203369141 }, { "id": "identity_288227", "image": "A-OKVQA/images/1053.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action is prohibited if you are in a car approaching from this direction?\nChoices:\n(A) going forwards\n(B) turning right\n(C) waiting\n(D) turning left" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.796875, "loss": 4.237335205078125 }, { "id": "identity_266544", "image": "FigureQA/images/88081.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Brown's area under the curve the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.671875, "loss": 4.698834419250488 }, { "id": "identity_276700", "image": "A-OKVQA/images/166173.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following items must come into contact with the ground to successfully stick the landing? \n(A) board\n(B) sneakers\n(C) wheels\n(D) lining" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.984375, "loss": 4.518050193786621 }, { "id": "identity_315714", "image": "PMC-VQA/images/PMC3474213_fig4_160836.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of epithelium covers the indentation of the tissue in the image?\nChoices:\n(A) Simple columnar \n(B) Stratified squamous \n(C) Pseudostratified \n(D) Cuboidal" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.078125, "loss": 4.017224311828613 }, { "id": "identity_124280", "image": "A-OKVQA/images/328810.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many magnets on the fridge are of primary colors?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 19.046875, "loss": 4.451282501220703 }, { "id": "identity_334402", "image": "VQA2.0/images/378090.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this style?" }, { "from": "gpt", "value": "The answer is green" } ], "clip_score": 9.34375, "loss": 5.445422172546387 }, { "id": "identity_120690", "image": "VQA2.0/images/102168.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the machine have a logo or brand visible?" }, { "from": "gpt", "value": "The answer is Yes, Franklin Sports logo" } ], "clip_score": 19.109375, "loss": 4.5609564781188965 }, { "id": "identity_152035", "image": "ScienceQA/images/4704.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the bird's plumage?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 24.34375, "loss": 4.2251691818237305 }, { "id": "identity_323051", "image": "PMC-VQA/images/PMC5076298_f3_87031.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What technique was used to obtain these images?\nChoices:\n(A) X-ray imaging\n(B) Computed Tomography imaging\n(C) Magnetic Resonance Imaging\n(D) Ultrasound imaging" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.5625, "loss": 4.667635917663574 }, { "id": "identity_261378", "image": "A-OKVQA/images/238459.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colors are present on the cow that is nearest to the camera?\nChoices:\n(A) orange\n(B) blue\n(C) brown white\n(D) black" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.1875, "loss": 4.109205722808838 }, { "id": "identity_216043", "image": "TextVQA/images/a9f3b1fa9c7caabd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible riding bicycles in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.625, "loss": 4.227687835693359 }, { "id": "identity_8343", "image": "ScienceQA/images/3026.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Colorado?\nChoices:\n(A) Colorado Springs\n(B) Boulder\n(C) Denver\n(D) Cheyenne" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.375, "loss": 4.055752754211426 }, { "id": "identity_17349", "image": "VQA-RAD/images/synpic60703.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of artifact is shown in this image?" }, { "from": "gpt", "value": "The answer is Motion" } ], "clip_score": 22.21875, "loss": 4.335653781890869 }, { "id": "identity_30450", "image": "PMC-VQA/images/PMC6487516_Fig2_472500.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most likely diagnosis of the lesion in the right frontal lobe? \nChoices:\n(A) Meningioma \n(B) Glioma \n(C) Metastasis \n(D) Abscess " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.84375, "loss": 4.208867073059082 }, { "id": "identity_288513", "image": "IconQA/images/80174.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the probability of selecting a black marble without looking?\nChoices:\n(A) certain\n(B) impossible\n(C) probable\n(D) unlikely" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.75, "loss": 4.121503829956055 }, { "id": "identity_276045", "image": "ScienceQA/images/2261.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is being indicated in the highlighted area?\nChoices:\n(A) North America\n(B) Europe\n(C) Africa\n(D) Antarctica" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.59375, "loss": 4.276939392089844 }, { "id": "identity_6012", "image": "ScienceQA/images/18397.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) scratchy\n(B) yellow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.25, "loss": 4.202367305755615 }, { "id": "identity_140404", "image": "MapQA/images/map_10793.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have trustees in the range of 608-684?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 26.125, "loss": 4.1035614013671875 }, { "id": "identity_312565", "image": "A-OKVQA/images/94052.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on top of this table?\nChoices:\n(A) cat\n(B) bananas\n(C) fish heads\n(D) yule log" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.8125, "loss": 4.287361145019531 }, { "id": "identity_131635", "image": "A-OKVQA/images/20671.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many blue objects can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 14.875, "loss": 4.869750022888184 }, { "id": "identity_205082", "image": "A-OKVQA/images/491440.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stops are there in total on the downtown loop service?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 24.984375, "loss": 5.117830753326416 }, { "id": "identity_295074", "image": "ScienceQA/images/5617.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repulsive force towards each other?\nChoices:\n(A) repel\n(B) attract" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.21875, "loss": 4.2127180099487305 }, { "id": "identity_265722", "image": "DVQA/images/bar_train_00009106.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which holds a higher value, gold in art or a father in a gang?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 24.25, "loss": 4.498903274536133 }, { "id": "identity_327639", "image": "PMC-VQA/images/PMC6365560_Fig3_433160.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the arrowhead in this image?\nChoices:\n(A) anterior end \n(B) posterior end \n(C) ligule \n(D) stem" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.71875, "loss": 5.6186699867248535 }, { "id": "identity_282097", "image": "DocVQA/images/ksjf0226_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the author of this document?" }, { "from": "gpt", "value": "The answer is Stacey Dixon Calahan" } ], "clip_score": 24.90625, "loss": 4.172819137573242 }, { "id": "identity_10394", "image": "TQA/images/question_images/human_system_eye_6131.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the retina located?\nChoices:\n(A) Surrounding the macula\n(B) Surrounding the iris\n(C) Surrounding the vitreous gel\n(D) Surrounding the pupil" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.234375, "loss": 4.659927845001221 }, { "id": "identity_264359", "image": "FigureQA/images/70905.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Khaki the color with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.828125, "loss": 4.801142692565918 }, { "id": "identity_11281", "image": "ScienceQA/images/15366.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) yellow\n(B) flexible" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.296875, "loss": 4.39450740814209 }, { "id": "identity_291226", "image": "A-OKVQA/images/237464.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of weather are most people at the beach hoping for today?\nChoices:\n(A) wind\n(B) sleet\n(C) rain\n(D) snow" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.03125, "loss": 4.233290672302246 }, { "id": "identity_303616", "image": "AI2D/images/2933.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is E in this diagram?\nChoices:\n(A) heart wood\n(B) bark\n(C) cambium\n(D) sap wood" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.71875, "loss": 4.31937313079834 }, { "id": "identity_68993", "image": "AI2D/images/3123.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main difference between the 'Fused' and 'Open' leaf sheath structures?" }, { "from": "gpt", "value": "The answer is Connection to the stem" } ], "clip_score": 41.78125, "loss": 4.99626350402832 }, { "id": "identity_263415", "image": "FigureQA/images/33751.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Crimson or Coral?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.203125, "loss": 4.587786674499512 }, { "id": "identity_222747", "image": "A-OKVQA/images/162322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many donuts are visible in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 24.671875, "loss": 4.5285725593566895 }, { "id": "identity_289393", "image": "IconQA/images/39223.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) flip\n(B) turn\n(C) slide" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.203125, "loss": 4.079160690307617 }, { "id": "identity_283132", "image": "A-OKVQA/images/37678.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which mathematical operation should be performed on the numbers to reach the era of steam power?\nChoices:\n(A) division\n(B) multiplication\n(C) subtraction\n(D) addition" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.65625, "loss": 4.232478618621826 }, { "id": "identity_329541", "image": "AI2D/images/4884.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the label for the island arc?\nChoices:\n(A) b\n(B) d\n(C) a\n(D) c" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.015625, "loss": 4.346377372741699 }, { "id": "identity_330827", "image": "VQA2.0/images/360238.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of sign is here in the middle of the road?" }, { "from": "gpt", "value": "The answer is license plate" } ], "clip_score": 15.3359375, "loss": 4.015030860900879 }, { "id": "identity_32670", "image": "PMC-VQA/images/PMC7034755_FIG1_122260.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the PE located in this image? \nChoices:\n(A) Left Pulmonary Artery \n(B) Bronchus \n(C) Right Pulmonary Artery \n(D) Trachea " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.546875, "loss": 4.4416680335998535 }, { "id": "identity_26148", "image": "PMC-VQA/images/PMC7786473_Fig6_335682.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How do the postoperative radiographic images appear? \nChoices:\n(A) poor alignment \n(B) fair alignment \n(C) good alignment \n(D) excellent alignment. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.375, "loss": 4.256968021392822 }, { "id": "identity_14602", "image": "A-OKVQA/images/493227.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who manufactured the car on the right?\nChoices:\n(A) lexus\n(B) audi\n(C) bmw\n(D) mercedes" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.890625, "loss": 4.261050701141357 }, { "id": "identity_123800", "image": "VQA2.0/images/5115.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the room's walls?" }, { "from": "gpt", "value": "The answer is within ten words" } ], "clip_score": 24.765625, "loss": 5.240682125091553 }, { "id": "identity_35914", "image": "IconQA/images/51535.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a rocket. Which picture is ninth?\nChoices:\n(A) star\n(B) moon\n(C) rocket" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.578125, "loss": 4.188782215118408 }, { "id": "identity_203798", "image": "PMC-VQA/images/PMC7410867_Fig1_229883.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arrows are present in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.828125, "loss": 4.184482574462891 }, { "id": "identity_15449", "image": "ScienceQA/images/18354.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which animal's mouth is also adapted for bottom feeding?\nChoices:\n(A) emperor angelfish\n(B) spotted eagle ray" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.484375, "loss": 4.206355571746826 }, { "id": "identity_17455", "image": "FigureQA/images/50088.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Salmon the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.25, "loss": 4.330737113952637 }, { "id": "identity_298456", "image": "A-OKVQA/images/431092.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action is the man performing with the blue wand?\nChoices:\n(A) waving\n(B) digging\n(C) scratching\n(D) playing" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.953125, "loss": 4.142578601837158 }, { "id": "identity_330668", "image": "A-OKVQA/images/283101.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of object powers these things?\nChoices:\n(A) engine\n(B) battery\n(C) paddle\n(D) sun" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.3515625, "loss": 4.055675029754639 }, { "id": "identity_279384", "image": "FigureQA/images/85061.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Firebrick the area under the curve with the minimum value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.078125, "loss": 4.1010966300964355 }, { "id": "identity_112423", "image": "VizWiz/images/VizWiz_train_00023702.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the software that is displaying the \"Forgot password help\" window?" }, { "from": "gpt", "value": "The answer is Zello" } ], "clip_score": 32.53125, "loss": 4.744409084320068 }, { "id": "identity_190871", "image": "Super-CLEVR/images/superCLEVR_new_001578.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the car that is furthest to the right?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 25.390625, "loss": 4.214887619018555 }, { "id": "identity_327066", "image": "PMC-VQA/images/PMC8780928_btm210261-fig-0004_173014.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the image?\nChoices:\n(A) ADSC isolation \n(B) ADSC proliferation \n(C) ADSC osteogenic differentiation \n(D) ADSC adipogenic differentiation" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.453125, "loss": 4.184042930603027 }, { "id": "identity_48685", "image": "DocVQA/images/zxlb0019_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the accountable executive for the project?" }, { "from": "gpt", "value": "The answer is Carl Ehmann" } ], "clip_score": 18.359375, "loss": 4.280196666717529 }, { "id": "identity_190887", "image": "PMC-VQA/images/PMC6219077_Fig3_391786.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the background in the image?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 18.046875, "loss": 4.222697734832764 }, { "id": "identity_320393", "image": "A-OKVQA/images/260345.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people in this place looking to do?\nChoices:\n(A) paint\n(B) shop\n(C) investigate\n(D) race" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.3125, "loss": 4.382953643798828 }, { "id": "identity_217205", "image": "A-OKVQA/images/72769.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many children are wearing party hats?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 20.890625, "loss": 4.8499755859375 }, { "id": "identity_109958", "image": "Super-CLEVR/images/superCLEVR_new_000027.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is positioned to the right of the green truck?" }, { "from": "gpt", "value": "The answer is A car" } ], "clip_score": 22.859375, "loss": 4.268855571746826 }, { "id": "identity_333814", "image": "ScienceQA/images/18289.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What describes the ecosystem of the Tallgrass Prairie National Preserve?\nChoices:\n(A) It has hot summers and cool winters.\n(B) It has soil that is poor in nutrients." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.796875, "loss": 4.224921226501465 }, { "id": "identity_202020", "image": "PMC-VQA/images/PMC4070323_fig2_300953.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the medical term for the region indicated by the short arrow in the image?" }, { "from": "gpt", "value": "The answer is Forefoot" } ], "clip_score": 27.03125, "loss": 4.0610785484313965 }, { "id": "identity_286389", "image": "A-OKVQA/images/189153.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose is the item on the baseball bat used for?\nChoices:\n(A) warming up\n(B) cheating\n(C) just aesthetics\n(D) making noise" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.765625, "loss": 4.211155891418457 }, { "id": "identity_309062", "image": "PMC-VQA/images/PMC7350248_ijms-21-04518-f001_210224.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color of these blood vessels?\nChoices:\n(A) red\n(B) green\n(C) blue\n(D) yellow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.984375, "loss": 4.74796199798584 }, { "id": "identity_315479", "image": "A-OKVQA/images/196944.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the animal wearing a hat?\nChoices:\n(A) stay warm\n(B) for photograph\n(C) showing off\n(D) fashion" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.640625, "loss": 4.413651466369629 }, { "id": "identity_129000", "image": "VQA2.0/images/236217.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is seen on the right side of the image?" }, { "from": "gpt", "value": "The answer is Taxi" } ], "clip_score": 18.140625, "loss": 4.1980977058410645 }, { "id": "identity_3546", "image": "AI2D/images/2154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The adult medusa form is represented at:\nChoices:\n(A) c\n(B) a\n(C) d\n(D) b" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.515625, "loss": 4.198662281036377 }, { "id": "identity_267156", "image": "VQA2.0/images/535278.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What could be the reason for this woman attempting to touch the screen?" }, { "from": "gpt", "value": "The answer is work" } ], "clip_score": 26.65625, "loss": 5.411983489990234 }, { "id": "identity_128241", "image": "VQA-AS/images/11957.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the curtains in the room?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 21.015625, "loss": 4.029349327087402 }, { "id": "identity_181817", "image": "CLEVR-Math/images/CLEVR_train_009094.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the sphere?" }, { "from": "gpt", "value": "The answer is Cyan" } ], "clip_score": 28.4375, "loss": 4.220433712005615 }, { "id": "identity_266984", "image": "ScienceQA/images/9640.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic is shared by both of these objects?\nChoices:\n(A) smooth\n(B) rough" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.109375, "loss": 4.213720321655273 }, { "id": "identity_206899", "image": "PMC-VQA/images/PMC7881081_Fig20_367266.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which part of the body is shown in the image?" }, { "from": "gpt", "value": "The answer is Hand" } ], "clip_score": 22.90625, "loss": 4.580094814300537 }, { "id": "identity_288923", "image": "PMC-VQA/images/PMC7960771_F2_391879.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the brain is the lesion located?\nChoices:\n(A) Left frontal lobe\n(B) Right frontal lobe\n(C) Parietal lobe\n(D) Occipital lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.59375, "loss": 4.291604518890381 }, { "id": "identity_329462", "image": "A-OKVQA/images/305901.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this person about to do?\nChoices:\n(A) make call\n(B) change channel\n(C) eat dinner\n(D) check temperature" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.3125, "loss": 4.458014011383057 }, { "id": "identity_280185", "image": "AI2D/images/2949.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the letter D in the given choices?\nChoices:\n(A) Nexin\n(B) Inner Sheath\n(C) Plasma Membrane\n(D) Central Microtubules" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 34.78125, "loss": 4.155451774597168 }, { "id": "identity_47381", "image": "DocVQA/images/spnf0227_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of difference is indicated by a double asterisk (**) in the legend?" }, { "from": "gpt", "value": "The answer is Highly unusual difference" } ], "clip_score": 20.8125, "loss": 4.9608564376831055 }, { "id": "identity_26752", "image": "PMC-VQA/images/PMC7372844_Fig4_217355.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which X-ray view shows the image from above the body? \nChoices:\n(A) a Preoperative X-ray \n(B) b Postoperative X-ray in the AP plane \n(C) d Postoperative X-ray in the inlet projection \n(D) e Postoperative X-ray in the outlet projection " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.8125, "loss": 4.274962425231934 }, { "id": "identity_290940", "image": "TabMWP/images/21990.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which day of the week did the technical support line receive the highest number of calls? Is it:\n(A) Tuesday\n(B) Wednesday\n(C) Thursday\n(D) Friday" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.0, "loss": 4.15885591506958 }, { "id": "identity_1585", "image": "ScienceQA/images/377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can happen to the spores after they are released?\nChoices:\n(A) They can fuse and form a fertilized egg.\n(B) They can grow into an adult moss plant." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.125, "loss": 4.395251274108887 }, { "id": "identity_271660", "image": "ScienceQA/images/6832.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Iowa?\nChoices:\n(A) Lincoln\n(B) Des Moines\n(C) Norfolk\n(D) Davenport" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.109375, "loss": 4.282687187194824 }, { "id": "identity_48886", "image": "DVQA/images/bar_train_00043413.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item has the least number of units sold?" }, { "from": "gpt", "value": "The answer is regard" } ], "clip_score": 25.921875, "loss": 4.302236557006836 }, { "id": "identity_271808", "image": "VQA2.0/images/247190.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this animal belong to the mammal category?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 17.890625, "loss": 4.0151801109313965 }, { "id": "identity_110360", "image": "VQA2.0/images/311726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many horses are pulling the chariot?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 25.5625, "loss": 4.143722057342529 }, { "id": "identity_310958", "image": "PMC-VQA/images/PMC5602954_Fig1_216803.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of scan was used to detect the lung metastasis?\nChoices:\n(A) MRI chest and abdomen\n(B) CT chest only\n(C) PET scan only\n(D) CT chest, abdomen and pelvis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.34375, "loss": 4.33335018157959 }, { "id": "identity_261534", "image": "FigureQA/images/72815.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the area under the curve for Cyan the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.234375, "loss": 4.00057315826416 }, { "id": "identity_290780", "image": "PMC-VQA/images/PMC6635362_Fig1_9662.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the images depict the subject as?\nChoices:\n(A) Carbohydrates\n(B) Bacteria\n(C) Cells\n(D) Serotype antiserum." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.8125, "loss": 4.77029275894165 }, { "id": "identity_303924", "image": "A-OKVQA/images/5282.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the direction of the boat?\nChoices:\n(A) over bridge\n(B) home\n(C) under bridge\n(D) up river" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.015625, "loss": 4.171169281005859 }, { "id": "identity_200241", "image": "PMC-VQA/images/PMC6993595_F3_109216.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the level of the cross-section taken in the image?" }, { "from": "gpt", "value": "The answer is Pelvic level" } ], "clip_score": 31.421875, "loss": 4.260870456695557 }, { "id": "identity_320254", "image": "PMC-VQA/images/PMC7816452_Fig2_345735.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of fit is depicted in the image?\nChoices:\n(A) Type I fit\n(B) Type II fit\n(C) Type III fit\n(D) Type IV fit" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.15625, "loss": 4.1616034507751465 }, { "id": "identity_296379", "image": "A-OKVQA/images/504974.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which continent was this photo most likely captured? \nOptions:\n(A) North America\n(B) South America\n(C) Europe\n(D) Africa" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.0390625, "loss": 4.3761725425720215 }, { "id": "identity_312019", "image": "DocVQA/images/zpfv0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this place?" }, { "from": "gpt", "value": "The answer is The Food Processors Institute" } ], "clip_score": 19.390625, "loss": 5.611756801605225 }, { "id": "identity_298056", "image": "PMC-VQA/images/PMC5618513_ijms-18-01864-f001_221292.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this PET/CT scan, where is the high uptake observed?\nChoices:\n(A) Right supraclavicular lymph nodes\n(B) Abdominal aorta\n(C) Left supraclavicular lymph nodes\n(D) Liver" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.34375, "loss": 4.340755939483643 }, { "id": "identity_294603", "image": "PMC-VQA/images/PMC3418000_F2_149475.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the body is depicted in the CT scan image? \n(A) Hand \n(B) Foot \n(C) Brain \n(D) Spine" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.015625, "loss": 4.685740947723389 }, { "id": "identity_273314", "image": "Super-CLEVR/images/superCLEVR_new_001125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there a greater number of brown matte motorbikes in front of the green metallic object compared to the green metal regular cars?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 23.8125, "loss": 4.874047756195068 }, { "id": "identity_283105", "image": "VQA2.0/images/157783.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the ground covered with?" }, { "from": "gpt", "value": "The answer is snow" } ], "clip_score": 19.484375, "loss": 4.290537357330322 }, { "id": "identity_119441", "image": "Super-CLEVR/images/superCLEVR_new_000644.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest object in the image?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 19.421875, "loss": 4.500804901123047 }, { "id": "identity_290541", "image": "PMC-VQA/images/PMC3834309_pone-0080939-g005_244798.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the type of image displayed from the following options:\n(A) CT Scan\n(B) PET Scan\n(C) MRI\n(D) X-Ray" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.625, "loss": 4.706059455871582 }, { "id": "identity_249022", "image": "TabMWP/images/26660.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what price does the market reach equilibrium, where quantity demanded equals quantity supplied?" }, { "from": "gpt", "value": "The answer is No equilibrium price" } ], "clip_score": 29.09375, "loss": 4.419981956481934 }, { "id": "identity_290682", "image": "PMC-VQA/images/PMC4186633_f1-ol-08-05-2059_325677.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the red arrow point to in the given image?\nChoices:\n(A) Medulla Oblongata \n(B) Cavernous sinus \n(C) Auditory cortex \n(D) Vestibular aqueducts" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.84375, "loss": 4.5605974197387695 }, { "id": "identity_291498", "image": "PMC-VQA/images/PMC6321279_ijms-19-03905-f003_419394.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which pair of MRI scans represents the time point that is 3 months after treatment?\n(A) A and B\n(B) C and D\n(C) A and D\n(D) B and C" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.546875, "loss": 4.036019325256348 }, { "id": "identity_293764", "image": "IconQA/images/39160.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you randomly choose a marble without seeing it, which color are you less likely to select?\nChoices:\n(A) green\n(B) black" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.09375, "loss": 4.334734916687012 }, { "id": "identity_280584", "image": "A-OKVQA/images/233341.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which country is Solingen HBF railway located?\nChoices:\n(A) Germany\n(B) Italy\n(C) France\n(D) Canada" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.3125, "loss": 4.295792579650879 }, { "id": "identity_9269", "image": "AI2D/images/26.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Bigger fish are eaten by ...\nChoices:\n(A) worms\n(B) humans\n(C) amphipods isopods\n(D) NA" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.953125, "loss": 4.0902605056762695 }, { "id": "identity_333704", "image": "AI2D/images/1087.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part is referred to as the expanded section of a leaf?\nChoices:\n(A) petiole\n(B) midrib\n(C) blade\n(D) vein" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.375, "loss": 4.561699867248535 }, { "id": "identity_11090", "image": "ScienceQA/images/4275.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) Samoa\n(B) Kiribati\n(C) the Marshall Islands\n(D) Solomon Islands" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.03125, "loss": 4.72463846206665 }, { "id": "identity_6598", "image": "A-OKVQA/images/256529.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the common term for these objects with wheels?\nChoices:\n(A) wheelbarrow\n(B) luggage dolly\n(C) air cart\n(D) smart cart" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.390625, "loss": 4.178463935852051 }, { "id": "identity_309570", "image": "A-OKVQA/images/470442.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this vehicle carrying in the back?\nChoices:\n(A) coal\n(B) wheat\n(C) wood\n(D) snow" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.859375, "loss": 4.174736976623535 }, { "id": "identity_2563", "image": "PMC-VQA/images/PMC7959875_FIG2_391530.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the computed tomography image show? \nChoices:\n(A) Lungs \n(B) Abdomen \n(C) Heart \n(D) Kidneys " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.15625, "loss": 5.446099281311035 }, { "id": "identity_11317", "image": "FigureQA/images/44207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Violet Red the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.359375, "loss": 4.007652282714844 }, { "id": "identity_287968", "image": "TextVQA/images/102be9f03369e1b4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which company was responsible for brewing this beer?" }, { "from": "gpt", "value": "The answer is duclaw " } ], "clip_score": 23.109375, "loss": 5.1920599937438965 }, { "id": "identity_199783", "image": "A-OKVQA/images/340108.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the ski resort?" }, { "from": "gpt", "value": "The answer is VELLÁN" } ], "clip_score": 28.546875, "loss": 4.757209777832031 }, { "id": "identity_325109", "image": "A-OKVQA/images/189153.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the thing on this for?\nChoices:\n(A) warming up\n(B) cheating\n(C) just aesthetics\n(D) making noise" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.8515625, "loss": 4.2217864990234375 }, { "id": "identity_37305", "image": "AI2D/images/3187.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is at the very top of the plant?\nChoices:\n(A) shoot tip\n(B) leaf\n(C) none of the above\n(D) root tip" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.515625, "loss": 4.2264204025268555 }, { "id": "identity_2443", "image": "TextVQA/images/003bed7fa4de9fd9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the black sign?" }, { "from": "gpt", "value": "The answer is lil siam" } ], "clip_score": 17.25, "loss": 4.036757469177246 }, { "id": "identity_295294", "image": "Super-CLEVR/images/superCLEVR_new_000278.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the material of the large brown airplane the same as the tiny object in front of the small shiny bicycle?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 31.3125, "loss": 4.573394775390625 }, { "id": "identity_208146", "image": "PMC-VQA/images/PMC6548024_fig5_488939.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the white line at the bottom of the image represent?" }, { "from": "gpt", "value": "The answer is A scale bar" } ], "clip_score": 14.8984375, "loss": 4.384109020233154 }, { "id": "identity_281301", "image": "PMC-VQA/images/PMC4378709_fig6_373043.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the identified condition of the mass?\nChoices:\n(A) Lipoma\n(B) Sarcoma\n(C) Lymphoma\n(D) Osteoma" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.03125, "loss": 4.423332214355469 }, { "id": "identity_262737", "image": "FigureQA/images/10575.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Medium Orchid or Orange Red?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.71875, "loss": 4.678862571716309 }, { "id": "identity_97007", "image": "IconQA/images/80840.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shirt's collar?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 24.984375, "loss": 4.090488910675049 }, { "id": "identity_334405", "image": "TQA/images/question_images/earth_moon_phases_1256.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Observing the moon from the earth, what happens when the moon is directly behind this?\nChoices:\n(A) 2nd quarter\n(B) full moon\n(C) 1st quarter\n(D) new moon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.0625, "loss": 4.270577430725098 }, { "id": "identity_287535", "image": "A-OKVQA/images/302141.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country does this location belong to? \n(A) Canada \n(B) Spain \n(C) Mexico \n(D) France" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.4453125, "loss": 4.101731300354004 }, { "id": "identity_198339", "image": "A-OKVQA/images/119443.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many graffiti tags are visible on the wall behind the person?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.609375, "loss": 4.433113098144531 }, { "id": "identity_274422", "image": "A-OKVQA/images/471737.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which individual is credited with inventing this vehicle?\nChoices:\n(A) richard trevithick\n(B) orville wright\n(C) jeff goldblum\n(D) bill nye" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.75, "loss": 4.152838230133057 }, { "id": "identity_263124", "image": "Super-CLEVR/images/superCLEVR_new_000958.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a metallic object that is the same size as the bus?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 25.03125, "loss": 5.196837425231934 }, { "id": "identity_326946", "image": "PMC-VQA/images/PMC7706203_Fig2_311534.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of section is depicted in this MRI image?\nChoices:\n(A) Sagittal section,\n(B) Coronal section,\n(C) Axial section,\n(D) Oblique section" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.328125, "loss": 4.627944469451904 }, { "id": "identity_28284", "image": "A-OKVQA/images/252086.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fun activity is shown?\nChoices:\n(A) free fall\n(B) water slide\n(C) rollar coaster\n(D) bumper cars" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.734375, "loss": 4.210433006286621 }, { "id": "identity_147169", "image": "MapQA/images/map_281.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states fall into the 52.6%-55.6% range for the percentage of runs?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 32.1875, "loss": 4.41688871383667 }, { "id": "identity_305149", "image": "TextVQA/images/03235d26a98bcb0f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is this motorcycle?" }, { "from": "gpt", "value": "The answer is fc" } ], "clip_score": 26.265625, "loss": 4.695777416229248 }, { "id": "identity_121097", "image": "VQA2.0/images/289381.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the smallest item on the bottom shelf?" }, { "from": "gpt", "value": "The answer is Small brown shoes" } ], "clip_score": 16.484375, "loss": 4.330485820770264 }, { "id": "identity_318833", "image": "ScienceQA/images/8534.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these organisms contains matter that was once part of this?\nChoices:\n(A) bear sedge\n(B) barren-ground caribou\n(C) mushroom" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.859375, "loss": 4.130519866943359 }, { "id": "identity_306470", "image": "TQA/images/abc_question_images/human_system_reproductory_17042.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the Scrotum?\nChoices:\n(A) C\n(B) M\n(C) V\n(D) N" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.9375, "loss": 4.216048240661621 }, { "id": "identity_129533", "image": "VQA-RAD/images/synpic33892.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the uploader mentioned in the image?" }, { "from": "gpt", "value": "The answer is Neel Vaidya" } ], "clip_score": 16.578125, "loss": 4.57171630859375 }, { "id": "identity_317299", "image": "FigureQA/images/7351.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the Orange Red portion larger than the Gray portion?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.5, "loss": 4.13046932220459 }, { "id": "identity_19935", "image": "A-OKVQA/images/272463.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the van at the end of the row of the left?\nChoices:\n(A) red\n(B) yellow\n(C) brown\n(D) green" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.28125, "loss": 4.023626327514648 }, { "id": "identity_227823", "image": "AI2D/images/121.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the relationship between the forces acting on the Earth and Moon in the diagram, and how do these forces affect their respective positions and movements?" }, { "from": "gpt", "value": "The answer is Forces balance to maintain stable Earth-Moon orbit" } ], "clip_score": 33.5, "loss": 4.13592529296875 }, { "id": "identity_187860", "image": "A-OKVQA/images/488118.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct national flags can be seen in the crowd?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 15.8125, "loss": 4.321723937988281 }, { "id": "identity_447", "image": "ScienceQA/images/458.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which air temperature was measured within the outlined area shown?\nChoices:\n(A) 3°C\n(B) -17°C\n(C) -4°C" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.765625, "loss": 4.0145344734191895 }, { "id": "identity_324180", "image": "A-OKVQA/images/412002.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of staircase is depicted?\nChoices:\n(A) deviated\n(B) spiral\n(C) one way\n(D) floral" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.1328125, "loss": 4.532888412475586 }, { "id": "identity_319283", "image": "PMC-VQA/images/PMC7285026_microorganisms-08-00634-f001_188734.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of image was taken of these cells?\nChoices:\n(A) SEM image\n(B) TEM image\n(C) Confocal image\n(D) Bright-field image" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.890625, "loss": 4.554226875305176 }, { "id": "identity_332255", "image": "PMC-VQA/images/PMC5519540_Fig3_193343.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the dotted lines indicate in these images?\nChoices:\n(A) Blood vessels.\n(B) Outline of some lungs.\n(C) Outline of a heart.\n(D) Outline of a digestive system." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.84375, "loss": 4.092343807220459 }, { "id": "identity_283816", "image": "PMC-VQA/images/PMC7396416_Fig7_224247.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which procedure is being shown in the fluoroscopic image? \nOptions:\n(A) Renal artery angiography\n(B) Pulmonary artery embolization\n(C) Gastroduodenal artery angiography\n(D) Femoral artery embolization" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.65625, "loss": 4.395962238311768 }, { "id": "identity_498", "image": "IconQA/images/46552.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner less likely to land?\nChoices:\n(A) neither; white and pink are equally likely\n(B) white\n(C) pink" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.21875, "loss": 4.51846981048584 }, { "id": "identity_197029", "image": "VQA2.0/images/471860.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the sign in the background?" }, { "from": "gpt", "value": "The answer is END CT" } ], "clip_score": 17.96875, "loss": 5.3098320960998535 }, { "id": "identity_33168", "image": "ScienceQA/images/8239.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) New Hampshire\n(B) Connecticut\n(C) New York\n(D) Rhode Island" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.703125, "loss": 4.246715068817139 }, { "id": "identity_292111", "image": "FigureQA/images/90122.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Seafoam the color with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.8125, "loss": 4.25479793548584 }, { "id": "identity_329733", "image": "PMC-VQA/images/PMC3758542_RSOB130081F2_228264.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the white spheres represent in the phase contrast images?\nChoices:\n(A) cell borders\n(B) nuclei\n(C) mitochondria\n(D) cytoplasm" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.6875, "loss": 4.541889190673828 }, { "id": "identity_34143", "image": "PMC-VQA/images/PMC6462822_f0045_465006.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is represented by the color green in the axial plane? \nChoices:\n(A) Posterior - anterior \n(B) Superior - inferior \n(C) Anterior - posterior \n(D) Inferior - superior. " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.8125, "loss": 4.68553352355957 }, { "id": "identity_331422", "image": "A-OKVQA/images/477343.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person in red doing with the attached object?\nChoices:\n(A) throwing it\n(B) pulling it\n(C) painting it\n(D) kicking it" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.765625, "loss": 4.54354190826416 }, { "id": "identity_334238", "image": "PMC-VQA/images/PMC6757074_Fig5_40839.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the radiological finding in this image?\nChoices:\n(A) Fluid in an organ\n(B) Free air in a cavity\n(C) Gas in a defect\n(D) Calcified mass in a region" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.078125, "loss": 4.904078483581543 }, { "id": "identity_322746", "image": "A-OKVQA/images/459786.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these decorations made of?\nChoices:\n(A) candy canes\n(B) plants\n(C) gnomes\n(D) paper airplanes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.1953125, "loss": 4.296019077301025 }, { "id": "identity_130059", "image": "VQA-AS/images/4461.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the pond?" }, { "from": "gpt", "value": "The answer is Irregular oval" } ], "clip_score": 34.21875, "loss": 4.256282329559326 }, { "id": "identity_8879", "image": "ScienceQA/images/17238.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following organisms is the primary consumer in this food web?\nChoices:\n(A) black bear\n(B) parasol fungus\n(C) bolete fungus\n(D) silver maple" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.59375, "loss": 4.200472354888916 }, { "id": "identity_277413", "image": "DocVQA/images/gncb0054_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which session marks the conclusion of the 'PROGRAM OVERVIEW'?" }, { "from": "gpt", "value": "The answer is TIMING FOR DTS" } ], "clip_score": 23.78125, "loss": 4.170339107513428 }, { "id": "identity_12755", "image": "FigureQA/images/52684.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.234375, "loss": 4.6675944328308105 }, { "id": "identity_29545", "image": "ScienceQA/images/4888.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) Barbados\n(B) Saint Lucia\n(C) Saint Kitts and Nevis\n(D) Saint Vincent and the Grenadines" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.0, "loss": 4.231833457946777 }, { "id": "identity_271176", "image": "PMC-VQA/images/PMC5432899_Fig2_165183.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the plant is depicted in image 'd' among the given choices? Is it:\n(A) Habitat\n(B) Leaf Adaxial Surface\n(C) Leaf Abaxial Surface\n(D) Flower, Different parts in separation, front view" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.140625, "loss": 4.685937881469727 }, { "id": "identity_282246", "image": "Super-CLEVR/images/superCLEVR_new_000371.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the object located in front of the large rubber bus and to the left of the tiny school bus?" }, { "from": "gpt", "value": "The answer is regular bus" } ], "clip_score": 26.625, "loss": 5.1486358642578125 }, { "id": "identity_290614", "image": "A-OKVQA/images/6998.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the boy riding his bike located in terms of area?\nChoices:\n(A) sidewalk\n(B) street\n(C) boardwalk\n(D) bike lane" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.3125, "loss": 4.168784141540527 }, { "id": "identity_274151", "image": "PMC-VQA/images/PMC3281964_pone-0031618-g001_126322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this image, how many optic lobes can be seen?\nChoices:\n(A) One\n(B) Two\n(C) Three\n(D) Four" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.796875, "loss": 4.158226013183594 }, { "id": "identity_193322", "image": "A-OKVQA/images/539233.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of electronic device is on the desk to the left of the lamp?" }, { "from": "gpt", "value": "The answer is A laptop" } ], "clip_score": 24.8125, "loss": 4.090399265289307 }, { "id": "identity_275992", "image": "TQA/images/question_images/human_system_circulatory_3649.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the role of the ventricles in the heart?\nChoices:\n(A) Prevent backflow of the blood into the atrium\n(B) Send blood out of the heart\n(C) None\n(D) Receive oxygenated blood from the lungs" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.5625, "loss": 4.284049034118652 }, { "id": "identity_273390", "image": "IconQA/images/90663.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there a triangle as a face in this shape?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.25, "loss": 4.226235866546631 }, { "id": "identity_331058", "image": "PMC-VQA/images/PMC8266210_f1_3952.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was used to create image B?\nChoices:\n(A) MRI \n(B) CT \n(C) PET \n(D) SPECT" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.375, "loss": 4.05757999420166 }, { "id": "identity_186315", "image": "VQA2.0/images/28853.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible windows are there on the building to the left?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 18.09375, "loss": 4.3740644454956055 }, { "id": "identity_188425", "image": "VizWiz/images/VizWiz_train_00004219.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tile floor?" }, { "from": "gpt", "value": "The answer is Light beige" } ], "clip_score": 24.28125, "loss": 4.178810119628906 }, { "id": "identity_65754", "image": "AI2D/images/4167.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of rocks are represented in this rock cycle diagram?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 26.25, "loss": 4.023600101470947 }, { "id": "identity_277741", "image": "A-OKVQA/images/418949.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which professional athlete is associated with this number? \nOptions:\n(A) Nikolai Khabibulin\n(B) Ichiro Suzuki\n(C) Andruw Jones\n(D) Wayne Gretzky" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.109375, "loss": 4.14004373550415 }, { "id": "identity_25258", "image": "PMC-VQA/images/PMC7865008_Fig10_361447.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the yellow parts in the 3D model? \nChoices:\n(A) T-cell epitopes \n(B) Rest of the residues \n(C) B-cell epitopes \n(D) Antibodies " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.96875, "loss": 4.408085346221924 }, { "id": "identity_305749", "image": "VQA-RAD/images/synpic34449.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where in the image are these located?" }, { "from": "gpt", "value": "The answer is High on the image" } ], "clip_score": 19.34375, "loss": 4.633541584014893 }, { "id": "identity_288563", "image": "TabMWP/images/26380.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.484375, "loss": 4.230365753173828 }, { "id": "identity_3555", "image": "A-OKVQA/images/130339.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person on the left wearing?\nChoices:\n(A) glasses\n(B) fedora\n(C) top hat\n(D) boa" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.6640625, "loss": 4.65985107421875 }, { "id": "identity_8520", "image": "A-OKVQA/images/296544.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What main dish is served here?\nChoices:\n(A) flower soup\n(B) pizza\n(C) flower cake\n(D) meat stew" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.515625, "loss": 4.377851486206055 }, { "id": "identity_290448", "image": "IconQA/images/45601.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) flip\n(B) turn\n(C) slide" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.203125, "loss": 4.088299751281738 }, { "id": "identity_334163", "image": "ScienceQA/images/12568.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Des Moines\n(B) Davenport\n(C) Cedar Rapids\n(D) Springfield" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.125, "loss": 4.3441996574401855 }, { "id": "identity_276414", "image": "TextVQA/images/8f693bc2dd576300.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the beer located at the bottom?" }, { "from": "gpt", "value": "The answer is pedigree" } ], "clip_score": 15.734375, "loss": 4.379889965057373 }, { "id": "identity_23292", "image": "FigureQA/images/50278.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Tomato the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.046875, "loss": 4.038866996765137 }, { "id": "identity_63314", "image": "ScienceQA/images/5113.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the state directly south of the highlighted state?" }, { "from": "gpt", "value": "The answer is Oregon" } ], "clip_score": 30.328125, "loss": 4.107038497924805 }, { "id": "identity_188101", "image": "PMC-VQA/images/PMC6214163_Fig1_390456.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the condition of the left lung in the image?" }, { "from": "gpt", "value": "The answer is Left lung appears normal" } ], "clip_score": 29.609375, "loss": 4.434863090515137 }, { "id": "identity_18269", "image": "FigureQA/images/84560.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Pale Green have the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.09375, "loss": 4.007160186767578 }, { "id": "identity_323677", "image": "AI2D/images/3211.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter indicates Leaf?\nChoices:\n(A) I\n(B) K\n(C) J\n(D) B" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.90625, "loss": 4.329528331756592 }, { "id": "identity_280236", "image": "AI2D/images/1659.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term used to describe the path that the Earth follows around the Sun?\nChoices:\n(A) penumbra\n(B) lunar eclipse\n(C) umbra\n(D) orbit" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.59375, "loss": 4.102864742279053 }, { "id": "identity_27727", "image": "A-OKVQA/images/287105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the hardware on the doors brown?\nChoices:\n(A) rust\n(B) patina\n(C) stain\n(D) paint" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.3203125, "loss": 4.111562252044678 }, { "id": "identity_12567", "image": "A-OKVQA/images/108075.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the man using the computer to do?\nChoices:\n(A) game\n(B) watch movie\n(C) film\n(D) type" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.984375, "loss": 4.173023223876953 }, { "id": "identity_305675", "image": "FigureQA/images/75409.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the area under the Dark Orange curve the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.84375, "loss": 4.523245334625244 }, { "id": "identity_29920", "image": "Super-CLEVR/images/superCLEVR_new_000721.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The tiny object behind the small thing that is in front of the large bicycle is what color?" }, { "from": "gpt", "value": "The answer is cyan" } ], "clip_score": 32.90625, "loss": 4.015300273895264 }, { "id": "identity_32696", "image": "A-OKVQA/images/563713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country would this vase typically originate from?\nChoices:\n(A) china\n(B) ethiopia\n(C) greece\n(D) denmark" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.5, "loss": 4.251450538635254 }, { "id": "identity_25172", "image": "DVQA/images/bar_train_00060600.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second group of bars from the bottom?" }, { "from": "gpt", "value": "The answer is shift" } ], "clip_score": 24.703125, "loss": 5.009298324584961 }, { "id": "identity_261333", "image": "TQA/images/abc_question_images/ocean_zones_17136.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, which label corresponds to the ocean's photic zone?\nChoices:\n(A) Y\n(B) J\n(C) C\n(D) X" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.859375, "loss": 4.151402473449707 }, { "id": "identity_296365", "image": "IconQA/images/81674.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) slide\n(B) turn\n(C) flip" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.734375, "loss": 5.09478235244751 }, { "id": "identity_7139", "image": "FigureQA/images/40039.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Web Gray the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.171875, "loss": 4.09896183013916 }, { "id": "identity_296764", "image": "IconQA/images/51080.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) flip\n(B) turn\n(C) slide" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.203125, "loss": 4.171607971191406 }, { "id": "identity_2123", "image": "AI2D/images/3023.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part is between bract and scape\nChoices:\n(A) roots\n(B) petal\n(C) folliage\n(D) crown" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.5625, "loss": 4.265264511108398 }, { "id": "identity_324634", "image": "A-OKVQA/images/15757.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the individual holding the sail in this image standing?\nChoices:\n(A) whale's back\n(B) roadway\n(C) ocean\n(D) beach" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.921875, "loss": 4.225098133087158 }, { "id": "identity_331839", "image": "A-OKVQA/images/580613.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the word out refer to in this context?\nChoices:\n(A) energy\n(B) personality\n(C) location\n(D) sexual orientation" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.78125, "loss": 4.417482376098633 }, { "id": "identity_335573", "image": "A-OKVQA/images/231476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this place called?\nChoices:\n(A) mall\n(B) taxi stand\n(C) train depot\n(D) repair shop" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.59375, "loss": 4.313803672790527 }, { "id": "identity_282191", "image": "PMC-VQA/images/PMC5360669_Fig1_143718.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of fracture is shown in the X-rays among the following choices: (A) Intraarticular, (B) Pathological, (C) Extra-Articular, or (D) Transverse?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.640625, "loss": 4.341658592224121 }, { "id": "identity_322120", "image": "AI2D/images/628.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this stage depict?\nChoices:\n(A) flower\n(B) seed\n(C) fruit\n(D) death" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.0625, "loss": 4.21488094329834 }, { "id": "identity_325712", "image": "PMC-VQA/images/PMC7031565_os12599-fig-0006_121402.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which vertebrae have this feature?\nChoices:\n(A) thoracic vertebrae\n(B) lumbar vertebrae\n(C) cervical vertebrae\n(D) sacral vertebrae" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.84375, "loss": 4.398494243621826 }, { "id": "identity_111584", "image": "TextVQA/images/4b16ce5e64999340.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name on the badge of the man on the left?" }, { "from": "gpt", "value": "The answer is Browning" } ], "clip_score": 17.734375, "loss": 4.1258544921875 }, { "id": "identity_1598", "image": "VQA2.0/images/424044.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is preparing the food?" }, { "from": "gpt", "value": "The answer is boy" } ], "clip_score": 18.046875, "loss": 4.050529479980469 }, { "id": "identity_217273", "image": "A-OKVQA/images/258249.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible vehicles are there in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 18.875, "loss": 4.366405963897705 }, { "id": "identity_308009", "image": "FigureQA/images/7198.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the minimum value for Dark Magenta?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.078125, "loss": 4.0226054191589355 }, { "id": "identity_210480", "image": "A-OKVQA/images/236295.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many yellow tags can be seen attached to the animal?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.453125, "loss": 4.557056427001953 }, { "id": "identity_217589", "image": "TextVQA/images/9de238fff437f9d0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct light sources can be identified in the image?" }, { "from": "gpt", "value": "The answer is Two light sources" } ], "clip_score": 16.84375, "loss": 4.139071941375732 }, { "id": "identity_273699", "image": "DocVQA/images/shlk0226_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is responsible for overseeing nutrition and snack foods as the hostess?" }, { "from": "gpt", "value": "The answer is Connie Bea Hope" } ], "clip_score": 19.859375, "loss": 4.608597755432129 }, { "id": "identity_62395", "image": "DocVQA/images/gkpg0078_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the research area mentioned in the contract summary?" }, { "from": "gpt", "value": "The answer is nicotinics cognition, animal studies" } ], "clip_score": 23.578125, "loss": 4.5229716300964355 }, { "id": "identity_26713", "image": "A-OKVQA/images/359451.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the person wearing a blue apron sell at the rightmost kiosk?\nChoices:\n(A) food\n(B) shoes\n(C) clothes\n(D) souvenirs" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 7.48046875, "loss": 4.5087080001831055 }, { "id": "identity_108591", "image": "VQA2.0/images/495445.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What object is the child in the foreground holding?" }, { "from": "gpt", "value": "The answer is A white plate" } ], "clip_score": 26.5, "loss": 4.407114028930664 }, { "id": "identity_27301", "image": "TextVQA/images/fd3344235cd93c92.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the bold blue word on the page?" }, { "from": "gpt", "value": "The answer is wishing" } ], "clip_score": 14.5625, "loss": 4.341678142547607 }, { "id": "identity_22946", "image": "PMC-VQA/images/PMC4377159_Fig1_372559.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was found in the left neck of the patient? \nChoices:\n(A) Adenocarcinoma \n(B) Hodgkin Lymphoma \n(C) Tonsillitis \n(D) Leukemia" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.421875, "loss": 4.71255350112915 }, { "id": "identity_131829", "image": "VQA2.0/images/482810.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pigeons are in flight in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 20.796875, "loss": 4.250717639923096 }, { "id": "identity_292679", "image": "TextVQA/images/534f4ce2ac0805ae.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which company manufactured the jars?" }, { "from": "gpt", "value": "The answer is diesel hpr" } ], "clip_score": 27.328125, "loss": 5.301716327667236 }, { "id": "identity_281335", "image": "PMC-VQA/images/PMC8305520_Fig2_17359.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which lobe is the pulmonary nodule located?\nChoices:\n(A) Right upper lobe\n(B) Right lower lobe\n(C) Left upper lobe\n(D) Left lower lobe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.8125, "loss": 4.45249080657959 }, { "id": "identity_140529", "image": "DocVQA/images/hjmg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which employee has the earliest 'FROM' service date?" }, { "from": "gpt", "value": "The answer is Lenore W Alberty" } ], "clip_score": 20.953125, "loss": 4.218554496765137 }, { "id": "identity_308332", "image": "AI2D/images/802.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If there was a sudden decrease in these, which would be the most affected?\nChoices:\n(A) Grass\n(B) None\n(C) Snake\n(D) Hawk" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.21875, "loss": 4.617351055145264 }, { "id": "identity_285425", "image": "VQA2.0/images/187559.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: From what shape is the basket that the giraffes are eating from?" }, { "from": "gpt", "value": "The answer is cone" } ], "clip_score": 25.84375, "loss": 4.458016395568848 }, { "id": "identity_332308", "image": "A-OKVQA/images/10434.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the person jumping in the air?\nChoices:\n(A) exercising\n(B) to fight\n(C) to avoid\n(D) doing trick" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.2109375, "loss": 4.002325534820557 }, { "id": "identity_299668", "image": "Super-CLEVR/images/superCLEVR_new_000069.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the green rubber object located on the right side of the matte bicycle, in front of the object behind the yellow rubber object?" }, { "from": "gpt", "value": "The answer is minivan" } ], "clip_score": 32.5625, "loss": 4.5758056640625 }, { "id": "identity_312393", "image": "PMC-VQA/images/PMC3680930_F1_210959.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of carcinoma is depicted in this image?\nChoices:\n(A) Adenocarcinoma \n(B) Squamous cell carcinoma \n(C) Basal cell carcinoma \n(D) Ductal carcinoma in situ" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.921875, "loss": 4.053980350494385 }, { "id": "identity_2478", "image": "DVQA/images/bar_train_00138906.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the third bar from the bottom in each group?" }, { "from": "gpt", "value": "The answer is firm" } ], "clip_score": 22.578125, "loss": 4.078261375427246 }, { "id": "identity_274758", "image": "FigureQA/images/16963.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Teal or Chartreuse?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.34375, "loss": 4.023098945617676 }, { "id": "identity_71534", "image": "ScienceQA/images/4853.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of texture does the rock in the image exhibit?" }, { "from": "gpt", "value": "The answer is Soapy or greasy texture" } ], "clip_score": 27.578125, "loss": 4.32967472076416 }, { "id": "identity_276684", "image": "PMC-VQA/images/PMC4074584_F4_302021.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the white arrows in figure (a) represent?\nChoices:\n(A) Various mineral-like structures of different sizes on the fibrillar surface\n(B) Bacterial colonies merging in a root canal\n(C) A virus attacking the pulp cell spheres\n(D) Various fibrils of different sizes on the mineral-like surface." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.4375, "loss": 4.128176689147949 }, { "id": "identity_30916", "image": "PMC-VQA/images/PMC6418688_polymers-09-00476-f002_451307.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How are the images sorted? \nChoices:\n(A) Randomly \n(B) According to increasing fluorescence \n(C) According to increasing membrane thickness \n(D) According to increasing lateral chain volume " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.09375, "loss": 4.445941925048828 }, { "id": "identity_129205", "image": "VQA2.0/images/518499.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 15.09375, "loss": 5.199656009674072 }, { "id": "identity_302444", "image": "FigureQA/images/81066.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Web Gray?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.0, "loss": 4.051346302032471 }, { "id": "identity_243649", "image": "GeoQA+/images/3985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB是⊙O的直径,AB=8,点M在⊙O上,∠MAB=20°,N是⁀{MB}的中点,P是直径AB上的一动点,求PM+PN的最小值以及当PM+PN最小时点P的位置。" }, { "from": "gpt", "value": "The answer is \\( 4 \\)" } ], "clip_score": 27.15625, "loss": 4.107885837554932 }, { "id": "identity_283929", "image": "A-OKVQA/images/456077.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what type of location is this event happening? rural, city, residential, or country?\nChoices:\n(A) rural\n(B) city\n(C) residential\n(D) country" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.7265625, "loss": 4.517629146575928 }, { "id": "identity_149271", "image": "DocVQA/images/jhjh0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What state is the Platte County Record-Times published in?" }, { "from": "gpt", "value": "The answer is Wyo" } ], "clip_score": 22.46875, "loss": 4.029833793640137 }, { "id": "identity_258088", "image": "VQA-AS/images/16658.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the potential hazards in the room for the child, and what precautions should be taken to ensure the child's safety?" }, { "from": "gpt", "value": "The answer is Remove wine glass, secure plant, supervise child around dog" } ], "clip_score": 30.28125, "loss": 4.016922473907471 }, { "id": "identity_107716", "image": "VizWiz/images/VizWiz_train_00009679.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the straw in the glass?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 24.28125, "loss": 5.02683162689209 }, { "id": "identity_329850", "image": "PMC-VQA/images/PMC9337940_fig1_360164.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which areas display unusual FDG activity in this image?\nChoices:\n(A) lung \n(B) liver \n(C) thyroid \n(D) intestine" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.859375, "loss": 4.189648628234863 }, { "id": "identity_262173", "image": "TQA/images/abc_question_images/nuclear_energy_17105.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which component is represented by label M in the diagram?\nChoices:\n(A) Boron Control Rods\n(B) Uranium Fuel Rods\n(C) Pump\n(D) Core" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.859375, "loss": 4.776026248931885 }, { "id": "identity_296340", "image": "IconQA/images/61458.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture is the fourth in the sequence after the first picture of a boat?\nChoices:\n(A) crab\n(B) bucket\n(C) boat" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.09375, "loss": 4.029055595397949 }, { "id": "identity_36721", "image": "TQA/images/abc_question_images/ocean_zones_18126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents pelagic?\nChoices:\n(A) A\n(B) L\n(C) D\n(D) U" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.34375, "loss": 4.19319486618042 }, { "id": "identity_222274", "image": "A-OKVQA/images/380734.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many diners are visible in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 28.328125, "loss": 4.460595607757568 }, { "id": "identity_16239", "image": "PMC-VQA/images/PMC4119891_fig1_311018.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is highlighted by the small black arrows in the SEM image of the PLC tube-guide? \nChoices:\n(A) Inner Diameter \n(B) Cell \n(C) Outer Diameter \n(D) Magnification " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.796875, "loss": 4.133389949798584 }, { "id": "identity_315580", "image": "VizWiz/images/VizWiz_train_00014217.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do you have any idea what this could be?" }, { "from": "gpt", "value": "The answer is chipahoy cookies" } ], "clip_score": 28.875, "loss": 4.232398509979248 }, { "id": "identity_290860", "image": "TQA/images/abc_question_images/tides_10145.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following choices represents the letter \"U\"? \n(A) MOON\n(B) OCEAN\n(C) SOLAR SYSTEM\n(D) EARTH" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.546875, "loss": 4.312411308288574 }, { "id": "identity_57400", "image": "DVQA/images/bar_train_00188075.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest value for coffee?" }, { "from": "gpt", "value": "The answer is hide" } ], "clip_score": 26.890625, "loss": 4.77950382232666 }, { "id": "identity_335476", "image": "PMC-VQA/images/PMC6341153_Fig1_425609.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the solid arrow point to?\nChoices:\n(A) pituitary stalk \n(B) medulla oblongata \n(C) optic chiasm \n(D) pineal gland" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.765625, "loss": 4.974011421203613 }, { "id": "identity_334465", "image": "A-OKVQA/images/529314.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How are the people in the stands here likely connected to the players?\nChoices:\n(A) protesters\n(B) family\n(C) employees\n(D) advertising sponsors" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.078125, "loss": 4.757791519165039 }, { "id": "identity_295234", "image": "A-OKVQA/images/16578.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of car is identified by the number 42 on its top?\nChoices:\n(A) convertible\n(B) taxi\n(C) shipping\n(D) prison" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.109375, "loss": 4.244126319885254 }, { "id": "identity_291032", "image": "PMC-VQA/images/PMC5654943_Fig1_230962.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which cerebral hemisphere is the mass situated?\nChoices:\n(A) Left\n(B) Right\n(C) Frontal\n(D) Occipital" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.578125, "loss": 4.106255531311035 }, { "id": "identity_327274", "image": "TextVQA/images/fd3344235cd93c92.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the bold blue word on this?" }, { "from": "gpt", "value": "The answer is wishing" } ], "clip_score": 13.9609375, "loss": 4.209889888763428 }, { "id": "identity_1104", "image": "Super-CLEVR/images/superCLEVR_new_000790.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a tiny bus that is the same color as the big matte aeroplane; what is it made of?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 28.828125, "loss": 4.039538383483887 }, { "id": "identity_23868", "image": "ScienceQA/images/15314.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Connecticut?\nChoices:\n(A) Trenton\n(B) New Haven\n(C) Hartford\n(D) New Orleans" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.203125, "loss": 4.123690605163574 }, { "id": "identity_277512", "image": "A-OKVQA/images/306420.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the scarf worn by the women?\nChoices:\n(A) neckies\n(B) dupatta\n(C) hijab\n(D) dickies" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.09375, "loss": 4.648116588592529 }, { "id": "identity_33237", "image": "A-OKVQA/images/106484.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is he with the dog?\nChoices:\n(A) selling it\n(B) walk it\n(C) exercising\n(D) stealing it" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.28125, "loss": 4.126138210296631 }, { "id": "identity_22318", "image": "PMC-VQA/images/PMC6410175_nanomaterials-09-00263-f003_448201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the green and purple dots indicate in the images? \nChoices:\n(A) Single NCs \n(B) Zn and Mn \n(C) ARM \n(D) None of the above " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.890625, "loss": 5.073309421539307 }, { "id": "identity_273987", "image": "TabMWP/images/34663.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which day did the hotel have the lowest number of guests, according to the data looked up by the front desk clerk?\nChoices:\n(A) Friday\n(B) Saturday\n(C) Sunday\n(D) Monday" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 37.3125, "loss": 4.375827789306641 }, { "id": "identity_337247", "image": "IconQA/images/87634.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a turtle. What is the second image?\nChoices:\n(A) fish\n(B) sub\n(C) turtle" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.5, "loss": 4.103073596954346 }, { "id": "identity_151106", "image": "ScienceQA/images/9024.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the hair clip most likely made of?" }, { "from": "gpt", "value": "The answer is Metal" } ], "clip_score": 24.171875, "loss": 4.0261125564575195 }, { "id": "identity_38563", "image": "A-OKVQA/images/428595.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the women here find most interesting?\nChoices:\n(A) walkway\n(B) goose\n(C) child\n(D) duck" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.8671875, "loss": 4.205019950866699 }, { "id": "identity_316905", "image": "PMC-VQA/images/PMC6230427_fig1_394826.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the initial diagnosis based on these scan results?\nChoices:\n(A) Hypermetabolic activity\n(B) Malignant lymphoma\n(C) Tuberculosis\n(D) Both A and B" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.359375, "loss": 5.135953903198242 }, { "id": "identity_193226", "image": "VQA2.0/images/476852.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lenses can be seen on the desk?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.984375, "loss": 4.355190277099609 }, { "id": "identity_204465", "image": "PMC-VQA/images/PMC6262857_BIO038661F3_402705.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label indicated by the white arrowhead?" }, { "from": "gpt", "value": "The answer is lp" } ], "clip_score": 22.5, "loss": 4.105851173400879 }, { "id": "identity_143266", "image": "MapQA/images/map_3325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state has the darkest color shade on the map?" }, { "from": "gpt", "value": "The answer is a state name within ten words" } ], "clip_score": 28.75, "loss": 4.569588661193848 }, { "id": "identity_278622", "image": "DVQA/images/bar_train_00028518.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the bottom-most bar in each group?" }, { "from": "gpt", "value": "The answer is park" } ], "clip_score": 19.265625, "loss": 4.7363057136535645 }, { "id": "identity_185068", "image": "PMC-VQA/images/PMC6441667_Fig7_459427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many teeth are missing from the upper right quadrant?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 26.859375, "loss": 4.516781806945801 }, { "id": "identity_129482", "image": "Super-CLEVR/images/superCLEVR_new_001127.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the object directly above the blue motorcycle?" }, { "from": "gpt", "value": "The answer is elongated with wings" } ], "clip_score": 24.90625, "loss": 4.152124404907227 }, { "id": "identity_5503", "image": "TQA/images/question_images/cycle_nitrogen_6708.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does Fossil Fuel Emissions go towards?\nChoices:\n(A) Organic Matter\n(B) Gaseous Losses\n(C) Precipitation\n(D) Bacteria Fixation" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.515625, "loss": 4.254735946655273 }, { "id": "identity_331697", "image": "A-OKVQA/images/323726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the brown object in the person's hand?\nChoices:\n(A) football\n(B) resin\n(C) glove\n(D) pretzel" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.6328125, "loss": 4.411444664001465 }, { "id": "identity_27247", "image": "A-OKVQA/images/528713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What event is going to take place?\nChoices:\n(A) horse parade\n(B) horse racing\n(C) horse trading\n(D) horse show" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.421875, "loss": 4.183958530426025 }, { "id": "identity_135429", "image": "DVQA/images/bar_train_00115197.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the bar with the highest number of units sold?" }, { "from": "gpt", "value": "The answer is gift" } ], "clip_score": 22.515625, "loss": 4.502500534057617 }, { "id": "identity_291192", "image": "DVQA/images/bar_train_00019132.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of bars with values less than 1?" }, { "from": "gpt", "value": "The answer is zero" } ], "clip_score": 19.65625, "loss": 4.111113548278809 }, { "id": "identity_7585", "image": "AI2D/images/3687.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which connects to the esophagus?\nChoices:\n(A) Rectum\n(B) Mouth\n(C) Liver\n(D) Pancreas" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.625, "loss": 4.212514877319336 }, { "id": "identity_28860", "image": "A-OKVQA/images/387704.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What country does the flag resemble?\nChoices:\n(A) american\n(B) madagascar\n(C) india\n(D) china" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.90625, "loss": 4.266694068908691 }, { "id": "identity_320873", "image": "IconQA/images/25762.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this?\nChoices:\n(A) turn\n(B) flip\n(C) slide" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.09375, "loss": 4.069913387298584 }, { "id": "identity_66548", "image": "ScienceQA/images/2190.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many components does the toilet plunger have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 28.890625, "loss": 4.1474809646606445 }, { "id": "identity_7570", "image": "TQA/images/abc_question_images/human_system_eye_16126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which represents a clear, protective covering on the outside of the eye?\nChoices:\n(A) K\n(B) T\n(C) C\n(D) P" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.875, "loss": 4.375499725341797 }, { "id": "identity_29021", "image": "A-OKVQA/images/358100.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What style of vehicle is the taxi cab?\nChoices:\n(A) sedan\n(B) truck\n(C) suv\n(D) compact" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.53125, "loss": 4.223226070404053 }, { "id": "identity_281663", "image": "PMC-VQA/images/PMC5266295_pone.0170598.g001_119344.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What specific area of the brain is being examined in these maps? Is it:\n(A) Brain stem\n(B) Midbrain\n(C) Visual cortex\n(D) Cerebellum" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.046875, "loss": 4.205667972564697 }, { "id": "identity_328590", "image": "TextVQA/images/0e88544270a85d06.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What logo is underneath this?" }, { "from": "gpt", "value": "The answer is keen" } ], "clip_score": 17.265625, "loss": 4.222637176513672 }, { "id": "identity_315586", "image": "A-OKVQA/images/490338.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the store with a sign featuring a blue arrow sell?\nChoices:\n(A) dvd\n(B) udon\n(C) sushi\n(D) vhs" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.2421875, "loss": 4.4596028327941895 }, { "id": "identity_133160", "image": "A-OKVQA/images/577122.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many electrical outlets are visible above the counter?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 14.671875, "loss": 4.730668067932129 }, { "id": "identity_277288", "image": "IconQA/images/47032.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the probability of selecting a black marble without looking?\nChoices:\n(A) certain\n(B) probable\n(C) unlikely\n(D) impossible" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.0625, "loss": 4.261681079864502 }, { "id": "identity_294473", "image": "Super-CLEVR/images/superCLEVR_new_000838.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do any small shiny objects have the same shape as the red rubber object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 17.46875, "loss": 5.062124252319336 }, { "id": "identity_7403", "image": "DVQA/images/bar_train_00058756.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the accuracy of the algorithm female larger than jar?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 29.828125, "loss": 4.04425573348999 }, { "id": "identity_36985", "image": "PMC-VQA/images/PMC7003845_vop12710-fig-0002_112141.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the AC in the image? \nChoices:\n(A) The lens \n(B) The iris \n(C) A tumor \n(D) A structure in the stroma. " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.96875, "loss": 4.074520111083984 }, { "id": "identity_7434", "image": "ScienceQA/images/17694.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) Tonga\n(B) Tuvalu\n(C) Fiji\n(D) Kiribati" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.703125, "loss": 4.122528076171875 }, { "id": "identity_275481", "image": "ScienceQA/images/15324.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which animal's skin is more effectively adapted as a warning sign to deter predators: the Spanish shawl nudibranch or the impala?\nChoices:\n(A) Spanish shawl nudibranch\n(B) impala" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.65625, "loss": 4.060940742492676 }, { "id": "identity_297511", "image": "IconQA/images/97473.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner more likely to land on: white or green?\nChoices:\n(A) white\n(B) green" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.875, "loss": 4.061306953430176 }, { "id": "identity_319099", "image": "FigureQA/images/94952.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.890625, "loss": 4.02906608581543 }, { "id": "identity_322522", "image": "A-OKVQA/images/358523.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this?\n\nChoices:\n(A) puzzle\n(B) photo\n(C) photoshopped picture\n(D) drawing" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.265625, "loss": 4.634329795837402 }, { "id": "identity_277119", "image": "TextVQA/images/903dec817b5e9837.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is located on his sleeve?" }, { "from": "gpt", "value": "The answer is bb" } ], "clip_score": 16.84375, "loss": 4.354520797729492 }, { "id": "identity_118408", "image": "TextVQA/images/951eca6e9b73b017.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color theme of the image?" }, { "from": "gpt", "value": "The answer is gray" } ], "clip_score": 15.6953125, "loss": 4.281304359436035 }, { "id": "identity_294937", "image": "ScienceQA/images/1107.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being indicated by the highlighted area?\nOptions:\n(A) Tuvalu\n(B) the Federated States of Micronesia\n(C) Palau\n(D) Fiji" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.625, "loss": 4.649848461151123 }, { "id": "identity_330254", "image": "PMC-VQA/images/PMC6347193_pone.0211187.g012_427407.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the two components stained in this image?\nChoices:\n(A) Endoplasmic reticulum and nucleus.\n(B) Cytoskeleton and mitochondria.\n(C) Golgi apparatus and nucleus.\n(D) Lysosomes and ribosomes." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.421875, "loss": 4.614497184753418 }, { "id": "identity_310547", "image": "Super-CLEVR/images/superCLEVR_new_000604.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the large object that is in front of the green dirtbike and on the left side of the gray plane?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 27.4375, "loss": 4.439748764038086 }, { "id": "identity_124820", "image": "VQA2.0/images/9321.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles are parked in front of the building?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 17.046875, "loss": 4.797603607177734 }, { "id": "identity_332955", "image": "A-OKVQA/images/404710.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this activity called?\nChoices:\n(A) jumping\n(B) hurling\n(C) vaulting\n(D) sailing" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.34375, "loss": 4.543743133544922 }, { "id": "identity_271339", "image": "IconQA/images/37912.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there a circle as a face in this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.4375, "loss": 4.0334672927856445 }, { "id": "identity_2828", "image": "ScienceQA/images/3080.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which air temperature was measured within the outlined area shown?\nChoices:\n(A) 5°C\n(B) 27°C\n(C) 14°C" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.984375, "loss": 4.107337951660156 }, { "id": "identity_26719", "image": "PMC-VQA/images/PMC6455102_f4_462834.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the likely cause of the focal uptakes seen in image A? \nChoices:\n(A) Osteoarthritis \n(B) Rheumatoid arthritis \n(C) Soft tissue injury \n(D) Cancer metastasis " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.5625, "loss": 4.399369239807129 }, { "id": "identity_50203", "image": "MapQA/images/map_2796.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a percentage of agencies within the range of 81.5%-86.4%?" }, { "from": "gpt", "value": "The answer is an integer number" } ], "clip_score": 34.875, "loss": 4.176386833190918 }, { "id": "identity_235716", "image": "ScienceQA/images/15022.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which property do these two objects have in common, and how does this property affect their usage in everyday tasks?" }, { "from": "gpt", "value": "The answer is Both are bendable, aiding in flexibility and manipulation" } ], "clip_score": 23.09375, "loss": 4.0497050285339355 }, { "id": "identity_310914", "image": "VQA2.0/images/285131.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is happening?" }, { "from": "gpt", "value": "The answer is cars" } ], "clip_score": 20.375, "loss": 4.536588191986084 }, { "id": "identity_281680", "image": "A-OKVQA/images/213922.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for them wearing those jackets?\nChoices:\n(A) rain repellant\n(B) snow repellant\n(C) hot\n(D) cold" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.9921875, "loss": 4.371667385101318 }, { "id": "identity_224832", "image": "Geometry3K/images/662.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the value of \\( x \\) and then determine the measure of \\(\\angle K\\)" }, { "from": "gpt", "value": "The answer is 62" } ], "clip_score": 31.421875, "loss": 4.211117267608643 }, { "id": "identity_315771", "image": "FigureQA/images/66744.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this color the one with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.5, "loss": 4.2187395095825195 }, { "id": "identity_16753", "image": "Super-CLEVR/images/superCLEVR_new_000020.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other things that have the same shape as the large blue object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 17.84375, "loss": 4.996517658233643 }, { "id": "identity_316842", "image": "ScienceQA/images/10243.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which place is highlighted?\nChoices:\n(A) Vanuatu\n(B) Fiji\n(C) New Zealand\n(D) Nauru" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.828125, "loss": 4.183163642883301 }, { "id": "identity_25931", "image": "A-OKVQA/images/237919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the woman in the chair's role?\nChoices:\n(A) ball boy\n(B) line judge\n(C) referee\n(D) chair umpire" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.703125, "loss": 4.553654670715332 }, { "id": "identity_322424", "image": "PMC-VQA/images/PMC6022295_Fig2_334194.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the specific finding on this abdominal CT?\nChoices:\n(A) multiple nodules\n(B) diverticulitis\n(C) ulcers\n(D) polypoid lesions" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.390625, "loss": 4.7834391593933105 }, { "id": "identity_154994", "image": "ScienceQA/images/7435.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the prefix \"Ante\" indicate in the words provided?" }, { "from": "gpt", "value": "The answer is Before" } ], "clip_score": 24.296875, "loss": 4.132089614868164 }, { "id": "identity_324129", "image": "A-OKVQA/images/106375.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is there a bike on the back of this vehicle?\nChoices:\n(A) transporting it\n(B) stealing it\n(C) found it\n(D) selling it" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.140625, "loss": 4.148221969604492 }, { "id": "identity_300245", "image": "TQA/images/abc_question_images/cycle_nitrogen_16710.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is released into the air from this? \nChoices:\n(A) AMMONIA\n(B) UPTAKE BY PLANTS\n(C) NITRATES\n(D) LIGHTNING" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.84375, "loss": 4.791316032409668 }, { "id": "identity_37985", "image": "ScienceQA/images/18543.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which solution has a higher concentration of blue particles?\nChoices:\n(A) Solution A\n(B) Solution B\n(C) neither; their concentrations are the same" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.3125, "loss": 4.129619598388672 }, { "id": "identity_254278", "image": "A-OKVQA/images/471188.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image are owned by the company Greyhound, and what are their types?" }, { "from": "gpt", "value": "The answer is One bus" } ], "clip_score": 21.515625, "loss": 5.140059947967529 }, { "id": "identity_301386", "image": "FigureQA/images/58344.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Gold the smallest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.203125, "loss": 4.020651817321777 }, { "id": "identity_22954", "image": "PMC-VQA/images/PMC3168817_F3_107656.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the CT scan show about the maxillary sinus? \nChoices:\n(A) improvement \n(B) no change \n(C) deterioration \n(D) inflammation. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.234375, "loss": 4.2901763916015625 }, { "id": "identity_109527", "image": "VQA2.0/images/288032.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many benches are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 26.0, "loss": 4.352474212646484 }, { "id": "identity_76801", "image": "AI2D/images/2046.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of plants are shown as producers in the diagram?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.0, "loss": 4.4660539627075195 }, { "id": "identity_332323", "image": "DVQA/images/bar_train_00118428.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest total value?" }, { "from": "gpt", "value": "The answer is player" } ], "clip_score": 25.96875, "loss": 4.0737528800964355 }, { "id": "identity_324864", "image": "PMC-VQA/images/PMC4502274_fig2_405946.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being compressed by the hiatal hernia in this image?\nChoices:\n(A) Right atrium and left pulmonary vein \n(B) Left atrium and right pulmonary vein \n(C) Right ventricle and left atrium \n(D) Left ventricle and right atrium" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.390625, "loss": 4.1614508628845215 }, { "id": "identity_270572", "image": "PMC-VQA/images/PMC6942098_Fig4_93900.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following findings is the most prominent in the image provided? \n(A) Basilar peribronchial consolidative opacities \n(B) Bibasilar septal thickening \n(C) Nodular peribronchial ground glass opacities \n(D) Patchy peribronchial ground glass opacities" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.484375, "loss": 4.296642780303955 }, { "id": "identity_5884", "image": "A-OKVQA/images/189903.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What pattern is the paper under the pizza?\nChoices:\n(A) checkerboard\n(B) striped\n(C) plain\n(D) swirl" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.6875, "loss": 4.348631858825684 }, { "id": "identity_23173", "image": "PMC-VQA/images/PMC6861408_Fig15_70105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the temporal bone is affected? \nChoices:\n(A) Squamous part \n(B) Tympanic part \n(C) Petrous part \n(D) Mastoid part " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.53125, "loss": 4.187808036804199 }, { "id": "identity_26849", "image": "PMC-VQA/images/PMC4466355_fig1_396404.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the CT scan show 3 months before admission? \nChoices:\n(A) Abnormal growths in the lungs \n(B) Fluid buildup in the lungs \n(C) Air pockets in the lungs \n(D) Normal lung tissue appearance " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.140625, "loss": 4.2977776527404785 }, { "id": "identity_303844", "image": "PMC-VQA/images/PMC7660178_materials-13-04723-f013_296117.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the surface shown in this image?\nChoices:\n(A) Surface I \n(B) Surface II \n(C) Surface III \n(D) Surface IV" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.21875, "loss": 4.247232913970947 }, { "id": "identity_323466", "image": "TextVQA/images/b230346e7d282723.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written below this?" }, { "from": "gpt", "value": "The answer is southworld" } ], "clip_score": 13.53125, "loss": 5.065644264221191 }, { "id": "identity_232261", "image": "A-OKVQA/images/223481.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the different types of vehicles visible in the image, and what is the grey car driving on?" }, { "from": "gpt", "value": "The answer is Two cars, street" } ], "clip_score": 23.046875, "loss": 5.273482799530029 }, { "id": "identity_299550", "image": "PMC-VQA/images/PMC3395182_fig3_144750.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is utilized to represent RNA localization in the image?\nChoices:\n(A) Red\n(B) Green\n(C) Blue\n(D) Yellow" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.203125, "loss": 4.185359001159668 }, { "id": "identity_665", "image": "Super-CLEVR/images/superCLEVR_new_001482.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a aeroplane to the left of the tiny metal utility bike; is it the same size as the bus right of the metal utility bike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 30.609375, "loss": 4.748712539672852 }, { "id": "identity_297885", "image": "PMC-VQA/images/PMC6957557_Fig3_99164.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the meaning of the red arrow pointing to in the image?\nChoices:\n(A) The previous gastrostomy site \n(B) The probe insertion site \n(C) The ulcer area after ESD \n(D) The healthy tissue around the ulcer area." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.15625, "loss": 4.376952648162842 }, { "id": "identity_308404", "image": "A-OKVQA/images/159475.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is this person leaning forward?\nChoices:\n(A) is falling\n(B) maintaining balance\n(C) was surprised\n(D) bad back" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.375, "loss": 4.6316657066345215 }, { "id": "identity_325147", "image": "DVQA/images/bar_train_00060600.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second group of bars from the bottom?" }, { "from": "gpt", "value": "The answer is shift" } ], "clip_score": 24.703125, "loss": 5.009298324584961 }, { "id": "identity_294090", "image": "PMC-VQA/images/PMC6664500_Fig1_15932.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the first cranial slide taken in terms of anatomical landmarks?\nChoices:\n(A) L5 \n(B) The common iliac joint \n(C) Above the external iliac node \n(D) Below the internal obturator artery" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.4375, "loss": 4.48584508895874 }, { "id": "identity_329879", "image": "A-OKVQA/images/37932.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As it is walked by someone, what is inside of the dog's mouth?\nChoices:\n(A) umbrella\n(B) frisbee\n(C) bone\n(D) stick" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.40625, "loss": 4.180098533630371 }, { "id": "identity_126751", "image": "A-OKVQA/images/489845.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many video game controllers are visible on the floor?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.75, "loss": 4.771387100219727 }, { "id": "identity_267295", "image": "AI2D/images/3299.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the outer part in the diagram:\n(A) ribosome\n(B) cytoplasm\n(C) lysosome\n(D) cell membrane" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.46875, "loss": 4.036218166351318 }, { "id": "identity_320361", "image": "PMC-VQA/images/PMC7431907_F12_235809.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of lesion is shown in the images?\nChoices:\n(A) small cortico-subcortical stroke lesion\n(B) large cortico-subcortical stroke lesion\n(C) chronic infarct lesion\n(D) normal brain tissue" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.109375, "loss": 4.6341657638549805 }, { "id": "identity_210553", "image": "A-OKVQA/images/435579.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background?" }, { "from": "gpt", "value": "The answer is Eleven" } ], "clip_score": 11.4921875, "loss": 4.401739597320557 }, { "id": "identity_266963", "image": "ScienceQA/images/21203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being indicated by the highlighted area on the map? Is it:\n(A) Trinidad and Tobago\n(B) Haiti\n(C) the Dominican Republic\n(D) Dominica" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.5625, "loss": 4.130826473236084 }, { "id": "identity_856", "image": "VQA-RAD/images/synpic56061.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which region of the brain is impacted?" }, { "from": "gpt", "value": "The answer is Anterior surface" } ], "clip_score": 29.078125, "loss": 4.563469409942627 }, { "id": "identity_317322", "image": "PMC-VQA/images/PMC4270221_fig02_345499.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the procedure used to obtain this? \nChoices:\n(A) CT scan \n(B) X-ray \n(C) MRI \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.84375, "loss": 4.216384410858154 }, { "id": "identity_111410", "image": "VQA2.0/images/390524.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the kite closest to the top edge of the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 24.046875, "loss": 4.06934118270874 }, { "id": "identity_311667", "image": "TextVQA/images/5f2ae2bcddb83a67.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which university is mentioned?" }, { "from": "gpt", "value": "The answer is michigan " } ], "clip_score": 18.640625, "loss": 4.9847798347473145 }, { "id": "identity_38126", "image": "PMC-VQA/images/PMC5946515_Fig2_313250.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the anatomical location of the images shown in the given level? \nChoices:\n(A) Thorax \n(B) Breast \n(C) Cervical region \n(D) Upper limb " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.6875, "loss": 4.511576175689697 }, { "id": "identity_317475", "image": "DVQA/images/bar_train_00116945.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first group of bars from the base?" }, { "from": "gpt", "value": "The answer is sand" } ], "clip_score": 20.6875, "loss": 4.467118740081787 }, { "id": "identity_301279", "image": "PMC-VQA/images/PMC6955838_brb31512-fig-0002_98485.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What method was used to capture this image?\nChoices:\n(A) X-ray\n(B) CT scan\n(C) MRI\n(D) PET" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.265625, "loss": 4.361668586730957 }, { "id": "identity_77237", "image": "TQA/images/abc_question_images/parts_microscope_17198.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the part labeled 'W'?" }, { "from": "gpt", "value": "The answer is Base" } ], "clip_score": 20.9375, "loss": 4.983500003814697 }, { "id": "identity_18807", "image": "FigureQA/images/49115.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Magenta the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.453125, "loss": 4.266039848327637 }, { "id": "identity_237497", "image": "A-OKVQA/images/390086.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the ingredients in the dish and determine which ingredient is unique based on its preparation method or presentation" }, { "from": "gpt", "value": "The answer is meat" } ], "clip_score": 20.3125, "loss": 5.893764495849609 }, { "id": "identity_30075", "image": "A-OKVQA/images/503832.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is closest to the giraffe?\nChoices:\n(A) baby eel\n(B) hat\n(C) zebra\n(D) baby" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.65625, "loss": 4.221383094787598 }, { "id": "identity_286619", "image": "PMC-VQA/images/PMC8109045_FIG1_436471.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which MRI sequence type was utilized? \nOptions:\n(A) CT scan\n(B) T1 MRI sequence\n(C) T2 MRI sequence\n(D) PET scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.4375, "loss": 4.022469520568848 }, { "id": "identity_255181", "image": "ScienceQA/images/13669.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the material of the spatula and describe two properties that make this material suitable for kitchen use" }, { "from": "gpt", "value": "The answer is Rubber, heat resistance, flexibility" } ], "clip_score": 24.984375, "loss": 4.366697311401367 }, { "id": "identity_864", "image": "TextVQA/images/b190e374bbf24171.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what brand is the phone stand in the box?" }, { "from": "gpt", "value": "The answer is drivesafe" } ], "clip_score": 23.046875, "loss": 4.129139423370361 }, { "id": "identity_200917", "image": "A-OKVQA/images/213506.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.453125, "loss": 4.527838706970215 }, { "id": "identity_328786", "image": "PMC-VQA/images/PMC5710314_f1_247204.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What condition is responsible for the radiographic ground glass opacities?\nChoices:\n(A) Asthma\n(B) Bronchitis\n(C) Idiopathic pulmonary fibrosis\n(D) Pneumonia" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.0625, "loss": 4.494532108306885 }, { "id": "identity_312662", "image": "ScienceQA/images/7599.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Boston\n(B) Baltimore\n(C) Annapolis\n(D) Honolulu" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.96875, "loss": 4.729836463928223 }, { "id": "identity_62765", "image": "ScienceQA/images/6837.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many free ends does the tape have?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 22.21875, "loss": 4.5589799880981445 }, { "id": "identity_314959", "image": "FigureQA/images/53222.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this color the smallest segment?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.671875, "loss": 4.1241679191589355 }, { "id": "identity_297861", "image": "PMC-VQA/images/PMC7921924_diagnostics-11-00331-f005_380805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, which specific part of the kidney appears enlarged?\nChoices:\n(A) Left kidney\n(B) Both kidneys\n(C) Renal sinus\n(D) Right kidney" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.640625, "loss": 4.334418296813965 }, { "id": "identity_302096", "image": "PMC-VQA/images/PMC5960142_Fig3_316258.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being shown in the images?\nChoices:\n(A) White blood cells\n(B) Neurons\n(C) Blood vessels\n(D) Muscles" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.359375, "loss": 4.143549919128418 }, { "id": "identity_36205", "image": "IconQA/images/64936.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a train. Which picture is seventh?\nChoices:\n(A) train\n(B) plane\n(C) bear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.515625, "loss": 4.050274848937988 }, { "id": "identity_33746", "image": "A-OKVQA/images/495568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be listened to whose info is on the tent?\nChoices:\n(A) politician\n(B) symphony\n(C) radio\n(D) indie mixtape" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.421875, "loss": 4.236105918884277 }, { "id": "identity_6217", "image": "ScienceQA/images/3301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which material is this basketball jersey made of?\nChoices:\n(A) polyester\n(B) porcelain" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.609375, "loss": 4.293736934661865 }, { "id": "identity_322957", "image": "TQA/images/abc_question_images/human_system_digestive_16085.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the liver?\nChoices:\n(A) T\n(B) W\n(C) C\n(D) P" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.53125, "loss": 4.506529808044434 }, { "id": "identity_31644", "image": "ScienceQA/images/15121.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Pennsylvania?\nChoices:\n(A) Montpelier\n(B) Frankfort\n(C) Harrisburg\n(D) Louisville" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.140625, "loss": 4.295057773590088 }, { "id": "identity_259953", "image": "VQA-AS/images/8164.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are there between the picture closest to the left and the picture closest to the right?" }, { "from": "gpt", "value": "The answer is four" } ], "clip_score": 22.828125, "loss": 4.12256383895874 }, { "id": "identity_307113", "image": "A-OKVQA/images/555910.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is inside the bottle sitting to the right of a red tin box?\nChoices:\n(A) soy sauce\n(B) ketchup\n(C) balsamic vinegar\n(D) oil" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.59375, "loss": 4.059305191040039 }, { "id": "identity_301254", "image": "PMC-VQA/images/PMC6104906_FIG1_357591.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of signal is indicated by the red outline in the image?\nChoices:\n(A) Aortic stenosis signal\n(B) Pulmonary embolism signal\n(C) Mitral regurgitation signal\n(D) Mitral stenosis signal" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.15625, "loss": 4.731249809265137 }, { "id": "identity_23790", "image": "Super-CLEVR/images/superCLEVR_new_000061.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are any brown shiny articulated buss visible?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 23.890625, "loss": 5.147778511047363 }, { "id": "identity_276757", "image": "VQA-RAD/images/synpic32727.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where are the hyperattenuations located in relation to the patient?" }, { "from": "gpt", "value": "The answer is Ribs" } ], "clip_score": 25.09375, "loss": 4.041673183441162 }, { "id": "identity_54453", "image": "DocVQA/images/ppxm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the note contain any punctuation marks?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 20.953125, "loss": 4.220635890960693 }, { "id": "identity_299664", "image": "TQA/images/question_images/solar_system_4574.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the diagram, which planet is the largest among the following options: Earth, Saturn, Jupiter, or Neptune?\nChoices:\n(A) Earth\n(B) Saturn\n(C) Jupiter\n(D) Neptune" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.640625, "loss": 4.312539100646973 }, { "id": "identity_124216", "image": "VQA-AS/images/6526.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What food items are visible on the picnic blanket?" }, { "from": "gpt", "value": "The answer is Three food items" } ], "clip_score": 32.34375, "loss": 4.116942405700684 }, { "id": "identity_207618", "image": "VQA2.0/images/409681.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many kites are visible in the sky?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 21.75, "loss": 4.035626411437988 }, { "id": "identity_338364", "image": "A-OKVQA/images/319275.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the weather like here?\nChoices:\n(A) moderate\n(B) below freezing\n(C) mild\n(D) temperate" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.03125, "loss": 4.508909225463867 }, { "id": "identity_43715", "image": "DVQA/images/bar_train_00069281.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item has the least total units sold when combining both stores?" }, { "from": "gpt", "value": "The answer is pause" } ], "clip_score": 28.703125, "loss": 4.191397666931152 }, { "id": "identity_284618", "image": "A-OKVQA/images/260141.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be found above the water surface?\nChoices:\n(A) squirrels\n(B) bears\n(C) surfers\n(D) boats" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.359375, "loss": 4.229327201843262 }, { "id": "identity_104578", "image": "PMC-VQA/images/PMC2747439_F0005_46548.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of this image in relation to the patient's body?" }, { "from": "gpt", "value": "The answer is Axial orientation" } ], "clip_score": 28.078125, "loss": 4.19966459274292 }, { "id": "identity_328652", "image": "A-OKVQA/images/546378.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this animal near?\nChoices:\n(A) dog\n(B) boxes\n(C) egg carton\n(D) books" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.671875, "loss": 4.128209590911865 }, { "id": "identity_133499", "image": "TextVQA/images/12262bd2c9cfd517.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the leading athlete's shorts?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 24.71875, "loss": 4.005705833435059 }, { "id": "identity_316145", "image": "IconQA/images/88603.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this shape?\nChoices:\n(A) slide\n(B) flip\n(C) turn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.390625, "loss": 4.469418525695801 }, { "id": "identity_212658", "image": "Super-CLEVR/images/superCLEVR_new_000773.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the vehicle located in the middle of the image?" }, { "from": "gpt", "value": "The answer is Beige" } ], "clip_score": 23.96875, "loss": 4.025163650512695 }, { "id": "identity_18622", "image": "AI2D/images/2528.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The Earth is represented at which letter?\nChoices:\n(A) K\n(B) G\n(C) F\n(D) A" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.84375, "loss": 4.401423454284668 }, { "id": "identity_216677", "image": "Super-CLEVR/images/superCLEVR_new_000265.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant material of the object in the foreground of the image?" }, { "from": "gpt", "value": "The answer is Metal" } ], "clip_score": 24.40625, "loss": 4.2157063484191895 }, { "id": "identity_307788", "image": "TabMWP/images/17660.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is this function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.3125, "loss": 4.4642205238342285 }, { "id": "identity_284967", "image": "A-OKVQA/images/67178.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which language-speaking country are we currently located?\nChoices:\n(A) Spanish\n(B) English\n(C) French\n(D) German" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.03125, "loss": 4.150035858154297 }, { "id": "identity_300720", "image": "AI2D/images/3302.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the Golgi?\nChoices:\n(A) G\n(B) A\n(C) C\n(D) D" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.8984375, "loss": 4.656157493591309 }, { "id": "identity_259384", "image": "PMC-VQA/images/PMC5835267_fig3_283464.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the anatomical landmarks and surgical tools visible in the image, and how do they indicate the type of fracture?" }, { "from": "gpt", "value": "The answer is Sacral fracture" } ], "clip_score": 25.734375, "loss": 4.256117820739746 }, { "id": "identity_5272", "image": "A-OKVQA/images/233404.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the scarf wrapped around the suitcase pulled on the left?\nChoices:\n(A) red\n(B) yellow\n(C) green\n(D) blue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.3515625, "loss": 4.010438442230225 }, { "id": "identity_328374", "image": "ScienceQA/images/13107.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What property matches this item?\nChoices:\n(A) scratchy\n(B) smooth" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.34375, "loss": 4.707005500793457 }, { "id": "identity_337516", "image": "A-OKVQA/images/262006.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who are inside these vehicles driving here?\nChoices:\n(A) tourists\n(B) prisoners\n(C) zoo keepers\n(D) hunters" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.453125, "loss": 4.566232204437256 }, { "id": "identity_284465", "image": "PMC-VQA/images/PMC6711278_f1_29028.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was shown by the color Doppler imaging? \nChoices:\n(A) Central vascularity \n(B) Peripheral vascularity \n(C) No vascularity \n(D) Interstitial vascularity" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.109375, "loss": 4.746028423309326 }, { "id": "identity_338317", "image": "A-OKVQA/images/365206.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of traffic is allowed to go through here at this time?\nChoices:\n(A) foot only\n(B) all\n(C) buses\n(D) motorcycles" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.8828125, "loss": 4.29581880569458 }, { "id": "identity_242705", "image": "PMC-VQA/images/PMC9415991_nanomaterials-12-02873-f001_386532.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which areas represent both α and β in the Figure?" }, { "from": "gpt", "value": "The answer is No common area" } ], "clip_score": 18.40625, "loss": 4.392461776733398 }, { "id": "identity_296107", "image": "PMC-VQA/images/PMC5894172_Fig2_299739.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which diagnostic imaging technique was utilized to identify the irregular mass in the right kidney?\nChoices:\n(A) Magnetic resonance imaging (MRI)\n(B) Computed tomography (CT) scan\n(C) X-ray\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.421875, "loss": 4.396777153015137 }, { "id": "identity_192400", "image": "PMC-VQA/images/PMC7873062_Fig1_364493.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many images are labeled with \"Peduncles\"?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.796875, "loss": 4.325273513793945 }, { "id": "identity_305055", "image": "ScienceQA/images/10517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do these two items share?\nChoices:\n(A) blue\n(B) bouncy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.65625, "loss": 4.272360324859619 }, { "id": "identity_258300", "image": "A-OKVQA/images/160064.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify three distinct features in the image that indicate it is a commercial area, and explain how each feature supports this classification" }, { "from": "gpt", "value": "The answer is Businesses, street activity, urban infrastructure" } ], "clip_score": 23.625, "loss": 4.22191047668457 }, { "id": "identity_302950", "image": "A-OKVQA/images/204683.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kinds of muffins are shown here?\nChoices:\n(A) raisin\n(B) poppy\n(C) apple\n(D) blueberry" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.03125, "loss": 4.1253862380981445 }, { "id": "identity_241493", "image": "DVQA/images/bar_train_00120599.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the highest average accuracy across all the datasets?" }, { "from": "gpt", "value": "The answer is driver" } ], "clip_score": 33.9375, "loss": 4.30372428894043 }, { "id": "identity_21210", "image": "PMC-VQA/images/PMC8870476_cancers-14-00879-f002_202517.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the lesion suggest?\nChoices:\n(A) Complete recovery \n(B) Tumor recurrence \n(C) Inflammation \n(D) No change observed" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.578125, "loss": 4.222890377044678 }, { "id": "identity_66118", "image": "AI2D/images/1801.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different forms of nitrogen are depicted in the biofilter process?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.640625, "loss": 4.065718650817871 }, { "id": "identity_273107", "image": "PMC-VQA/images/PMC8029940_f10_412209.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which modality was utilized to produce the image?\nChoices:\n(A) CT scan\n(B) X-ray\n(C) MRI\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.5625, "loss": 4.235335826873779 }, { "id": "identity_104765", "image": "A-OKVQA/images/181438.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of plant is growing above the chairs?" }, { "from": "gpt", "value": "The answer is A tree" } ], "clip_score": 22.546875, "loss": 4.285004615783691 }, { "id": "identity_54847", "image": "DocVQA/images/lycj0037_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who should be contacted to obtain labels for identifying the tobaccos?" }, { "from": "gpt", "value": "The answer is Wayne Sanders, Macon Q.A" } ], "clip_score": 19.453125, "loss": 4.68394660949707 }, { "id": "identity_8389", "image": "AI2D/images/3818.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the nutritive tissue within seeds of flowering plants, surrounding and absorbed by the embryo?\nChoices:\n(A) radicle\n(B) endosperm\n(C) epicotyl\n(D) seed coat" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.75, "loss": 4.048932075500488 }, { "id": "identity_37876", "image": "IconQA/images/42914.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a sub. Which picture is second?\nChoices:\n(A) sub\n(B) fish\n(C) turtle" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.046875, "loss": 4.670315742492676 }, { "id": "identity_74041", "image": "ScienceQA/images/7476.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the prefix \"Ante\" signify in the given words?" }, { "from": "gpt", "value": "The answer is Before" } ], "clip_score": 23.75, "loss": 4.4425859451293945 }, { "id": "identity_256406", "image": "A-OKVQA/images/284160.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key features of this conveyance that indicate it is primarily used by tourists?" }, { "from": "gpt", "value": "The answer is Bright colors, sightseeing route, cultural area, frequent departures" } ], "clip_score": 22.4375, "loss": 4.341419219970703 }, { "id": "identity_2994", "image": "IconQA/images/32975.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a crab. Which picture is third?\nChoices:\n(A) crab\n(B) bucket\n(C) boat" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.6875, "loss": 4.015524864196777 }, { "id": "identity_309031", "image": "ScienceQA/images/4479.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Philadelphia\n(B) Pittsburgh\n(C) Harrisburg\n(D) Albany" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.75, "loss": 5.013789653778076 }, { "id": "identity_283670", "image": "PMC-VQA/images/PMC4631848_fig3_440391.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the status of the right ureter in the provided situation? \nOptions:\n(A) Narrowed\n(B) Dilated\n(C) Obstructed\n(D) Normal" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.3125, "loss": 4.692452430725098 }, { "id": "identity_303711", "image": "TQA/images/question_images/human_system_digestive_3708.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What links the mouth to the stomach?\nChoices:\n(A) esophagus\n(B) gallbladder\n(C) liver\n(D) large intestine" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.09375, "loss": 4.163018226623535 }, { "id": "identity_16094", "image": "DVQA/images/bar_train_00000775.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the fourth group of bars from the bottom?" }, { "from": "gpt", "value": "The answer is yield" } ], "clip_score": 22.96875, "loss": 4.557039737701416 }, { "id": "identity_144976", "image": "DVQA/images/bar_train_00075277.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object has the least preference in the 'style' category?" }, { "from": "gpt", "value": "The answer is excess" } ], "clip_score": 29.03125, "loss": 4.251490592956543 }, { "id": "identity_276451", "image": "AI2D/images/3275.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What structure encloses the nucleolus in a cell?\nChoices:\n(A) Ribosome\n(B) Nucleus\n(C) Lysosome\n(D) Centrioles" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.609375, "loss": 4.036323070526123 }, { "id": "identity_137722", "image": "DocVQA/images/pxpw0023_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the sender of this facsimile?" }, { "from": "gpt", "value": "The answer is Tom" } ], "clip_score": 20.21875, "loss": 4.970122337341309 }, { "id": "identity_322348", "image": "PMC-VQA/images/PMC9399794_F1_378886.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging finding after this procedure?\nChoices:\n(A) Hypodensity\n(B) Hyperdensity\n(C) Isodensity\n(D) No change in density" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.796875, "loss": 4.396210193634033 }, { "id": "identity_268377", "image": "VQA2.0/images/140690.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the majority of the seats in the stadium vacant?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 20.625, "loss": 4.411589622497559 }, { "id": "identity_298783", "image": "PMC-VQA/images/PMC3777960_pone-0074753-g003_232606.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reference of the circle in the figure? Is it related to areas of necrosis, treated tumors, normal cells, or untreated tumors?\nChoices:\n(A) areas of necrosis \n(B) treated tumors \n(C) normal cells \n(D) untreated tumors" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.546875, "loss": 4.5727128982543945 }, { "id": "identity_216411", "image": "A-OKVQA/images/513854.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 15.5625, "loss": 4.198269844055176 }, { "id": "identity_145991", "image": "MapQA/images/map_4776.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the value range 638181-638910?" }, { "from": "gpt", "value": "The answer is a color name" } ], "clip_score": 20.078125, "loss": 4.400498867034912 }, { "id": "identity_303641", "image": "PMC-VQA/images/PMC5703880_Fig8_244525.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of microscope was used to take these images?\nChoices:\n(A) SEM \n(B) TEM \n(C) Confocal Microscopy \n(D) Fluorescence Microscopy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.640625, "loss": 4.295976161956787 }, { "id": "identity_314939", "image": "PMC-VQA/images/PMC2222049_F2_16887.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the heart has these conditions?\nChoices:\n(A) Aorta\n(B) Left Ventricle\n(C) Right Ventricle\n(D) Pulmonary artery" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.859375, "loss": 4.699385643005371 }, { "id": "identity_333264", "image": "PMC-VQA/images/PMC5547522_Fig2_200908.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the black arrow in this image?\nChoices:\n(A) mass\n(B) duct expansion\n(C) cyst\n(D) necrosis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.59375, "loss": 5.425961017608643 }, { "id": "identity_286050", "image": "AI2D/images/3792.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter is the petiole labeled with?\n(A) M\n(B) L\n(C) I\n(D) E" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.671875, "loss": 4.489532947540283 }, { "id": "identity_187434", "image": "A-OKVQA/images/111406.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the car parked on the right side of the street?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 20.359375, "loss": 4.596375465393066 }, { "id": "identity_301709", "image": "PMC-VQA/images/PMC4896362_F1_38630.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What staining method was used to produce the blue color in this? \nChoices:\n(A) Safranin-O \n(B) Crystal violet \n(C) Gram stain \n(D) Calcofluor white" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.71875, "loss": 4.1503119468688965 }, { "id": "identity_332480", "image": "A-OKVQA/images/536145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is likely the word between \"see\" and \"now\" on this person's back?\nChoices:\n(A) mexico\n(B) me\n(C) money\n(D) mom" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.3828125, "loss": 4.10799503326416 }, { "id": "identity_208156", "image": "A-OKVQA/images/356940.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cows are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.375, "loss": 4.208266735076904 }, { "id": "identity_264743", "image": "AI2D/images/649.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which layer is located in the middle of the diagram? \n(A) outer core\n(B) crust\n(C) inner core\n(D) mantle" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.3125, "loss": 4.222197532653809 }, { "id": "identity_332773", "image": "AI2D/images/4110.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If there are no more of these, what will happen to air?\nChoices:\n(A) Nothing will happen\n(B) No oxygen for air\n(C) Not sure\n(D) Air will be good" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.71875, "loss": 4.52282190322876 }, { "id": "identity_106392", "image": "VQA-AS/images/5562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the scene depict?" }, { "from": "gpt", "value": "The answer is Evening or night" } ], "clip_score": 20.53125, "loss": 4.297216892242432 }, { "id": "identity_290949", "image": "Super-CLEVR/images/superCLEVR_new_001465.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the jet and the motorbike located behind the large metal biplane painted in the same color?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 20.859375, "loss": 5.373748779296875 }, { "id": "identity_287988", "image": "A-OKVQA/images/556278.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the source of illumination for the street?\nChoices:\n(A) fire\n(B) sun\n(C) street lights\n(D) flashlights" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.703125, "loss": 4.760587215423584 }, { "id": "identity_17158", "image": "TextVQA/images/2235bca399bfd395.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what does it say on the front of the blue and white rail car?" }, { "from": "gpt", "value": "The answer is geox" } ], "clip_score": 15.7421875, "loss": 4.827132225036621 }, { "id": "identity_307853", "image": "ScienceQA/images/4138.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is slate a mineral or something else?\nChoices:\n(A) mineral\n(B) rock" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.84375, "loss": 4.497427940368652 }, { "id": "identity_322459", "image": "VQA2.0/images/167559.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is this pointed at?" }, { "from": "gpt", "value": "The answer is toward medicine cabinet" } ], "clip_score": 24.453125, "loss": 4.233908176422119 }, { "id": "identity_284223", "image": "Super-CLEVR/images/superCLEVR_new_000546.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the small matte object in front of the large chopper have the same shape as the shiny object in front of the tiny purple wagon?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 27.15625, "loss": 5.13804817199707 }, { "id": "identity_263165", "image": "A-OKVQA/images/341409.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which sport's equipment is easily accessible here among the following options: springboard diving, surfing, sailing, or fishing?\nChoices:\n(A) springboard diving\n(B) surfing\n(C) sailing\n(D) fishing" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.40625, "loss": 4.035930633544922 }, { "id": "identity_294942", "image": "ScienceQA/images/1806.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is being indicated by the highlighted area?\nChoices:\n(A) Australia\n(B) Asia\n(C) Africa\n(D) Antarctica" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.53125, "loss": 4.440860271453857 }, { "id": "identity_309125", "image": "PMC-VQA/images/PMC8870907_diagnostics-12-00384-f006_203201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which sacroiliac joint shows anterior joint widening in this image?\nChoices:\n(A) Left \n(B) Right \n(C) Both Left and Right \n(D) None of them" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.734375, "loss": 4.254088401794434 }, { "id": "identity_271618", "image": "A-OKVQA/images/202582.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which geographical region of a country is this location part of? \n(A) inland\n(B) valley\n(C) coast\n(D) mountaintop" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 6.7734375, "loss": 4.495658874511719 }, { "id": "identity_127861", "image": "TextVQA/images/01375ce0b7e9e9f6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the seats in the stadium?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 21.171875, "loss": 4.082122325897217 }, { "id": "identity_31433", "image": "PMC-VQA/images/PMC4846987_f5_24859.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the top row of images in this figure show? \nChoices:\n(A) Segmentations of a brain tumor \n(B) Segmentations at three different time points \n(C) FLAIR images at three different time points \n(D) Overlays of the segmentations of a single time point " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.140625, "loss": 4.603503227233887 }, { "id": "identity_319466", "image": "TextVQA/images/4748e3595f9984b0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What company is the red banner displaying?" }, { "from": "gpt", "value": "The answer is santand" } ], "clip_score": 15.3828125, "loss": 4.353243827819824 }, { "id": "identity_133913", "image": "Super-CLEVR/images/superCLEVR_new_001989.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly behind the yellow motorcycle?" }, { "from": "gpt", "value": "The answer is Sports car" } ], "clip_score": 16.859375, "loss": 4.95653772354126 }, { "id": "identity_17987", "image": "AI2D/images/713.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What exerts pressure on the Rock?\nChoices:\n(A) Ocean\n(B) Lake\n(C) River\n(D) Pond" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.21875, "loss": 4.164031028747559 }, { "id": "identity_792", "image": "ScienceQA/images/3357.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Minnesota?\nChoices:\n(A) Saint Paul\n(B) Fort Wayne\n(C) Indianapolis\n(D) Columbus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.671875, "loss": 4.319946765899658 }, { "id": "identity_295375", "image": "PMC-VQA/images/PMC6748179_fig7_38893.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in images (b-d)?\nOptions:\n(A) Ground truth annotation\n(B) Study objective\n(C) Raw data\n(D) Validation results" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.5546875, "loss": 4.254390239715576 }, { "id": "identity_140288", "image": "MapQA/images/map_4133.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states fall into the middle percentage range for advisers?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 31.53125, "loss": 4.079032897949219 }, { "id": "identity_22986", "image": "PMC-VQA/images/PMC7875425_pone.0246752.g003_365450.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the green arrows illustrate in the representative images? \nChoices:\n(A) Normal ducts with high expression \n(B) Invasive cells with high expression \n(C) Normal ducts with low expression \n(D) Invasive cells with low expression " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.71875, "loss": 4.107828617095947 }, { "id": "identity_54466", "image": "DVQA/images/bar_train_00138951.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item has the second highest number of units sold?" }, { "from": "gpt", "value": "The answer is sale" } ], "clip_score": 26.40625, "loss": 4.084114074707031 }, { "id": "identity_276704", "image": "FigureQA/images/50087.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater in value, Dark Turquoise or Crimson?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.5625, "loss": 4.719630241394043 }, { "id": "identity_334360", "image": "PMC-VQA/images/PMC6824487_f1_59614.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the origin of these tomographic slices?\nChoices:\n(A) CT scan\n(B) MRI\n(C) Ultrasonography\n(D) X-ray" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.296875, "loss": 4.748361110687256 }, { "id": "identity_318319", "image": "A-OKVQA/images/305000.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What might the person standing here purchase?\nChoices:\n(A) grains\n(B) produce\n(C) toys\n(D) meats" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.125, "loss": 4.611927032470703 }, { "id": "identity_287845", "image": "PMC-VQA/images/PMC7562849_ccr33031-fig-0001_274914.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the images provided, where is the lesion located?\nChoices:\n(A) Right pulmonary apex\n(B) Left pulmonary apex\n(C) Left lower lobe\n(D) Right lower lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.875, "loss": 4.4999189376831055 }, { "id": "identity_280414", "image": "FigureQA/images/3716.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Olive Drab or Dodger Blue?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.171875, "loss": 4.665024757385254 }, { "id": "identity_262790", "image": "Super-CLEVR/images/superCLEVR_new_000547.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do any other objects share the same color as the small rubber bicycle?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 21.84375, "loss": 5.106138229370117 }, { "id": "identity_186744", "image": "VQA2.0/images/108892.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many potted plants are visible for sale in the image?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 20.21875, "loss": 4.807105541229248 }, { "id": "identity_314992", "image": "FigureQA/images/18052.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.171875, "loss": 4.062901496887207 }, { "id": "identity_5208", "image": "A-OKVQA/images/181367.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the woman on the left near?\nChoices:\n(A) monkey\n(B) bicycle\n(C) egg\n(D) bear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.671875, "loss": 4.443583011627197 }, { "id": "identity_330584", "image": "PMC-VQA/images/PMC8113960_f2_438295.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrow indicate here?\nChoices:\n(A) Negative cells\n(B) Viral RNA\n(C) Positive cells\n(D) Control cells." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.4375, "loss": 4.634491443634033 }, { "id": "identity_53021", "image": "DocVQA/images/hkym0020_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the sender of this fax?" }, { "from": "gpt", "value": "The answer is Sharon" } ], "clip_score": 28.90625, "loss": 4.2256178855896 }, { "id": "identity_333527", "image": "PMC-VQA/images/PMC4534109_Fig7_414055.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of radiation is used in this procedure?\nChoices:\n(A) Alpha \n(B) Beta \n(C) Gamma \n(D) X-ray" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.859375, "loss": 4.461269855499268 }, { "id": "identity_300092", "image": "IconQA/images/97996.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner more likely to land on?\nChoices:\n(A) white\n(B) blue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.25, "loss": 4.049696922302246 }, { "id": "identity_130699", "image": "PMC-VQA/images/PMC9092047_F8_276679.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate length of the bone shown in the top image?" }, { "from": "gpt", "value": "The answer is An estimate based on the scale provided" } ], "clip_score": 24.6875, "loss": 4.046449184417725 }, { "id": "identity_335595", "image": "PMC-VQA/images/PMC5898261_BIO027060F3_301142.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of micrographs were used to capture these images?\nChoices:\n(A) SEM and TEM \n(B) Bright field and dark field \n(C) Bright field and fluorescent \n(D) Fluorescent and Scanning" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.109375, "loss": 4.497701168060303 }, { "id": "identity_205663", "image": "A-OKVQA/images/292827.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many carrots are visible in the bowl?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 17.328125, "loss": 4.588009357452393 }, { "id": "identity_290005", "image": "ScienceQA/images/15612.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the material used to make this shovel?\nChoices:\n(A) wood\n(B) rock" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.0, "loss": 4.338877201080322 }, { "id": "identity_32202", "image": "PMC-VQA/images/PMC5557509_Fig2_203153.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color pattern of the cervical channel compared to the surrounding tissue in the first scenario? \nChoices:\n(A) Blue \n(B) Red \n(C) Green \n(D) Yellow " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.40625, "loss": 4.250461578369141 }, { "id": "identity_34529", "image": "VQA2.0/images/129914.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the rocks in a line?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 11.4140625, "loss": 4.074543476104736 }, { "id": "identity_282418", "image": "PMC-VQA/images/PMC2673345_fig03_37795.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be seen in the reference image?\nChoices:\n(A) Fat suppression. \n(B) Shifted fat signal. \n(C) Bone fractures. \n(D) Tumor." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.40625, "loss": 4.381094455718994 }, { "id": "identity_281312", "image": "A-OKVQA/images/368515.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location in the picture is it most likely to find a baby?\nChoices:\n(A) stroller\n(B) building\n(C) car\n(D) tree" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.1875, "loss": 4.384091377258301 }, { "id": "identity_208226", "image": "VizWiz/images/VizWiz_train_00003042.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the object to the right of the grill?" }, { "from": "gpt", "value": "The answer is A fan" } ], "clip_score": 27.359375, "loss": 4.323019504547119 }, { "id": "identity_121555", "image": "Super-CLEVR/images/superCLEVR_new_000236.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is depicted in the bottom left corner of the image?" }, { "from": "gpt", "value": "The answer is Harmonica bus" } ], "clip_score": 24.15625, "loss": 5.494018077850342 }, { "id": "identity_289303", "image": "A-OKVQA/images/538609.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which internet device is currently being used at this desk? Is it a desktop computer, tablet, smartphone, or laptop computer?\nChoices:\n(A) desktop computer\n(B) tablet\n(C) smartphone\n(D) laptop computer" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.171875, "loss": 4.650897026062012 }, { "id": "identity_279446", "image": "PMC-VQA/images/PMC5135722_Fig1_101981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the intracerebral hematoma located in the brain?\nChoices:\n(A) left hemisphere\n(B) right hemisphere\n(C) cerebellum\n(D) occipital lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.078125, "loss": 4.3357110023498535 }, { "id": "identity_110177", "image": "VQA2.0/images/476634.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other objects besides the phone and beans in the image?" }, { "from": "gpt", "value": "The answer is No, only a basket and wooden edge" } ], "clip_score": 30.765625, "loss": 5.030513286590576 }, { "id": "identity_288266", "image": "ScienceQA/images/16794.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What common property do these two objects share among the following choices: yellow or salty?\nChoices:\n(A) yellow\n(B) salty" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.3125, "loss": 4.200385093688965 }, { "id": "identity_312181", "image": "A-OKVQA/images/111030.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these blue signs being used for?\nChoices:\n(A) decoration\n(B) advertising\n(C) selling\n(D) directing traffic" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.359375, "loss": 4.122105598449707 }, { "id": "identity_12082", "image": "PMC-VQA/images/PMC3750476_F5_226406.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What microscopy technique was used to capture the image? \nChoices:\n(A) Transmission electron microscopy \n(B) Scanning electron microscopy \n(C) Confocal microscopy \n(D) Fluorescence microscopy " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.328125, "loss": 4.204394817352295 }, { "id": "identity_216868", "image": "VQA2.0/images/329343.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vases can be seen on the mantelpiece above the fireplace?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.953125, "loss": 4.460155487060547 }, { "id": "identity_221286", "image": "A-OKVQA/images/563713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the vase?" }, { "from": "gpt", "value": "The answer is Terracotta" } ], "clip_score": 29.53125, "loss": 4.4154157638549805 }, { "id": "identity_106855", "image": "VQA2.0/images/33431.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shorts worn by the person with the \"Mizzou\" shirt?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 17.640625, "loss": 5.179287433624268 }, { "id": "identity_104868", "image": "A-OKVQA/images/329903.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bears have a heart on them?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.296875, "loss": 4.308586597442627 }, { "id": "identity_147941", "image": "DocVQA/images/kzbn0226_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the concept of reasonable foreseeability associated with?" }, { "from": "gpt", "value": "The answer is American and western law" } ], "clip_score": 19.34375, "loss": 5.016650199890137 }, { "id": "identity_310240", "image": "AI2D/images/3865.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the layer of cutin covering the epidermis of the aerial parts of plants?\nChoices:\n(A) J\n(B) E\n(C) G\n(D) A" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.5625, "loss": 4.034657955169678 }, { "id": "identity_318882", "image": "ScienceQA/images/7592.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which place is highlighted?\nChoices:\n(A) Vanuatu\n(B) Nauru\n(C) New Zealand\n(D) Kiribati" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.015625, "loss": 4.539813995361328 }, { "id": "identity_317792", "image": "PMC-VQA/images/PMC7567400_f5_276970.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the top-left corner of the image?\nChoices:\n(A) Delta score\n(B) Sum of scores between technician and DeepMedic\n(C) Mean neuroradiologist score for each exam\n(D) Mean technician score for each exam" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.265625, "loss": 4.360121726989746 }, { "id": "identity_336938", "image": "ScienceQA/images/19708.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is marked?\nChoices:\n(A) Kiribati\n(B) Tuvalu\n(C) Tonga\n(D) Solomon Islands" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.75, "loss": 4.141345500946045 }, { "id": "identity_36009", "image": "TextVQA/images/d7c55f8bb639805d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the camera company on display?" }, { "from": "gpt", "value": "The answer is rashomon" } ], "clip_score": 11.515625, "loss": 5.395053863525391 }, { "id": "identity_1911", "image": "A-OKVQA/images/211632.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What method was used to create the picture on the wall?\nChoices:\n(A) knitting\n(B) printing\n(C) painting\n(D) embroidery" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.4375, "loss": 4.0370354652404785 }, { "id": "identity_26857", "image": "A-OKVQA/images/196035.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What protection does an umbrella offer here?\nChoices:\n(A) locusts\n(B) nuclear fallout\n(C) rain\n(D) sun shade" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.4609375, "loss": 4.303759574890137 }, { "id": "identity_21193", "image": "ScienceQA/images/4305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these two objects have in common?\nChoices:\n(A) breakable\n(B) soft" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.9375, "loss": 4.488002777099609 }, { "id": "identity_211091", "image": "A-OKVQA/images/267435.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individual jam containers are visible on the table?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 16.84375, "loss": 5.172884941101074 }, { "id": "identity_288207", "image": "A-OKVQA/images/78986.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which laptop brand is the subject of the advertisement? Is it Dell, Asus, HP, or Lenovo?\nChoices:\n(A) dell\n(B) asus\n(C) hp\n(D) lenovo" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.59375, "loss": 4.056272029876709 }, { "id": "identity_320250", "image": "PMC-VQA/images/PMC8546700_fig4_96332.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible in the bright-field image overlay?\nChoices:\n(A) Microscopic view of a mosquito\n(B) Microscopic view of a sporozoite\n(C) Microscopic view of a skin cell\n(D) Microscopic view of a hair follicle" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.046875, "loss": 4.259191513061523 }, { "id": "identity_295284", "image": "IconQA/images/34375.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are the stars in the group an even or odd number?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.40625, "loss": 4.008248329162598 }, { "id": "identity_288425", "image": "A-OKVQA/images/138747.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which surface reflects the glare from the sun, causing distraction for drivers? Is it:\n(A) pocket mirrors\n(B) cell phone\n(C) camera flash\n(D) motorcycle shields" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.0625, "loss": 4.340296745300293 }, { "id": "identity_212795", "image": "VQA2.0/images/26662.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cartons of milk are there in the fridge?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.09375, "loss": 4.447526454925537 }, { "id": "identity_335411", "image": "TabMWP/images/37211.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Colin noted down the minutes he spent practicing tennis over the past 5 days. What is the range of these numbers?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 27.53125, "loss": 4.121870517730713 }, { "id": "identity_314649", "image": "AI2D/images/522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the diagram of a food chain and your knowledge of science, if the population of hawks increases, the population of frogs will most likely\nChoices:\n(A) decrease\n(B) increase\n(C) cannot determine\n(D) remain the same" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.359375, "loss": 4.046442985534668 }, { "id": "identity_193070", "image": "PMC-VQA/images/PMC7316673_Fig3_199167.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can the presence of an effusion be determined from this image?" }, { "from": "gpt", "value": "The answer is No effusion present" } ], "clip_score": 24.953125, "loss": 4.035477161407471 }, { "id": "identity_16065", "image": "IconQA/images/105279.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner less likely to land?\nChoices:\n(A) green\n(B) white" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.171875, "loss": 4.125136375427246 }, { "id": "identity_272467", "image": "TextVQA/images/350cfcd3adaa9891.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the name of the bar?" }, { "from": "gpt", "value": "The answer is colony house" } ], "clip_score": 15.03125, "loss": 4.793825626373291 }, { "id": "identity_296278", "image": "DocVQA/images/ysbd0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By whom will the members be informed about the time, date, and location of meetings?" }, { "from": "gpt", "value": "The answer is panel chairmen" } ], "clip_score": 20.46875, "loss": 4.415423393249512 }, { "id": "identity_208810", "image": "PMC-VQA/images/PMC3253128_pone-0029958-g015_121427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the likely function of the tissue shown in this image?" }, { "from": "gpt", "value": "The answer is Contraction and movement" } ], "clip_score": 24.0, "loss": 4.144847869873047 }, { "id": "identity_10921", "image": "ScienceQA/images/5804.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Texas?\nChoices:\n(A) Austin\n(B) Frankfort\n(C) Santa Fe\n(D) Houston" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.234375, "loss": 4.274201393127441 }, { "id": "identity_216757", "image": "VizWiz/images/VizWiz_train_00013141.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What function is highlighted on the blue dial of the microwave?" }, { "from": "gpt", "value": "The answer is Start/Stop" } ], "clip_score": 32.25, "loss": 4.245038032531738 }, { "id": "identity_21765", "image": "PMC-VQA/images/PMC6441667_Fig11_459429.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the mandible was resected? \nChoices:\n(A) Right mandible \n(B) Left mandible \n(C) Upper mandible \n(D) Lower mandible " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.421875, "loss": 4.116610527038574 }, { "id": "identity_54499", "image": "DocVQA/images/hjjw0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person who requested the PPS Program?" }, { "from": "gpt", "value": "The answer is Susan King" } ], "clip_score": 22.1875, "loss": 4.663983345031738 }, { "id": "identity_123127", "image": "VQA2.0/images/508861.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object to the right of the child?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 14.859375, "loss": 4.40755558013916 }, { "id": "identity_289015", "image": "TabMWP/images/11581.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which day of the past 4 days had the lowest number of copies sold, according to the examination conducted by the newspaper staff?\nChoices:\n(A) Saturday\n(B) Sunday\n(C) Monday\n(D) Tuesday" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.25, "loss": 4.362581729888916 }, { "id": "identity_74983", "image": "TQA/images/question_images/seasons_2518.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which season is represented at the bottom of the image?" }, { "from": "gpt", "value": "The answer is Summer" } ], "clip_score": 24.625, "loss": 4.048524856567383 }, { "id": "identity_24888", "image": "PMC-VQA/images/PMC8870907_diagnostics-12-00384-f001_203185.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which zone of the sacrum is involved in the neuroforaminal sacral fracture? \nChoices:\n(A) Zone I \n(B) Zone II \n(C) Zone III \n(D) Zone IV " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.0625, "loss": 4.099936485290527 }, { "id": "identity_325695", "image": "PMC-VQA/images/PMC7353682_Fig1_211897.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of anomaly is seen in this image?\nChoices:\n(A) absence of a kidney\n(B) fusion of some upper poles\n(C) fusion of some lower poles\n(D) enlargement of kidneys" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.0625, "loss": 4.5186262130737305 }, { "id": "identity_316721", "image": "Super-CLEVR/images/superCLEVR_new_000020.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other things that have the same shape as this large blue object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 17.953125, "loss": 4.800037384033203 }, { "id": "identity_184242", "image": "A-OKVQA/images/420496.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible pedestrian crossing lines are there?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.859375, "loss": 4.570095062255859 }, { "id": "identity_296036", "image": "PMC-VQA/images/PMC6728781_FIG1_34433.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the body is depicted in the image provided?\nChoices:\n(A) Head \n(B) Chest \n(C) Abdomen \n(D) Pelvis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.28125, "loss": 4.2998504638671875 }, { "id": "identity_337512", "image": "PMC-VQA/images/PMC6814365_fig1s1_56521.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown by the white outlines in the left panel?\nChoices:\n(A) Regions of Interest (ROI) \n(B) Thickness of cell membrane \n(C) The size of cells \n(D) Number of cells" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.40625, "loss": 5.3379225730896 }, { "id": "identity_320778", "image": "Super-CLEVR/images/superCLEVR_new_001597.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the green object to the left of the small bicycle that is to the left of the small metallic aeroplane made of?" }, { "from": "gpt", "value": "The answer is rubber" } ], "clip_score": 29.78125, "loss": 4.282712936401367 }, { "id": "identity_36495", "image": "PMC-VQA/images/PMC6195911_f1_384201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the condition of the corpus spongiosum in the image? \nChoices:\n(A) Oedematous \n(B) Normal \n(C) Inflamed \n(D) None of the above " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.921875, "loss": 4.121649265289307 }, { "id": "identity_336313", "image": "A-OKVQA/images/68594.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the person holding a plastic bag?\nChoices:\n(A) being mischievous\n(B) as punishment\n(C) for fun\n(D) making purchase" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.6484375, "loss": 4.155752182006836 }, { "id": "identity_35759", "image": "A-OKVQA/images/297645.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people queueing up for?\nChoices:\n(A) entering zoo\n(B) entering park\n(C) feeding elephants\n(D) riding elephants" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.59375, "loss": 4.155861854553223 }, { "id": "identity_331392", "image": "A-OKVQA/images/495117.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where might the people sleep?\nChoices:\n(A) car\n(B) trailer\n(C) field\n(D) horse" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.46875, "loss": 4.3392510414123535 }, { "id": "identity_296006", "image": "PMC-VQA/images/PMC6894893_FIG1_79450.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How would you describe the shape of the gestational sac as shown in the ultrasound image?\nChoices:\n(A) Round\n(B) Oval\n(C) Elongated\n(D) Irregular" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.609375, "loss": 4.060790061950684 }, { "id": "identity_152270", "image": "ScienceQA/images/6128.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries share the island on which the highlighted country is located?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 27.96875, "loss": 4.2143964767456055 }, { "id": "identity_295757", "image": "A-OKVQA/images/529314.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most probable connection between the people in the stands and the players?\nChoices:\n(A) protesters\n(B) family\n(C) employees\n(D) advertising sponsors" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.375, "loss": 5.413205146789551 }, { "id": "identity_15288", "image": "A-OKVQA/images/242034.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the man in the bottom left holding?\nChoices:\n(A) club\n(B) umbrella\n(C) cane\n(D) bat" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.71875, "loss": 4.282620906829834 }, { "id": "identity_142197", "image": "DocVQA/images/qngh0228_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the R&D Product Standards Director?" }, { "from": "gpt", "value": "The answer is Hal E. Guess" } ], "clip_score": 24.078125, "loss": 4.447320938110352 }, { "id": "identity_205587", "image": "VQA2.0/images/452198.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the guard booth located on the left or right side of the signboard?" }, { "from": "gpt", "value": "The answer is Left" } ], "clip_score": 26.328125, "loss": 4.039118766784668 }, { "id": "identity_131812", "image": "PMC-VQA/images/PMC6479307_molecules-24-01192-f009_470003.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of cell clustering in the image?" }, { "from": "gpt", "value": "The answer is No clustering observed" } ], "clip_score": 29.75, "loss": 4.766259670257568 }, { "id": "identity_195296", "image": "PMC-VQA/images/PMC4188846_Fig1_325915.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image with respect to the patient's body?" }, { "from": "gpt", "value": "The answer is Apical view" } ], "clip_score": 28.625, "loss": 4.038205623626709 }, { "id": "identity_106187", "image": "VQA-AS/images/16713.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many doors are visible in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 20.0625, "loss": 4.397543907165527 }, { "id": "identity_338589", "image": "IconQA/images/58890.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can you trace a square with this? \nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.7265625, "loss": 4.013336181640625 }, { "id": "identity_276787", "image": "FigureQA/images/97823.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is darker, Red or Firebrick?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.796875, "loss": 4.709949016571045 }, { "id": "identity_307347", "image": "IconQA/images/31244.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the quantity of stars even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.953125, "loss": 4.192631721496582 }, { "id": "identity_175639", "image": "IconQA/images/66417.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more submarines or turtles in the image?" }, { "from": "gpt", "value": "The answer is Equal number" } ], "clip_score": 29.609375, "loss": 4.116024017333984 }, { "id": "identity_266571", "image": "A-OKVQA/images/209844.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the boy's desired action with the ball among the following choices: catch it, bounce it, hit it, or dodge it?\nChoices:\n(A) catch it\n(B) bounce it\n(C) hit it\n(D) dodge it" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.109375, "loss": 4.5075178146362305 }, { "id": "identity_267447", "image": "TQA/images/question_images/dna_6760.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the correct term for the combination of sugar, phosphate group, and protein in a connected form?\nChoices:\n(A) hydrogen bond\n(B) deoxyribose\n(C) nucleotide\n(D) sugar-phosphate backbone" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.984375, "loss": 4.080934047698975 }, { "id": "identity_263661", "image": "ScienceQA/images/10682.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Maryland?\nChoices:\n(A) Annapolis\n(B) Albany\n(C) Jersey City\n(D) Baltimore" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.765625, "loss": 4.106600761413574 }, { "id": "identity_242483", "image": "Geometry3K/images/1545.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the values of \\( x \\) and \\( y \\) given that \\( AB = 3y - 8 \\) and \\( BC = 2x + 7 \\), and then use these values to find the perimeter of parallelogram \\( ABCD \\)" }, { "from": "gpt", "value": "The answer is 46" } ], "clip_score": 35.09375, "loss": 4.166981220245361 }, { "id": "identity_135894", "image": "DocVQA/images/lpff0055_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the subject of the fax mentioned in the 'Re:' section?" }, { "from": "gpt", "value": "The answer is ITIC continuous involvement with pesticides" } ], "clip_score": 22.53125, "loss": 4.656065940856934 }, { "id": "identity_326592", "image": "PMC-VQA/images/PMC7764608_diagnostics-10-01076-f009_328756.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the cells commonly seen here?\nChoices:\n(A) Eosinophils\n(B) Lymphocytes\n(C) Neutrophils\n(D) Monocytes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.71875, "loss": 4.505303382873535 }, { "id": "identity_299350", "image": "A-OKVQA/images/479466.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of product is being advertised in the pink box?\nChoices:\n(A) water\n(B) soda\n(C) coffee\n(D) gatorade" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.3046875, "loss": 4.0640997886657715 }, { "id": "identity_319523", "image": "AI2D/images/3340.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the outermost part of this structure?\nChoices:\n(A) vacuole\n(B) nucleus\n(C) cilia\n(D) plasma" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.0, "loss": 4.2624006271362305 }, { "id": "identity_329470", "image": "PMC-VQA/images/PMC5661094_fig6_232582.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of image is depicted in the caption?\nChoices:\n(A) CT scan \n(B) X-ray \n(C) MRI \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.75, "loss": 4.122602462768555 }, { "id": "identity_272515", "image": "FigureQA/images/72413.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Khaki the one with the largest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.578125, "loss": 4.584314823150635 }, { "id": "identity_327149", "image": "PMC-VQA/images/PMC5486041_ijms-18-01218-f004_182994.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the right section?\nChoices:\n(A) Evidence of tumor\n(B) Symptoms of tumor\n(C) No evidence of tumor\n(D) The cause of the tumor" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.90625, "loss": 4.531276226043701 }, { "id": "identity_106718", "image": "VQA2.0/images/231963.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of transportation is depicted in the image?" }, { "from": "gpt", "value": "The answer is Steam trains" } ], "clip_score": 24.84375, "loss": 4.076368808746338 }, { "id": "identity_62062", "image": "DocVQA/images/mxfw0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full name of the individual listed in the directory?" }, { "from": "gpt", "value": "The answer is Robert Ely Shank" } ], "clip_score": 24.6875, "loss": 4.366421222686768 }, { "id": "identity_283917", "image": "PMC-VQA/images/PMC7447497_F1_240040.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the patient, which segment of the superior sagittal sinus is thrombosed?\nChoices:\n(A) Anterior segment\n(B) Middle segment\n(C) Parietal segment\n(D) Occipital segment" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.21875, "loss": 4.2850518226623535 }, { "id": "identity_220752", "image": "A-OKVQA/images/53328.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible green bottles are on the table?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 12.0234375, "loss": 4.989049911499023 }, { "id": "identity_295205", "image": "A-OKVQA/images/529698.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following individuals or entities is the owner of this bike: a city resident, Marilyn Manson, a bike dealer, or Meatloaf?\nChoices:\n(A) city resident\n(B) marilyn manson\n(C) bike dealer\n(D) meatloaf" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.859375, "loss": 4.001562118530273 }, { "id": "identity_249267", "image": "PMC-VQA/images/PMC6242942_Fig2_397843.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the different colors representing inside the gallery, and how many distinct colors can be identified?" }, { "from": "gpt", "value": "The answer is four distinct colors" } ], "clip_score": 19.453125, "loss": 4.138289928436279 }, { "id": "identity_239677", "image": "A-OKVQA/images/391300.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the contents and condition of the vehicle, what activities might the people who ride in the vehicle have engaged in during their trip, and what evidence supports this conclusion?" }, { "from": "gpt", "value": "The answer is vacation" } ], "clip_score": 22.234375, "loss": 5.548524379730225 }, { "id": "identity_32437", "image": "PMC-VQA/images/PMC5795345_F5_270675.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which brain region is shown in the image? \nChoices:\n(A) right dorsal striatal region \n(B) left ventral striatal region \n(C) right ventral striatal region \n(D) left dorsal striatal region " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.5625, "loss": 4.2568278312683105 }, { "id": "identity_48917", "image": "DocVQA/images/hjxn0226_29.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which cementless stem has the highest number of cases in the period 2000-2004?" }, { "from": "gpt", "value": "The answer is PROFEMUR R VERS.4 Wright Cremascoli" } ], "clip_score": 19.8125, "loss": 4.822862148284912 }, { "id": "identity_245699", "image": "AI2D/images/567.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify and describe the function of the structure labeled as \"leaf scar\" in the diagram, and explain how it relates to the overall growth and development of the plant" }, { "from": "gpt", "value": "The answer is The leaf scar indicates previous leaf attachment and contains bundle traces" } ], "clip_score": 33.71875, "loss": 4.347878456115723 }, { "id": "identity_323966", "image": "PMC-VQA/images/PMC5152543_F0001_104355.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of MRI scan is shown in the image?\nChoices:\n(A) T1-weighted\n(B) T2-weighted\n(C) FLAIR\n(D) DWI" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.21875, "loss": 4.448892116546631 }, { "id": "identity_195685", "image": "PMC-VQA/images/PMC6776557_Fig6_45626.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the nuclei stained in this image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 28.453125, "loss": 4.121790885925293 }, { "id": "identity_332481", "image": "A-OKVQA/images/310524.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of place does this appear to be?\nChoices:\n(A) mall courtyard\n(B) prison yard\n(C) college campus\n(D) beach resort" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.28125, "loss": 4.338304042816162 }, { "id": "identity_294053", "image": "ScienceQA/images/8848.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic best describes this object: sour or bumpy?\nChoices:\n(A) sour\n(B) bumpy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.796875, "loss": 4.292686462402344 }, { "id": "identity_304677", "image": "ScienceQA/images/385.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony depicted?\nChoices:\n(A) Maryland\n(B) New Jersey\n(C) Rhode Island\n(D) Delaware" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.328125, "loss": 4.478013038635254 }, { "id": "identity_137670", "image": "DVQA/images/bar_train_00088904.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second group of bars from the top?" }, { "from": "gpt", "value": "The answer is seed" } ], "clip_score": 17.953125, "loss": 4.765309810638428 }, { "id": "identity_292481", "image": "PMC-VQA/images/PMC8464884_jcm-10-04057-f002_66122.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the focus of measurement in CBCT scans? \nChoices:\n(A) Tooth alignment\n(B) Retromolar space\n(C) Gum thickness\n(D) Cavity depth" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.109375, "loss": 4.662387847900391 }, { "id": "identity_308390", "image": "ScienceQA/images/15265.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Santa Fe\n(B) Boulder\n(C) Colorado Springs\n(D) Denver" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.515625, "loss": 4.30958890914917 }, { "id": "identity_325721", "image": "DocVQA/images/yhfg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the document?" }, { "from": "gpt", "value": "The answer is Cleveland Health Goals Project" } ], "clip_score": 22.40625, "loss": 4.795989990234375 }, { "id": "identity_249606", "image": "A-OKVQA/images/549849.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image, and what is the predominant activity they are engaged in?" }, { "from": "gpt", "value": "The answer is 9, travel" } ], "clip_score": 22.40625, "loss": 4.714066505432129 }, { "id": "identity_265295", "image": "A-OKVQA/images/490511.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the body does the blue protective device help protect?\nChoices:\n(A) chest\n(B) head\n(C) knees\n(D) elbows" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 6.703125, "loss": 4.390166282653809 }, { "id": "identity_143058", "image": "DocVQA/images/hnnp0227_34.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the applicant's full name?" }, { "from": "gpt", "value": "The answer is Beth Elaine Straus Ron" } ], "clip_score": 26.0, "loss": 4.663656711578369 }, { "id": "identity_108998", "image": "TextVQA/images/220c90884cded523.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many traffic signs can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.203125, "loss": 4.284477233886719 }, { "id": "identity_324608", "image": "PMC-VQA/images/PMC4403878_pone.0124652.g007_379357.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the asterisk indicate? \nChoices:\n(A) ectopic induction of puc-lacZ \n(B) wild type expression in the closed arrows \n(C) wild type expression in the open arrows \n(D) wild type expression in the stalk region of the disc" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 12.765625, "loss": 4.5039167404174805 }, { "id": "identity_10893", "image": "DVQA/images/bar_train_00094057.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What element does the lightcoral color represent?" }, { "from": "gpt", "value": "The answer is plan" } ], "clip_score": 25.328125, "loss": 4.361180782318115 }, { "id": "identity_335220", "image": "PMC-VQA/images/PMC6425324_fig2_453966.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What animal was used for the larynx reconstruction depicted in the image?\nChoices:\n(A) Cow\n(B) Sheep\n(C) Pig\n(D) Horse" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.109375, "loss": 4.023081302642822 }, { "id": "identity_290235", "image": "PMC-VQA/images/PMC3131277_pone-0021297-g001_101560.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the middle image depict?\nChoices:\n(A) ROI of a reconstructed grey-level image\n(B) Customized linear filtering\n(C) Customized non-linear filtering\n(D) Segmentation" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.640625, "loss": 4.9322590827941895 }, { "id": "identity_322460", "image": "DVQA/images/bar_train_00090913.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the highest total accuracy across all datasets?" }, { "from": "gpt", "value": "The answer is notion" } ], "clip_score": 32.21875, "loss": 4.6745710372924805 }, { "id": "identity_30615", "image": "PMC-VQA/images/PMC8113960_f2_438295.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrow indicate in the image? \nChoices:\n(A) Negative cells \n(B) Viral RNA \n(C) Positive cells \n(D) Control cells. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.21875, "loss": 4.668842792510986 }, { "id": "identity_203755", "image": "A-OKVQA/images/412764.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the tent on the far left?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 23.15625, "loss": 4.817519187927246 }, { "id": "identity_300856", "image": "ChartQA/images/18102.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the graph that is higher in 2010?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 18.90625, "loss": 4.379724025726318 }, { "id": "identity_280150", "image": "VizWiz/images/VizWiz_train_00005486.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the type of grass that this is?" }, { "from": "gpt", "value": "The answer is green" } ], "clip_score": 26.203125, "loss": 4.396581172943115 }, { "id": "identity_337424", "image": "PMC-VQA/images/PMC4730158_f4_466715.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the imaging technique used to capture this? \nChoices:\n(A) SEM \n(B) TEM \n(C) AFM \n(D) XRD" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.890625, "loss": 4.248768329620361 }, { "id": "identity_310912", "image": "FigureQA/images/60882.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.28125, "loss": 4.5759735107421875 }, { "id": "identity_232883", "image": "AI2D/images/1944.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the insect population were to decrease, how would this impact the populations of birds, owls, and foxes/weasels, and what would be the subsequent effect on the lynx population?" }, { "from": "gpt", "value": "The answer is Lynx population might increase" } ], "clip_score": 30.15625, "loss": 4.450209140777588 }, { "id": "identity_261592", "image": "PMC-VQA/images/PMC3078894_F6_92691.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the main subject of the image?\nChoices:\n(A) Microorganisms interactions \n(B) Cellular organelles \n(C) Molecular interactions \n(D) Tissue samples" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.40625, "loss": 5.343953609466553 }, { "id": "identity_106796", "image": "Super-CLEVR/images/superCLEVR_new_000668.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the tiny matte thing that is left of the big chopper?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 25.109375, "loss": 4.459944725036621 }, { "id": "identity_334609", "image": "FigureQA/images/91303.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the area under the Green curve the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.21875, "loss": 4.211864471435547 }, { "id": "identity_271771", "image": "ScienceQA/images/19636.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Ohio?\nChoices:\n(A) Boulder\n(B) Columbus\n(C) Portland\n(D) Cincinnati" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.96875, "loss": 4.286167144775391 }, { "id": "identity_266246", "image": "TQA/images/abc_question_images/human_system_respiratory_16204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, which letter represents the pharynx?\nChoices:\n(A) R\n(B) E\n(C) D\n(D) L" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.90625, "loss": 4.026180744171143 }, { "id": "identity_268822", "image": "A-OKVQA/images/533553.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are buildings elevated high above the ground in this area?\nChoices:\n(A) flooding\n(B) avoiding monsters\n(C) pest control\n(D) earthquakes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.2578125, "loss": 4.082430839538574 }, { "id": "identity_38099", "image": "AI2D/images/1495.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What stage of the water cycle occurs when so much water has condensed that the air cannot hold it anymore?\nChoices:\n(A) evaporation\n(B) precipitation\n(C) condensation\n(D) transpiration" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.84375, "loss": 4.129234313964844 }, { "id": "identity_20900", "image": "TabMWP/images/17134.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at this table. Is this relation a function?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.453125, "loss": 4.070501804351807 }, { "id": "identity_283251", "image": "VQA2.0/images/197351.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the uniforms worn by the team on the left?" }, { "from": "gpt", "value": "The answer is white" } ], "clip_score": 19.1875, "loss": 4.039802551269531 }, { "id": "identity_316012", "image": "TQA/images/question_images/human_system_respiratory_3598.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is located directly beneath your right lung?\nChoices:\n(A) pharynx\n(B) larynx\n(C) Diaphragm\n(D) left lung" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.484375, "loss": 4.3034515380859375 }, { "id": "identity_117432", "image": "VQA2.0/images/285131.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is indicated by the red arrow?" }, { "from": "gpt", "value": "The answer is Motorcycle" } ], "clip_score": 18.46875, "loss": 4.4612345695495605 }, { "id": "identity_320563", "image": "PMC-VQA/images/PMC5187579_brainsci-06-00065-f005_108210.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What reference point is used in this planning process?\nChoices:\n(A) Left subthalamic nucleus \n(B) Right subthalamic nucleus \n(C) Posterior edge of the anterior commissure \n(D) Anterior edge of the posterior commissure" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.4375, "loss": 4.346205711364746 }, { "id": "identity_217177", "image": "VQA2.0/images/482295.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of establishment is in the background on the left?" }, { "from": "gpt", "value": "The answer is A theater" } ], "clip_score": 17.671875, "loss": 4.327864170074463 }, { "id": "identity_8995", "image": "Super-CLEVR/images/superCLEVR_new_000628.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any small brown things that have the same shape as the large cyan rubber thing?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 17.953125, "loss": 5.146601676940918 }, { "id": "identity_329648", "image": "Super-CLEVR/images/superCLEVR_new_000437.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer small yellow bikes than gray wagons?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 26.765625, "loss": 5.320294380187988 }, { "id": "identity_286542", "image": "DocVQA/images/jzhd0227_38.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which room did the discussion take place?" }, { "from": "gpt", "value": "The answer is embassy west room" } ], "clip_score": 19.453125, "loss": 4.066036701202393 }, { "id": "identity_121326", "image": "VQA2.0/images/62995.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many camera tripods are set up in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.1875, "loss": 4.472745418548584 }, { "id": "identity_188412", "image": "PMC-VQA/images/PMC2828401_F4_57868.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is indicated by the yellow arrow in the small grade 1 image?" }, { "from": "gpt", "value": "The answer is The knee joint space" } ], "clip_score": 28.828125, "loss": 4.173927307128906 }, { "id": "identity_328322", "image": "A-OKVQA/images/423341.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of vehicle is being used in the middle lane?\nChoices:\n(A) bus\n(B) motorcycle\n(C) car\n(D) bicycle" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.15625, "loss": 4.46328592300415 }, { "id": "identity_149362", "image": "DocVQA/images/tsmk0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the newsletter editor mentioned in the memo?" }, { "from": "gpt", "value": "The answer is Al Parker" } ], "clip_score": 23.671875, "loss": 5.17814826965332 }, { "id": "identity_1660", "image": "FigureQA/images/59403.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Blue Violet the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.125, "loss": 4.345515251159668 }, { "id": "identity_214424", "image": "A-OKVQA/images/143576.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many large pots are visible on the stove in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.359375, "loss": 4.221360206604004 }, { "id": "identity_331678", "image": "PMC-VQA/images/PMC2365560_pone-0002125-g002_21766.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging method is used to visualize these tumors?\nChoices:\n(A) CT scan\n(B) MRI\n(C) X-ray\n(D) PET scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.4375, "loss": 4.122200965881348 }, { "id": "identity_237274", "image": "Super-CLEVR/images/superCLEVR_new_001180.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the materials of the small bus, the motorbike behind the big motorbike, and the big motorbike the same?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 17.375, "loss": 5.278304100036621 }, { "id": "identity_38276", "image": "VizWiz/images/VizWiz_train_00007805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What's that on the wall?" }, { "from": "gpt", "value": "The answer is storm drain gutter" } ], "clip_score": 24.9375, "loss": 4.707160949707031 }, { "id": "identity_34936", "image": "DVQA/images/bar_train_00141358.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the chart contain stacked bars?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 28.46875, "loss": 4.6007280349731445 }, { "id": "identity_7376", "image": "PMC-VQA/images/PMC6920284_Fig11_86575.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which vein is mentioned in the caption? \nChoices:\n(A) superior vena cava \n(B) inferior vena cave \n(C) pulmonary vein \n(D) renal vein. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.828125, "loss": 4.848820686340332 }, { "id": "identity_207084", "image": "A-OKVQA/images/176403.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What logo is predominantly featured on the banner in the background?" }, { "from": "gpt", "value": "The answer is redtouch media" } ], "clip_score": 17.84375, "loss": 4.966882228851318 }, { "id": "identity_233126", "image": "IconQA/images/76151.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you select two marbles without looking, what is the probability that at least one of them will be black?" }, { "from": "gpt", "value": "The answer is certain" } ], "clip_score": 31.203125, "loss": 4.700705528259277 }, { "id": "identity_337750", "image": "PMC-VQA/images/PMC6747042_f1_38423.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the black arrows indicate in this image?\nChoices:\n(A) Correct fusions between sepals,\n(B) Imperfect fusions between sepals,\n(C) Healthy growth of sepals,\n(D) Lack of fusion between sepals." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.109375, "loss": 4.3821797370910645 }, { "id": "identity_297315", "image": "A-OKVQA/images/400371.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What substance fills the gray object that the individual wearing a blue shirt and white shorts is seated on?\nChoices:\n(A) air\n(B) oil\n(C) meat\n(D) plastic" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.3671875, "loss": 4.579200267791748 }, { "id": "identity_237940", "image": "TQA/images/abc_question_images/earth_eclipses_11656.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which labeled area represents the region where a total solar eclipse would be observed, and explain why this region experiences a total solar eclipse?" }, { "from": "gpt", "value": "The answer is W" } ], "clip_score": 29.625, "loss": 4.497501850128174 }, { "id": "identity_312020", "image": "ScienceQA/images/18023.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What property is associated with this item?\nChoices:\n(A) rough\n(B) flexible" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.5, "loss": 4.290274143218994 }, { "id": "identity_129586", "image": "VQA2.0/images/366009.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bottles can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.875, "loss": 4.440082550048828 }, { "id": "identity_211297", "image": "VQA2.0/images/480153.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 13.6015625, "loss": 4.643213748931885 }, { "id": "identity_24496", "image": "A-OKVQA/images/95203.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of pants does the woman in the red hat have on?\nChoices:\n(A) leather\n(B) bell bottoms\n(C) denim\n(D) capris" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.4609375, "loss": 4.219939708709717 }, { "id": "identity_22143", "image": "A-OKVQA/images/33208.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What mammal is this traffic stop trying to keep safe by reminding drivers to drive safe?\nChoices:\n(A) human\n(B) dog\n(C) tiger\n(D) elephant" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.71875, "loss": 4.169233798980713 }, { "id": "identity_253525", "image": "A-OKVQA/images/535676.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key visual elements in the image that indicate the motorcade is celebrating a specific nation, and how do these elements correlate with the nation's cultural symbols?" }, { "from": "gpt", "value": "The answer is Ireland" } ], "clip_score": 27.1875, "loss": 5.118163108825684 }, { "id": "identity_30701", "image": "A-OKVQA/images/543551.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What country is this bridge located in?\nChoices:\n(A) china\n(B) australia\n(C) britain\n(D) italy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.890625, "loss": 4.224505424499512 }, { "id": "identity_124050", "image": "TextVQA/images/feef6a8118f75295.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many items in the display are primarily red and white?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 27.03125, "loss": 4.435996055603027 }, { "id": "identity_142524", "image": "DocVQA/images/khcn0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the recipient of the meeting forward notification?" }, { "from": "gpt", "value": "The answer is Brown, Byron" } ], "clip_score": 23.140625, "loss": 4.8927106857299805 }, { "id": "identity_192278", "image": "TextVQA/images/6658f4496891ac39.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the keyboard designed for a specific type of typing as suggested by the packaging?" }, { "from": "gpt", "value": "The answer is Yes, general and office typing" } ], "clip_score": 23.53125, "loss": 4.598751544952393 }, { "id": "identity_317906", "image": "PMC-VQA/images/PMC6267895_Fig6_404450.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging method was used to visualize the patient's lesions?\nChoices:\n(A) X-ray \n(B) Magnetic Resonance Imaging (MRI) \n(C) Computed Tomography (CT) \n(D) Positron Emission Tomography (PET)" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.78125, "loss": 4.091928482055664 }, { "id": "identity_305910", "image": "DVQA/images/bar_train_00078938.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset is represented by the palegoldenrod color?" }, { "from": "gpt", "value": "The answer is bundle" } ], "clip_score": 21.078125, "loss": 4.201505661010742 }, { "id": "identity_128761", "image": "A-OKVQA/images/138871.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the carpet in the room?" }, { "from": "gpt", "value": "The answer is Dark gray" } ], "clip_score": 22.546875, "loss": 4.265766620635986 }, { "id": "identity_299663", "image": "A-OKVQA/images/319275.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How would you describe the weather conditions in this location?\nChoices:\n(A) moderate\n(B) below freezing\n(C) mild\n(D) temperate" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.140625, "loss": 4.334348201751709 }, { "id": "identity_327084", "image": "A-OKVQA/images/146583.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How is the pattern of the stripes in these zebras?\nChoices:\n(A) identical\n(B) alike\n(C) unique\n(D) matching" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.390625, "loss": 4.098746299743652 }, { "id": "identity_303467", "image": "AI2D/images/649.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the layer in the center of the diagram?\nChoices:\n(A) outer core\n(B) crust\n(C) inner core\n(D) mantle" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.953125, "loss": 4.176994323730469 }, { "id": "identity_19516", "image": "VQA-AS/images/17980.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the rug?" }, { "from": "gpt", "value": "The answer is middle of floor" } ], "clip_score": 20.734375, "loss": 4.170921802520752 }, { "id": "identity_314245", "image": "PMC-VQA/images/PMC7271094_Fig1_184019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What method was used for reconstruction in the performed computed tomography angiography?\nChoices:\n(A) Colour Doppler study \n(B) Volume rendering technique \n(C) Pulmonary vein flow \n(D) Computed tomography angiography" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.125, "loss": 4.093672752380371 }, { "id": "identity_239540", "image": "TQA/images/question_images/earth_moon_phases_2737.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sequence of the moon phases starting from the New Moon and ending at the Full Moon, and how many phases are there in total in this sequence?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 33.5, "loss": 4.50607442855835 }, { "id": "identity_37591", "image": "FigureQA/images/99717.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Magenta the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.03125, "loss": 4.438416481018066 }, { "id": "identity_290110", "image": "VQA2.0/images/327895.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me the location of the man wearing a blue shirt?" }, { "from": "gpt", "value": "The answer is sidewalk" } ], "clip_score": 17.109375, "loss": 4.04164457321167 }, { "id": "identity_129561", "image": "A-OKVQA/images/214937.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many street lamps are visible in the background?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 14.9765625, "loss": 4.5214524269104 }, { "id": "identity_148778", "image": "DocVQA/images/mjvg0227_14.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last service mentioned on the right page?" }, { "from": "gpt", "value": "The answer is Services related to health needs" } ], "clip_score": 24.640625, "loss": 4.324150562286377 }, { "id": "identity_28532", "image": "PMC-VQA/images/PMC2614448_f4_32142.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the invaginated structure seen in the uppermost spherule? \nChoices:\n(A) Synaptic ribbon \n(B) Cross-section of a rod soma \n(C) HCat process \n(D) None of the above " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.703125, "loss": 4.5812883377075195 }, { "id": "identity_136819", "image": "DocVQA/images/spxw0023_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of transmission mentioned in the report?" }, { "from": "gpt", "value": "The answer is STD" } ], "clip_score": 23.296875, "loss": 4.541833400726318 }, { "id": "identity_274771", "image": "ScienceQA/images/6515.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Massachusetts?\nChoices:\n(A) Trenton\n(B) Boston\n(C) Cambridge\n(D) Charleston" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.359375, "loss": 4.17200231552124 }, { "id": "identity_107135", "image": "A-OKVQA/images/231295.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cows are lying down in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 29.140625, "loss": 4.613468170166016 }, { "id": "identity_282286", "image": "PMC-VQA/images/PMC7925057_FIG6_382183.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What distinguishes the left and right ultrasound images in terms of the joint shape, fluid depth, bone cortex presence, and type of ultrasound used?\nChoices:\n(A) Shape of the joint \n(B) Depth of fluid in joint \n(C) Presence of bone cortex \n(D) Type of ultrasound used" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.296875, "loss": 4.025815010070801 }, { "id": "identity_152372", "image": "ScienceQA/images/11458.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many water tanks can be observed in the background?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.859375, "loss": 4.333931922912598 }, { "id": "identity_262591", "image": "ScienceQA/images/20880.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How can the tide pool ecosystems in Salt Point State Park be best characterized? \nOptions:\n(A) Abundant nutrients in the water and diverse organisms.\n(B) Absence of sunlight, along with daily flooding and draining of seawater." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.5625, "loss": 4.02247428894043 }, { "id": "identity_316868", "image": "TQA/images/abc_question_images/human_system_digestive_16072.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the large intestine?\nChoices:\n(A) C\n(B) A\n(C) L\n(D) T" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.9375, "loss": 4.043902397155762 }, { "id": "identity_255414", "image": "A-OKVQA/images/340922.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the main issues being protested by the participants in the image?" }, { "from": "gpt", "value": "The answer is Government accountability, system distrust, banking reform, long-term protest" } ], "clip_score": 24.5625, "loss": 4.179877281188965 }, { "id": "identity_118858", "image": "A-OKVQA/images/386908.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles are in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.96875, "loss": 4.32216215133667 }, { "id": "identity_10379", "image": "PMC-VQA/images/PMC4619722_pone.0140537.g002_437031.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the white arrow indicate in the figure? \nChoices:\n(A) The distribution of IED from frontal to parietal \n(B) The distribution of IED from frontotemporal to temporoparietal \n(C) The distribution of IED from occipital to temporal \n(D) The distribution of IED from parietal to temporal " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.984375, "loss": 4.50712776184082 }, { "id": "identity_312397", "image": "PMC-VQA/images/PMC6199147_FIG2_385376.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of medical imaging was used to get this image?\nChoices:\n(A) X-ray\n(B) MRI\n(C) CT scan\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.6875, "loss": 4.1780171394348145 }, { "id": "identity_320876", "image": "VQA2.0/images/475076.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the blanket on the back of the animal?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 19.359375, "loss": 4.2625322341918945 }, { "id": "identity_13223", "image": "ScienceQA/images/11634.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Maryland?\nChoices:\n(A) Harrisburg\n(B) Annapolis\n(C) Augusta\n(D) Baltimore" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.25, "loss": 4.23137092590332 }, { "id": "identity_296899", "image": "FigureQA/images/45737.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Bubblegum the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.296875, "loss": 4.561705589294434 }, { "id": "identity_284978", "image": "ScienceQA/images/7604.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) New Hampshire\n(B) New Jersey\n(C) Maine\n(D) Pennsylvania" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.328125, "loss": 4.849373817443848 }, { "id": "identity_282126", "image": "FigureQA/images/87897.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options correctly states whether Deep Pink is the roughest?\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.28125, "loss": 4.138067722320557 }, { "id": "identity_290485", "image": "A-OKVQA/images/486415.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which multicolored food items are present in the two bowls?\nChoices:\n(A) fruit\n(B) vegetables\n(C) grains\n(D) legumes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.0546875, "loss": 4.114930629730225 }, { "id": "identity_243010", "image": "VizWiz/images/VizWiz_train_00017775.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main theme of the text in the image, and how does it relate to the visual elements present in the picture?" }, { "from": "gpt", "value": "The answer is The power of Allah over natural phenomena" } ], "clip_score": 29.6875, "loss": 4.091137886047363 }, { "id": "identity_307920", "image": "A-OKVQA/images/126020.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of business is this truck for?\nChoices:\n(A) refrigerated items\n(B) food truck\n(C) delivering packages\n(D) gas delivery" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.546875, "loss": 4.344048500061035 }, { "id": "identity_281969", "image": "A-OKVQA/images/552054.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which zone is this area most likely categorized as: business, tourist, residential, or shopping?\nChoices:\n(A) business\n(B) tourist\n(C) residential\n(D) shopping" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.1796875, "loss": 4.760830879211426 }, { "id": "identity_325081", "image": "ScienceQA/images/4286.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Carson City\n(B) Boise\n(C) Sacramento\n(D) Reno" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.125, "loss": 4.275062084197998 }, { "id": "identity_24552", "image": "ScienceQA/images/17931.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) New York\n(B) North Carolina\n(C) Georgia\n(D) Virginia" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.90625, "loss": 4.030016899108887 }, { "id": "identity_269380", "image": "AI2D/images/2303.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter or letters in the diagram represent Pupa?\nChoices:\n(A) C & D\n(B) A & C\n(C) B & D\n(D) A & B" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.828125, "loss": 4.267719745635986 }, { "id": "identity_336842", "image": "FigureQA/images/64116.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the value for Peru the highest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.5625, "loss": 4.6756439208984375 }, { "id": "identity_299776", "image": "VizWiz/images/VizWiz_train_00014188.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you specify the type of soup that was served?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 28.5625, "loss": 4.2070698738098145 }, { "id": "identity_25075", "image": "PMC-VQA/images/PMC3178647_pone-0025316-g002_109634.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the boxed area of (A)? \nChoices:\n(A) Epon without antibody labeling \n(B) The purpose of Pex10-GFP in P.tricornutum \n(C) Dividing P. tricornutum \n(D) Immuno labeling of Pex10-GFP. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.546875, "loss": 4.572099208831787 }, { "id": "identity_305406", "image": "TQA/images/question_images/human_system_respiratory_6194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is located between the larynx and the pharynx?\nChoices:\n(A) pleural membrane\n(B) lung\n(C) nasal cavity\n(D) epiglottis" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.234375, "loss": 4.049323081970215 }, { "id": "identity_270689", "image": "A-OKVQA/images/193540.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color hair does the woman positioned at the back of this quartet have? Is it pink, blonde, red, or blue?\nChoices:\n(A) pink\n(B) blonde\n(C) red\n(D) blue" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.1953125, "loss": 4.397740840911865 }, { "id": "identity_332877", "image": "A-OKVQA/images/301926.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which company's logo is visible on the white building?\nChoices:\n(A) mcdonalds\n(B) arbys\n(C) taco bell\n(D) burger king" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.296875, "loss": 4.199114799499512 }, { "id": "identity_266949", "image": "DocVQA/images/llyv0228_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which agricultural extension agent is providing coastal city dwellers with the chance to visit Hendry County?" }, { "from": "gpt", "value": "The answer is Ray Burgess" } ], "clip_score": 22.859375, "loss": 4.37894868850708 }, { "id": "identity_297336", "image": "PMC-VQA/images/PMC5916486_F1_305842.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which convention is utilized for the slice images in the figure? \nOptions:\n(A) Biomedical convention\n(B) Geological convention\n(C) Radiological convention\n(D) Solar system convention" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.390625, "loss": 4.384434700012207 }, { "id": "identity_222114", "image": "VizWiz/images/VizWiz_train_00015623.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many metal studs are visible on the front of the purse?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 24.765625, "loss": 4.699615478515625 }, { "id": "identity_296018", "image": "PMC-VQA/images/PMC3509398_F2_169201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Above which artery does the RCS originate? \nChoices:\n(A) Left main coronary artery\n(B) Long LAD\n(C) Proximal RCA\n(D) Distal RCA" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.765625, "loss": 4.19075870513916 }, { "id": "identity_105303", "image": "A-OKVQA/images/263974.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are parked on the sidewalk?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 29.90625, "loss": 4.371068000793457 }, { "id": "identity_274155", "image": "PMC-VQA/images/PMC8137781_Fig9_446471.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which specific abnormality is observed in the capitellum on the lateral radiograph? \n(A) Joint Dislocation \n(B) Osseous Sclerosis \n(C) Bone Fragmentation \n(D) Tendon Tear" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.3125, "loss": 4.577182292938232 }, { "id": "identity_36979", "image": "AI2D/images/4016.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Name the layer in between inner core and mantle?\nChoices:\n(A) Continental crust\n(B) Lithosphere\n(C) Outer core\n(D) Asthenosphere" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.671875, "loss": 4.504148483276367 }, { "id": "identity_2644", "image": "DVQA/images/bar_train_00011913.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first group of bars from the left?" }, { "from": "gpt", "value": "The answer is chance" } ], "clip_score": 16.421875, "loss": 4.031692028045654 }, { "id": "identity_186285", "image": "PMC-VQA/images/PMC6373363_Fig5_435174.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct colors are used to represent different structures or tracts in this image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.75, "loss": 4.0611042976379395 }, { "id": "identity_271183", "image": "AI2D/images/1403.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the figure shown?\nChoices:\n(A) insect\n(B) goat\n(C) human\n(D) none of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.921875, "loss": 4.384469032287598 }, { "id": "identity_1802", "image": "A-OKVQA/images/406205.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why have they made the trees blue?\nChoices:\n(A) visibility\n(B) attract pollinators\n(C) protect leaves\n(D) holidays" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.25, "loss": 4.02805233001709 }, { "id": "identity_307816", "image": "ScienceQA/images/17533.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which better describes this ecosystem?\nChoices:\n(A) It has cold, wet winters. It also has soil that is poor in nutrients.\n(B) It has soil that is rich in nutrients. It also has only a few types of trees." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.09375, "loss": 4.183039665222168 }, { "id": "identity_334463", "image": "A-OKVQA/images/284160.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who most frequently rides this conveyance?\nChoices:\n(A) cable workers\n(B) prisoners\n(C) tourists\n(D) commuters" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.9375, "loss": 4.403985500335693 }, { "id": "identity_285361", "image": "PMC-VQA/images/PMC5394153_Fig4_153497.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the implication of the bone window image?\nChoices:\n(A) Mucocele \n(B) Benign tumor \n(C) Dermoid/Epidermoid cyst \n(D) Teratoma." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.296875, "loss": 4.045899391174316 }, { "id": "identity_282304", "image": "PMC-VQA/images/PMC7594836_F1_285004.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the mass lesion located among the following choices: (A) Thoracic cavity, (B) Abdomen, (C) Pelvic cavity, or (D) Head and neck?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.390625, "loss": 4.74033260345459 }, { "id": "identity_222530", "image": "A-OKVQA/images/292543.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the cloth on the ox's back?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 21.984375, "loss": 4.112239837646484 }, { "id": "identity_12896", "image": "ScienceQA/images/17864.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) New Jersey\n(B) Connecticut\n(C) New York\n(D) Pennsylvania" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.953125, "loss": 4.26979923248291 }, { "id": "identity_105302", "image": "A-OKVQA/images/263974.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bag that the person riding the bicycle is carrying?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 24.859375, "loss": 4.267851829528809 }, { "id": "identity_306184", "image": "TabMWP/images/38043.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is it linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.640625, "loss": 4.256872653961182 }, { "id": "identity_318018", "image": "DocVQA/images/zhyc0227_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which areas were examined for the greatest caloric deficiency?" }, { "from": "gpt", "value": "The answer is Santiago and calama areas" } ], "clip_score": 21.65625, "loss": 5.4323015213012695 }, { "id": "identity_12597", "image": "ScienceQA/images/476.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Rhode Island?\nChoices:\n(A) Newport\n(B) Augusta\n(C) Providence\n(D) Carson City" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.65625, "loss": 4.09955358505249 }, { "id": "identity_330853", "image": "PMC-VQA/images/PMC4109997_pone-0103283-g004_308561.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the dotted line in the image?\nChoices:\n(A) The location of the brain,\n(B) The location of the axons,\n(C) The location of the ganglia,\n(D) The location of the larvae." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.171875, "loss": 4.0278167724609375 }, { "id": "identity_3540", "image": "FigureQA/images/70459.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Periwinkle the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.75, "loss": 4.38834285736084 }, { "id": "identity_295026", "image": "A-OKVQA/images/514243.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of shoes is the woman currently wearing? Is it:\n(A) sneakers\n(B) heels\n(C) flat\n(D) water shoes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.390625, "loss": 4.515985488891602 }, { "id": "identity_219792", "image": "PMC-VQA/images/PMC6221907_F3_393000.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color used to stain the elongated structures in the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 25.78125, "loss": 4.065520286560059 }, { "id": "identity_103392", "image": "A-OKVQA/images/120052.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are wearing hats?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 14.9296875, "loss": 4.295102119445801 }, { "id": "identity_222789", "image": "A-OKVQA/images/194462.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the side of the boat?" }, { "from": "gpt", "value": "The answer is Text in a foreign script, likely a name" } ], "clip_score": 14.328125, "loss": 4.223810195922852 }, { "id": "identity_307346", "image": "AI2D/images/3494.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the outer part of this called?\nChoices:\n(A) Cell wall\n(B) Vacuole\n(C) Nuclear membrane\n(D) Tonoplast" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.515625, "loss": 5.3159050941467285 }, { "id": "identity_196543", "image": "A-OKVQA/images/274593.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What company logo is visible on the fire hydrant?" }, { "from": "gpt", "value": "The answer is VIVAQUA" } ], "clip_score": 30.84375, "loss": 4.16896390914917 }, { "id": "identity_308663", "image": "PMC-VQA/images/PMC6158340_F2_372561.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the origin of the RGCs in the image?\nChoices:\n(A) Mouse,\n(B) Monkey,\n(C) Human,\n(D) Rat" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.9375, "loss": 4.31050443649292 }, { "id": "identity_303354", "image": "PMC-VQA/images/PMC3503823_pone-0049958-g004_167684.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the dotted white line indicate in the early pupal stage?\nChoices:\n(A) the wings of the butterfly\n(B) the eyes of the butterfly\n(C) the exoskeleton of the butterfly\n(D) the digestive system of the butterfly" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.859375, "loss": 4.4520673751831055 }, { "id": "identity_335721", "image": "A-OKVQA/images/297645.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people waiting in line for?\nChoices:\n(A) entering zoo\n(B) entering park\n(C) feeding elephants\n(D) riding elephants" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.21875, "loss": 4.133805274963379 }, { "id": "identity_6742", "image": "AI2D/images/4108.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What gas is emitted into the air in the cycle?\nChoices:\n(A) Oxygen\n(B) Sulfur\n(C) Calcium\n(D) Carbon dioxide" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.203125, "loss": 4.408783912658691 }, { "id": "identity_297204", "image": "DocVQA/images/phvd0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which subject is referenced in the document?" }, { "from": "gpt", "value": "The answer is ppbs" } ], "clip_score": 17.890625, "loss": 6.046920299530029 }, { "id": "identity_114316", "image": "VQA-AS/images/9469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many living beings are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.203125, "loss": 4.219888210296631 }, { "id": "identity_328775", "image": "A-OKVQA/images/184241.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the individual with the racquets sitting on?\nChoices:\n(A) bed\n(B) tree stump\n(C) office chair\n(D) bench" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.5, "loss": 4.242801189422607 }, { "id": "identity_158143", "image": "ScienceQA/images/19280.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of atoms in the acetaldehyde molecule?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 34.15625, "loss": 4.055031776428223 }, { "id": "identity_311144", "image": "A-OKVQA/images/64961.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How was the shade created?\nChoices:\n(A) carving\n(B) sewing machine\n(C) weaving\n(D) axe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.4296875, "loss": 4.782476902008057 }, { "id": "identity_320760", "image": "PMC-VQA/images/PMC4121303_F1_311281.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated with black arrows in this image?\nChoices:\n(A) Muscles.\n(B) Lungs.\n(C) Liver.\n(D) Heart." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.828125, "loss": 4.210217475891113 }, { "id": "identity_266078", "image": "Super-CLEVR/images/superCLEVR_new_001502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the regular bus located to the right of the tiny green object and the regular bus positioned to the left of the small green minivan constructed from the same material?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 26.046875, "loss": 4.641598224639893 }, { "id": "identity_70594", "image": "ScienceQA/images/11814.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many continents are visible in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 30.640625, "loss": 4.475855827331543 }, { "id": "identity_277789", "image": "AI2D/images/744.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What celestial body orbits around the sun in the given diagram?\nChoices:\n(A) Earth\n(B) Moon\n(C) Star\n(D) Sun" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.0, "loss": 4.166886806488037 }, { "id": "identity_72541", "image": "TQA/images/abc_question_images/photosynthesis_11264.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label corresponds to the process of transpiration?" }, { "from": "gpt", "value": "The answer is Label U" } ], "clip_score": 23.875, "loss": 4.1281328201293945 }, { "id": "identity_275765", "image": "ScienceQA/images/11204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What observable trait did Palaeocyparis have? Choose the trait that is visible on the fossil.\nOptions:\n(A) flowers\n(B) short, thin leaves\n(C) cones" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.328125, "loss": 4.677002906799316 }, { "id": "identity_140211", "image": "DocVQA/images/khnk0226_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the recommended fumigants for 1974?" }, { "from": "gpt", "value": "The answer is Telone, D-D and Vidden-D" } ], "clip_score": 17.28125, "loss": 4.2653985023498535 }, { "id": "identity_336881", "image": "PMC-VQA/images/PMC7891795_FIG1_370421.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the evidence of sinus disease in this image?\nChoices:\n(A) Right frontal sinus\n(B) Left sphenoid sinus\n(C) Right sphenoid sinus\n(D) Left maxillary sinus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.25, "loss": 4.6277875900268555 }, { "id": "identity_25125", "image": "A-OKVQA/images/334234.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is lit up?\nChoices:\n(A) desk\n(B) traffic lights\n(C) bar\n(D) tree" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.625, "loss": 4.212823390960693 }, { "id": "identity_310713", "image": "FigureQA/images/12825.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.21875, "loss": 4.083433151245117 }, { "id": "identity_213264", "image": "A-OKVQA/images/243875.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the man wearing on his head?" }, { "from": "gpt", "value": "The answer is A traditional cap" } ], "clip_score": 20.328125, "loss": 4.660687446594238 }, { "id": "identity_8419", "image": "AI2D/images/3123.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where dose the leaf blade comes from?\nChoices:\n(A) leaf sheath\n(B) Stem\n(C) Branch\n(D) fused" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.09375, "loss": 4.184528827667236 }, { "id": "identity_330844", "image": "VizWiz/images/VizWiz_train_00013151.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me what kind of plant this is?" }, { "from": "gpt", "value": "The answer is wild flowers" } ], "clip_score": 26.265625, "loss": 4.1583170890808105 }, { "id": "identity_184486", "image": "PMC-VQA/images/PMC3684569_pone-0066608-g004_211826.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a specific area in the image that is highlighted or marked?" }, { "from": "gpt", "value": "The answer is Yes, a dashed box" } ], "clip_score": 17.4375, "loss": 4.770872592926025 }, { "id": "identity_267174", "image": "A-OKVQA/images/458673.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the green color of the pillars?\nChoices:\n(A) paint\n(B) moss\n(C) mold\n(D) rust" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.5, "loss": 5.0925517082214355 }, { "id": "identity_300222", "image": "FigureQA/images/64791.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the value of Orange Red the highest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.578125, "loss": 4.075200080871582 }, { "id": "identity_330681", "image": "IconQA/images/102290.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there a triangle as a face on this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.875, "loss": 4.400860786437988 }, { "id": "identity_273230", "image": "PMC-VQA/images/PMC4661983_Fig6_448443.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging study was conducted after a period of 12 months? \nOptions:\n(A) X-ray \n(B) MRI \n(C) CT scan \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.953125, "loss": 4.372188568115234 }, { "id": "identity_22893", "image": "FigureQA/images/37880.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Chocolate the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.828125, "loss": 4.107476234436035 }, { "id": "identity_324878", "image": "A-OKVQA/images/69220.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this person holding?\nChoices:\n(A) chicken ring\n(B) cinnamon roll\n(C) zeppole\n(D) calamari" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.625, "loss": 4.173281192779541 }, { "id": "identity_788", "image": "AI2D/images/1836.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What doe the grashopper need for nourishment?\nChoices:\n(A) grass\n(B) deer\n(C) snail\n(D) none of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.546875, "loss": 4.148011684417725 }, { "id": "identity_332267", "image": "ScienceQA/images/14924.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of rock is this?\nChoices:\n(A) metamorphic\n(B) sedimentary" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.453125, "loss": 4.351031303405762 }, { "id": "identity_65217", "image": "AI2D/images/464.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What could be the possible reason for the difference in the appearance of the two containers?" }, { "from": "gpt", "value": "The answer is Texture, lighting, or dot density" } ], "clip_score": 25.015625, "loss": 4.196096420288086 }, { "id": "identity_219165", "image": "VQA2.0/images/18723.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many red flowers can be seen in the field?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.046875, "loss": 4.346229553222656 }, { "id": "identity_1246", "image": "ScienceQA/images/1566.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) scratchy\n(B) shiny" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.3125, "loss": 4.393216609954834 }, { "id": "identity_110414", "image": "A-OKVQA/images/313201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flags are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.234375, "loss": 4.3507561683654785 }, { "id": "identity_132461", "image": "A-OKVQA/images/185943.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many Volkswagen Beetles can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.984375, "loss": 4.703522682189941 }, { "id": "identity_298970", "image": "A-OKVQA/images/354444.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What object is positioned on the roof of the car?\nChoices:\n(A) monkeys\n(B) birds\n(C) surfboard\n(D) tree limb" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.921875, "loss": 4.335417747497559 }, { "id": "identity_48733", "image": "DocVQA/images/qrdw0217_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the Medical Editor who sent the fax?" }, { "from": "gpt", "value": "The answer is Stephen M. Parker" } ], "clip_score": 28.453125, "loss": 4.779538154602051 }, { "id": "identity_129838", "image": "VQA2.0/images/398442.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the vehicle in the center of the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 21.296875, "loss": 4.247563362121582 }, { "id": "identity_119427", "image": "TextVQA/images/8924f92be9064c4c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the cup dispenser to the left of the clock?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 19.921875, "loss": 4.67323112487793 }, { "id": "identity_761", "image": "A-OKVQA/images/507889.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of area is this?\nChoices:\n(A) stadium\n(B) port\n(C) beach\n(D) backyard" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.1875, "loss": 4.394234657287598 }, { "id": "identity_318494", "image": "Super-CLEVR/images/superCLEVR_new_000129.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the bus that is the same size as this motorbike?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 28.3125, "loss": 4.4287519454956055 }, { "id": "identity_17051", "image": "TextVQA/images/8c59e48619bd4871.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: who is the beer sponsor for this game?" }, { "from": "gpt", "value": "The answer is budweirer" } ], "clip_score": 19.921875, "loss": 5.098771572113037 }, { "id": "identity_142808", "image": "DocVQA/images/ffml0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the luncheon according to the telegram?" }, { "from": "gpt", "value": "The answer is To outline activities in foundation" } ], "clip_score": 28.3125, "loss": 4.639314651489258 }, { "id": "identity_302236", "image": "Super-CLEVR/images/superCLEVR_new_000505.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the metallic object that is the same color as the small metallic jet?" }, { "from": "gpt", "value": "The answer is fighter" } ], "clip_score": 20.359375, "loss": 5.066131114959717 }, { "id": "identity_131330", "image": "TextVQA/images/0ef1e20affd684bf.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which political party does the speaker represent according to the on-screen text?" }, { "from": "gpt", "value": "The answer is SNP" } ], "clip_score": 30.46875, "loss": 4.43070650100708 }, { "id": "identity_171641", "image": "TabMWP/images/18462.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which station has the first train departure after 5:00 A.M.?" }, { "from": "gpt", "value": "The answer is Campbell" } ], "clip_score": 27.09375, "loss": 4.444103717803955 }, { "id": "identity_41424", "image": "DVQA/images/bar_train_00022510.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second bar from the right?" }, { "from": "gpt", "value": "The answer is prison" } ], "clip_score": 21.15625, "loss": 4.420708179473877 }, { "id": "identity_267879", "image": "FigureQA/images/42978.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Green not the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.234375, "loss": 4.1230597496032715 }, { "id": "identity_25731", "image": "PMC-VQA/images/PMC8138938_F3_446747.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the turquoise color in the image? \nChoices:\n(A) It indicates the presence of cell walls, \n(B) It shows the location of nucleus-nucleus contact areas, \n(C) It indicates the location of cell membranes, \n(D) It shows the location of NPs crossing a cell wall. " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.421875, "loss": 4.277601718902588 }, { "id": "identity_338144", "image": "TabMWP/images/27870.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the table. Then answer the question. At this price, is there a shortage or a surplus?\nChoices:\n(A) shortage\n(B) surplus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.34375, "loss": 4.007361888885498 }, { "id": "identity_222851", "image": "A-OKVQA/images/400045.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many beverage cans are visible on the desk?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 23.125, "loss": 4.559178352355957 }, { "id": "identity_37057", "image": "A-OKVQA/images/573349.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What event is being filmed here?\nChoices:\n(A) protest\n(B) cake walk\n(C) horse show\n(D) parade" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.421875, "loss": 4.077432632446289 }, { "id": "identity_300981", "image": "FigureQA/images/1299.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Green Yellow?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.328125, "loss": 4.033753395080566 }, { "id": "identity_325530", "image": "PMC-VQA/images/PMC9091813_F4_276614.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the black arrowheads indicating in images A, F, B, and G?\nChoices:\n(A) fetal cells\n(B) placental cells\n(C) maternal cells\n(D) umbilical cord cells" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.1875, "loss": 4.5855231285095215 }, { "id": "identity_6539", "image": "TabMWP/images/13081.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is the function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.84375, "loss": 4.484152317047119 }, { "id": "identity_284149", "image": "A-OKVQA/images/428477.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What object is suspended in the air?\nChoices:\n(A) airplane\n(B) spaceship\n(C) kite\n(D) weather balloon" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.625, "loss": 4.06333589553833 }, { "id": "identity_8825", "image": "FigureQA/images/22732.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Turquoise the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.34375, "loss": 4.173336982727051 }, { "id": "identity_306225", "image": "PMC-VQA/images/PMC6425224_F4_453946.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the lesion located in relation to the sacrum?\nChoices:\n(A) Right \n(B) Left \n(C) Anterior \n(D) Posterior" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.34375, "loss": 4.675353050231934 }, { "id": "identity_287558", "image": "TextVQA/images/c97ac1efbb3c6273.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What action is the advertisement encouraging you to take regarding the event happening tomorrow?" }, { "from": "gpt", "value": "The answer is watch" } ], "clip_score": 14.796875, "loss": 4.83866024017334 }, { "id": "identity_185020", "image": "A-OKVQA/images/556722.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the front display of the train?" }, { "from": "gpt", "value": "The answer is UNICEM" } ], "clip_score": 25.171875, "loss": 4.438263416290283 }, { "id": "identity_214739", "image": "VQA2.0/images/12145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flags are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.09375, "loss": 4.132538795471191 }, { "id": "identity_326920", "image": "A-OKVQA/images/390213.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the green sign indicate?\nChoices:\n(A) plug in\n(B) give all\n(C) move mentally\n(D) say prayer" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.9453125, "loss": 4.419659614562988 }, { "id": "identity_275002", "image": "FigureQA/images/86125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange Red the color with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.171875, "loss": 4.768979072570801 }, { "id": "identity_14439", "image": "Super-CLEVR/images/superCLEVR_new_000350.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a dirtbike that is to the left of the large purple thing; is its color the same as the large motorbike in front of the large yellow metal road bike?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 28.9375, "loss": 5.204455852508545 }, { "id": "identity_248926", "image": "DocVQA/images/lffw0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is responsible for the agenda items related to nutrition and adolescence, and what is the status of these items?" }, { "from": "gpt", "value": "The answer is Commentaries; On Hold" } ], "clip_score": 21.875, "loss": 5.231626510620117 }, { "id": "identity_322185", "image": "TQA/images/question_images/erosion_6852.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is deposited on this slope?\nChoices:\n(A) soil\n(B) fine material\n(C) sand and shingle\n(D) rocks" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 36.4375, "loss": 4.113367080688477 }, { "id": "identity_9506", "image": "VQA-AS/images/6034.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is she on the table?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 19.125, "loss": 4.086884021759033 }, { "id": "identity_32415", "image": "PMC-VQA/images/PMC6627094_ijms-20-03047-f007_7646.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was imaged in the study? \nChoices:\n(A) HER2 expression \n(B) pancreatic cancer cells \n(C) bone marrow \n(D) lungs " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.65625, "loss": 4.130318641662598 }, { "id": "identity_291358", "image": "ScienceQA/images/4318.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which question is most suitable for Colton's experiment to answer?\nChoices:\n(A) Can pennies hold more drops of water mixed with dish soap or water mixed with hand soap?\n(B) Can pennies hold more drops of pure water or water mixed with hand soap?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.375, "loss": 4.395023345947266 }, { "id": "identity_6093", "image": "PMC-VQA/images/PMC8539564_jpm-11-01041-f001_93437.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most likely diagnosis based on the imaging findings? \nChoices:\n(A) Metastatic cancer \n(B) Hemangioma \n(C) Hepatocellular carcinoma \n(D) Fibrolamellar carcinoma " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.265625, "loss": 4.543137073516846 }, { "id": "identity_254446", "image": "DVQA/images/bar_train_00135860.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold for each item across all stores, and which item has the highest total sales?" }, { "from": "gpt", "value": "The answer is shift" } ], "clip_score": 35.5625, "loss": 5.528119087219238 }, { "id": "identity_329419", "image": "PMC-VQA/images/PMC6298256_f1_412562.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organs show increased brightness after the contrast agent is injected?\nChoices:\n(A) Kidneys and bladder \n(B) Stomach and intestines \n(C) Spleen and liver \n(D) Brain and spinal cord" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.0, "loss": 4.6410675048828125 }, { "id": "identity_143223", "image": "DocVQA/images/jggn0226_31.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the surgeon's name?" }, { "from": "gpt", "value": "The answer is Tigges" } ], "clip_score": 17.09375, "loss": 4.746860027313232 }, { "id": "identity_109876", "image": "VQA2.0/images/353056.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background of the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 18.234375, "loss": 4.47670316696167 }, { "id": "identity_271718", "image": "IconQA/images/78608.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the probability of selecting a black marble without looking?\nChoices:\n(A) certain\n(B) probable\n(C) impossible\n(D) unlikely" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.265625, "loss": 4.069892883300781 }, { "id": "identity_18620", "image": "DVQA/images/bar_train_00070503.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object is preferred by the least number of people summed across all the categories?" }, { "from": "gpt", "value": "The answer is flow" } ], "clip_score": 28.578125, "loss": 4.139481544494629 }, { "id": "identity_26521", "image": "A-OKVQA/images/486360.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on the string the men hold?\nChoices:\n(A) bird\n(B) kite\n(C) top\n(D) dog" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.40625, "loss": 4.5508713722229 }, { "id": "identity_218593", "image": "TextVQA/images/2d648ed9328308fd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible players are wearing knee pads?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.609375, "loss": 4.673833847045898 }, { "id": "identity_297487", "image": "TextVQA/images/d13b7ec72bd5eed5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of this storefront?" }, { "from": "gpt", "value": "The answer is rent" } ], "clip_score": 28.46875, "loss": 4.046193599700928 }, { "id": "identity_190187", "image": "PMC-VQA/images/PMC5303254_Fig3_126212.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What part of the body is being imaged in this picture?" }, { "from": "gpt", "value": "The answer is Thigh" } ], "clip_score": 22.15625, "loss": 4.035863399505615 }, { "id": "identity_304669", "image": "A-OKVQA/images/331386.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are these people located?\nChoices:\n(A) united kingdom\n(B) italy\n(C) germany\n(D) france" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.4765625, "loss": 5.173439979553223 }, { "id": "identity_268129", "image": "AI2D/images/3186.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the nucleus is considered the central component?\nChoices:\n(A) plasma membrane\n(B) cytoplasm\n(C) none of the above\n(D) nucleolus" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.03125, "loss": 4.345924377441406 }, { "id": "identity_297249", "image": "PMC-VQA/images/PMC6014307_F0005_332052.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which plane is utilized to visualize the collection in the peripelvic region?\nOptions:\n(A) Sagittal plane\n(B) Transverse plane\n(C) Coronal plane\n(D) Axial plane" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.921875, "loss": 4.592459201812744 }, { "id": "identity_297566", "image": "A-OKVQA/images/193640.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which animal is the hairstyle of the girl in green often compared to? Is it a pig, horse, cow, or wolf?\nChoices:\n(A) pig\n(B) horse\n(C) cow\n(D) wolf" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 8.8671875, "loss": 4.86760950088501 }, { "id": "identity_120666", "image": "Super-CLEVR/images/superCLEVR_new_001288.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 22.203125, "loss": 4.367295265197754 }, { "id": "identity_324982", "image": "A-OKVQA/images/41351.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are they doing on the laptop?\nChoices:\n(A) gaming\n(B) chatting\n(C) writing\n(D) school work" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.5, "loss": 4.2205657958984375 }, { "id": "identity_21831", "image": "TextVQA/images/b4673735de3cb9b9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: name one sponsor from the photo?" }, { "from": "gpt", "value": "The answer is flier" } ], "clip_score": 21.8125, "loss": 4.257821559906006 }, { "id": "identity_55530", "image": "PlotQA/images/144099.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which year had the lowest overall net taxes on products for all countries combined?" }, { "from": "gpt", "value": "The answer is The year with the shortest combined bar height" } ], "clip_score": 29.703125, "loss": 4.068750381469727 }, { "id": "identity_315439", "image": "ScienceQA/images/9878.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Frankfort\n(B) Carson City\n(C) Helena\n(D) Denver" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.546875, "loss": 4.385935306549072 }, { "id": "identity_313511", "image": "AI2D/images/3653.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which area in the diagram shows the iliac artery?\nChoices:\n(A) w\n(B) j\n(C) i\n(D) b" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.578125, "loss": 4.111123561859131 }, { "id": "identity_2369", "image": "PMC-VQA/images/PMC9042937_Fig3_265881.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is marked in green in the image? \nChoices:\n(A) Nuclei \n(B) Vascular endothelium \n(C) Synapses \n(D) Axons " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.5, "loss": 4.448019504547119 }, { "id": "identity_332351", "image": "PMC-VQA/images/PMC4172974_Fig3_322220.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the horizontal axis represent?\nChoices:\n(A) Dorsal view and external cross-sectional view.\n(B) Right lateral internal view and cross-sectional view.\n(C) Anterior view and oblique view.\n(D) Ventral view and transverse view." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.3125, "loss": 4.55513858795166 }, { "id": "identity_318800", "image": "FigureQA/images/49115.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Magenta the lower median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.5, "loss": 4.190133094787598 }, { "id": "identity_23791", "image": "A-OKVQA/images/379987.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are the eggs placed in the container?\nChoices:\n(A) protection\n(B) to cook\n(C) to eat\n(D) to dye" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.5078125, "loss": 4.485434532165527 }, { "id": "identity_207017", "image": "A-OKVQA/images/244088.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are walking in pairs?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 17.4375, "loss": 4.591014385223389 }, { "id": "identity_335851", "image": "PMC-VQA/images/PMC6938471_Fig1_92791.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the bottom row?\nChoices:\n(A) Initial T1 SE pre contrast.\n(B) Initial T1 SE post contrast (a).\n(C) T2 weighted image.\n(D) FLAIR image." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.6328125, "loss": 4.236207485198975 }, { "id": "identity_322242", "image": "A-OKVQA/images/300753.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the player just do?\nChoices:\n(A) ran home\n(B) hit ball\n(C) struck out\n(D) missed" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.765625, "loss": 4.196556091308594 }, { "id": "identity_157412", "image": "TQA/images/question_images/earth_moon_phases_755.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many positions of the Moon around the Earth are depicted in the diagram?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 35.03125, "loss": 4.537733554840088 }, { "id": "identity_275185", "image": "TQA/images/abc_question_images/cycle_water_11490.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter in the diagram represents precipitation?\nChoices:\n(A) F\n(B) H\n(C) R\n(D) N" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.5625, "loss": 4.181270122528076 }, { "id": "identity_37750", "image": "AI2D/images/2941.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: which layer is between j and f?\nChoices:\n(A) b\n(B) c\n(C) g\n(D) a" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.6171875, "loss": 4.498995780944824 }, { "id": "identity_227893", "image": "Super-CLEVR/images/superCLEVR_new_000572.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the combined sizes of the mountain bike and the cruiser equal to the size of the motorcycle?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 20.09375, "loss": 5.23787784576416 }, { "id": "identity_2836", "image": "DocVQA/images/xzyh0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the kind of water?" }, { "from": "gpt", "value": "The answer is battery supply" } ], "clip_score": 16.859375, "loss": 6.299989700317383 }, { "id": "identity_354", "image": "A-OKVQA/images/489961.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who is the sign for?\nChoices:\n(A) animals\n(B) drivers\n(C) pedestrians\n(D) bicyclists" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.96875, "loss": 4.431431293487549 }, { "id": "identity_166649", "image": "TabMWP/images/36428.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what price does the quantity demanded equal the quantity supplied?" }, { "from": "gpt", "value": "The answer is No price listed" } ], "clip_score": 31.3125, "loss": 4.279898643493652 }, { "id": "identity_104731", "image": "VQA-AS/images/12955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is depicted in the image above the fireplace?" }, { "from": "gpt", "value": "The answer is A painting" } ], "clip_score": 27.28125, "loss": 4.64171028137207 }, { "id": "identity_20777", "image": "PMC-VQA/images/PMC4937711_F19_49533.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the MRI finding in the region of the right cavernous sinus? \nChoices:\n(A) Hyperintense soft tissue deposit \n(B) Hypointense soft tissue deposit \n(C) Isointense soft tissue deposit \n(D) No soft tissue deposit seen " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.0625, "loss": 4.363544940948486 }, { "id": "identity_285697", "image": "PMC-VQA/images/PMC7684875_Fig2_303874.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following conditions is found in both lower lobes of the lungs? \n(A) pneumonia \n(B) pleural effusion \n(C) lung fibrosis \n(D) tuberculosis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.40625, "loss": 4.070988178253174 }, { "id": "identity_204310", "image": "Super-CLEVR/images/superCLEVR_new_001699.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have the same color as the large car?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 22.703125, "loss": 4.689347267150879 }, { "id": "identity_27143", "image": "A-OKVQA/images/346716.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color car roof here would be most dangerous to fall from?\nChoices:\n(A) black\n(B) white\n(C) all same\n(D) yellow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.9765625, "loss": 4.3259453773498535 }, { "id": "identity_322790", "image": "A-OKVQA/images/177289.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the area with this appliance called?\nChoices:\n(A) dorm\n(B) kitchenette\n(C) bathroom\n(D) kitchen" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.234375, "loss": 4.447903633117676 }, { "id": "identity_297456", "image": "PMC-VQA/images/PMC7359123_acn351106-fig-0002_213637.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which syndrome is depicted in the image provided?\nChoices:\n(A) Turner Syndrome\n(B) Sturge-Weber Syndrome\n(C) Klinefelter Syndrome\n(D) Parkinson's Syndrome" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.703125, "loss": 4.236663341522217 }, { "id": "identity_288183", "image": "A-OKVQA/images/136323.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What purpose do the tarps on these vessels serve for the interior of the boats?\nChoices:\n(A) hold water\n(B) nothing\n(C) keep dry\n(D) signal" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.0, "loss": 4.731743812561035 }, { "id": "identity_294993", "image": "A-OKVQA/images/166975.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who among the following is most likely the driver of the car based on the given choices? \n(A) facing camera\n(B) in store\n(C) facing bus\n(D) in bus" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.875, "loss": 4.2614617347717285 }, { "id": "identity_309681", "image": "IconQA/images/59834.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, which color are you more likely to get?\nChoices:\n(A) pink\n(B) black\n(C) neither; black and pink are equally likely" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.59375, "loss": 4.734192848205566 }, { "id": "identity_26446", "image": "PMC-VQA/images/PMC7243659_FIG5_176278.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the white arrow point to in the image? \nChoices:\n(A) Right atrium \n(B) Right ventricle \n(C) Left atrium \n(D) Left ventricle " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.421875, "loss": 4.694063663482666 }, { "id": "identity_12629", "image": "ScienceQA/images/13075.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) North Carolina\n(B) Virginia\n(C) Connecticut\n(D) Georgia" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.328125, "loss": 4.274813175201416 }, { "id": "identity_306859", "image": "FigureQA/images/62359.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.2890625, "loss": 4.653873443603516 }, { "id": "identity_35100", "image": "PMC-VQA/images/PMC5209318_Fig3_109471.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the dotted line in the image? \nChoices:\n(A) To demarcate the right margin of the trachea. \n(B) To show the location of the esophagus. \n(C) To demarcate the left margin of the trachea. \n(D) To show the location of the heart. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.53125, "loss": 4.522330284118652 }, { "id": "identity_16641", "image": "FigureQA/images/66484.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Lime Green the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.96875, "loss": 4.168703556060791 }, { "id": "identity_217728", "image": "A-OKVQA/images/449029.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many street signs are visible?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.21875, "loss": 4.421353340148926 }, { "id": "identity_329047", "image": "PMC-VQA/images/PMC4839123_Fig1_23080.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What method was used for the early detection of bone metastases?\nChoices:\n(A) X-ray\n(B) CT scan\n(C) MRI\n(D) PET scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.578125, "loss": 4.564506530761719 }, { "id": "identity_297029", "image": "A-OKVQA/images/250192.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the elephant in the image engaged in?\nChoices:\n(A) drinking\n(B) painting\n(C) playing soccer\n(D) eating" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.484375, "loss": 4.292139053344727 }, { "id": "identity_326542", "image": "PMC-VQA/images/PMC8359591_Fig2_33276.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was observed in the CT scan?\nChoices:\n(A) Smooth bone surface\n(B) Irregular erosions\n(C) Narrowing of joint space\n(D) Soft tissue swelling." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.078125, "loss": 4.1564106941223145 }, { "id": "identity_291294", "image": "A-OKVQA/images/172924.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which continent is the train located?\nChoices:\n(A) North America\n(B) Europe\n(C) Africa\n(D) Asia" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.953125, "loss": 4.078198432922363 }, { "id": "identity_108217", "image": "VQA2.0/images/22278.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct objects can be seen on top of the cross?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.125, "loss": 4.394964694976807 }, { "id": "identity_260420", "image": "AI2D/images/2884.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label indicates the layer that is directly beneath the enamel and surrounds the pulp?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.5625, "loss": 4.710928916931152 }, { "id": "identity_287526", "image": "ScienceQA/images/20372.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repulsive force towards each other?\nChoices:\n(A) attract\n(B) repel" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.890625, "loss": 4.126900672912598 }, { "id": "identity_283720", "image": "PMC-VQA/images/PMC6617077_jcm-08-00771-f001_6055.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the surface condition of the PEEK samples displayed in images a-c? Is it:\n(A) Treated\n(B) Untreated\n(C) Painted\n(D) Coated" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.171875, "loss": 4.3589091300964355 }, { "id": "identity_294956", "image": "AI2D/images/809.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How can bacteria and dead organic matter be classified?\nChoices:\n(A) secondary consumers\n(B) tertiary consumers\n(C) secondary producers\n(D) primary food source" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.59375, "loss": 4.376469135284424 }, { "id": "identity_284069", "image": "PMC-VQA/images/PMC3703402_fig5_216211.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the patient's brain is the lesion located?\nChoices:\n(A) Left frontal lobe\n(B) Right frontal lobe\n(C) Occipital lobe\n(D) Temporal lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.859375, "loss": 4.650927543640137 }, { "id": "identity_316895", "image": "Super-CLEVR/images/superCLEVR_new_001598.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer rubber things to the right of the blue metallic bicycle than large metallic bicycles?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 27.125, "loss": 5.222824573516846 }, { "id": "identity_5672", "image": "PMC-VQA/images/PMC4071974_fig4_301445.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was able to identify a field inhomogeneity artifact in the frontal region? \nChoices:\n(A) SWI \n(B) BS-MRV \n(C) CT \n(D) MRI\t" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.703125, "loss": 4.569863796234131 }, { "id": "identity_264944", "image": "ScienceQA/images/20149.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following organisms contains matter that was previously part of the bilberry?\nChoices:\n(A) bear sedge\n(B) grizzly bear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.09375, "loss": 4.329154968261719 }, { "id": "identity_6689", "image": "Super-CLEVR/images/superCLEVR_new_001134.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the green bicycle and the purple matte bicycle have the same size?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 29.796875, "loss": 5.361339092254639 }, { "id": "identity_11528", "image": "DocVQA/images/khwg0023_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the text wtitten on the top right corner?" }, { "from": "gpt", "value": "The answer is EXAMPLE:Chicago Region" } ], "clip_score": 19.6875, "loss": 4.5516180992126465 }, { "id": "identity_295662", "image": "A-OKVQA/images/557721.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the intended use of the cones?\nChoices:\n(A) to deter children\n(B) for selling\n(C) as obstacles\n(D) for decoration" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.46875, "loss": 4.73838472366333 }, { "id": "identity_257104", "image": "VQA-AS/images/13376.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are in the room, and what is the cat doing with the yarn?" }, { "from": "gpt", "value": "The answer is Seven objects, playing" } ], "clip_score": 27.59375, "loss": 5.351894378662109 }, { "id": "identity_212450", "image": "VQA2.0/images/542027.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the object directly in front of the person?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 15.1796875, "loss": 4.182619571685791 }, { "id": "identity_35541", "image": "A-OKVQA/images/245361.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a term related to this event?\nChoices:\n(A) goal\n(B) homerun\n(C) surfs up\n(D) balance beam" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.0625, "loss": 4.064258575439453 }, { "id": "identity_275039", "image": "VQA-AS/images/6329.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the curtain currently in the open position?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 19.796875, "loss": 4.464918613433838 }, { "id": "identity_270220", "image": "IconQA/images/11545.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture is the fourth in the sequence after the first picture of the moon?\nChoices:\n(A) rocket\n(B) star\n(C) moon" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.71875, "loss": 4.544281482696533 }, { "id": "identity_243989", "image": "VQA2.0/images/367935.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full name of the company written on the side of the plane, and what three letters are on the tail of the plane?" }, { "from": "gpt", "value": "The answer is Worldwide Services, ups" } ], "clip_score": 28.5625, "loss": 4.817626476287842 }, { "id": "identity_291057", "image": "A-OKVQA/images/4834.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of having all the logs? \n(A) for sale\n(B) to start a fire\n(C) to build a house\n(D) to balance a boat" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.34375, "loss": 4.518446922302246 }, { "id": "identity_295774", "image": "IconQA/images/60708.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does the dotted line serve as a line of symmetry?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.65625, "loss": 4.128605842590332 }, { "id": "identity_35972", "image": "ScienceQA/images/7966.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Arizona?\nChoices:\n(A) Phoenix\n(B) Olympia\n(C) Tucson\n(D) Denver" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.53125, "loss": 4.2715020179748535 }, { "id": "identity_265046", "image": "PMC-VQA/images/PMC5688175_Fig7_240165.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the red dashed line represent in image e)? \nChoices:\n(A) The proximal side of the airway \n(B) The distal side of the airway \n(C) The middle of the airway \n(D) The outside of the airway." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.921875, "loss": 4.253928184509277 }, { "id": "identity_294720", "image": "A-OKVQA/images/451972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of holiday parade is being celebrated in this event?\nChoices:\n(A) christmas\n(B) st patricks\n(C) flag day\n(D) veterans day" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.234375, "loss": 4.078017234802246 }, { "id": "identity_324752", "image": "ScienceQA/images/17876.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic fits this item?\nChoices:\n(A) rough\n(B) stretchy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.71875, "loss": 4.295446395874023 }, { "id": "identity_288378", "image": "TextVQA/images/554140b113c2f1c9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which book features a tree on its cover?" }, { "from": "gpt", "value": "The answer is participe" } ], "clip_score": 19.4375, "loss": 5.2466912269592285 }, { "id": "identity_277842", "image": "FigureQA/images/80524.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Purple the color with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.875, "loss": 4.006941318511963 }, { "id": "identity_196282", "image": "TextVQA/images/e13f311f4fdae378.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the field's boundary fence?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 14.171875, "loss": 4.102671146392822 }, { "id": "identity_189633", "image": "VQA2.0/images/173634.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.0625, "loss": 4.295609951019287 }, { "id": "identity_282179", "image": "ScienceQA/images/3312.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following colonies is depicted in the image? \n(A) Maine\n(B) Rhode Island\n(C) Connecticut\n(D) Florida" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.703125, "loss": 4.9283366203308105 }, { "id": "identity_1982", "image": "TQA/images/question_images/cycle_carbon_5026.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What releases nitrogen back to the atmosphere?\nChoices:\n(A) Dinitrifying bacteria\n(B) Nitrogen-fixing bacteria\n(C) Fertilizers\n(D) Plant" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.71875, "loss": 4.616534233093262 }, { "id": "identity_184075", "image": "A-OKVQA/images/102506.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of establishment is located behind the traffic light?" }, { "from": "gpt", "value": "The answer is A food establishment (Caras)" } ], "clip_score": 28.125, "loss": 4.819305419921875 }, { "id": "identity_106381", "image": "Super-CLEVR/images/superCLEVR_new_001729.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more bicycles or motor-powered vehicles in the image?" }, { "from": "gpt", "value": "The answer is Equal" } ], "clip_score": 22.421875, "loss": 4.119342803955078 }, { "id": "identity_18295", "image": "A-OKVQA/images/70636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is above the horses?\nChoices:\n(A) fly\n(B) kite\n(C) sky\n(D) dragon" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.609375, "loss": 4.272212982177734 }, { "id": "identity_263237", "image": "ScienceQA/images/20123.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Tennessee?\nChoices:\n(A) Columbia\n(B) Bismarck\n(C) Jackson\n(D) Nashville" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.015625, "loss": 4.018377780914307 }, { "id": "identity_286446", "image": "PMC-VQA/images/PMC3540650_fig1_177765.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this image, which muscle is being impacted?\nChoices:\n(A) Superior rectus muscle\n(B) Lateral rectus muscle\n(C) Inferior oblique muscle\n(D) Medial rectus muscle" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.03125, "loss": 4.22360372543335 }, { "id": "identity_287021", "image": "A-OKVQA/images/234717.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which of the following locations is lightning most likely to strike in this area? \n(A) water\n(B) vehicle\n(C) rocks\n(D) lightning rod" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 6.7265625, "loss": 4.210022926330566 }, { "id": "identity_334994", "image": "TextVQA/images/02a657b3b00696a0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How is this character described?" }, { "from": "gpt", "value": "The answer is bad company" } ], "clip_score": 12.8515625, "loss": 4.989513397216797 }, { "id": "identity_216013", "image": "A-OKVQA/images/284160.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different colors are used in the advertisement on the side of the bus?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 26.234375, "loss": 4.024764060974121 }, { "id": "identity_20085", "image": "PMC-VQA/images/PMC7234030_FIG1_173195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the finding shown in the image? \nChoices:\n(A) Cavitation \n(B) Consolidation \n(C) Nodular opacities \n(D) Pleural effusion " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.28125, "loss": 4.433727741241455 }, { "id": "identity_307363", "image": "FigureQA/images/85851.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Bubblegum's value the lowest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.453125, "loss": 4.140642166137695 }, { "id": "identity_34509", "image": "PMC-VQA/images/PMC6245846_Fig1_399035.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the dotted blue line represent in the image? \nChoices:\n(A) The patient's height \n(B) The location of the ASIS \n(C) The axis of the patient's body \n(D) The trial liner position " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.046875, "loss": 4.351953506469727 }, { "id": "identity_286252", "image": "A-OKVQA/images/334511.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: To which destination are the individuals currently going?\nChoices:\n(A) casinos\n(B) bathrooms\n(C) villas\n(D) market" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 8.6484375, "loss": 4.2294745445251465 }, { "id": "identity_289523", "image": "ScienceQA/images/12913.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repulsive force towards each other?\nChoices:\n(A) repel\n(B) attract" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.875, "loss": 4.070231914520264 }, { "id": "identity_268690", "image": "Super-CLEVR/images/superCLEVR_new_001160.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the big red objects on the left side of the gray matte car equal the number of gray objects on the left side of the small red airplane?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 21.609375, "loss": 4.875608921051025 }, { "id": "identity_294021", "image": "VizWiz/images/VizWiz_train_00015915.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which painting is this?" }, { "from": "gpt", "value": "The answer is pom tree" } ], "clip_score": 26.234375, "loss": 4.712618350982666 }, { "id": "identity_292232", "image": "A-OKVQA/images/513282.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country does this river flow through?\nChoices:\n(A) Italy\n(B) Britain\n(C) France\n(D) Spain" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.53125, "loss": 4.032135009765625 }, { "id": "identity_209393", "image": "A-OKVQA/images/166975.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly in front of the person standing?" }, { "from": "gpt", "value": "The answer is Bus" } ], "clip_score": 26.8125, "loss": 4.013609409332275 }, { "id": "identity_271405", "image": "AI2D/images/4318.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the subject matter of the diagram shown?\nChoices:\n(A) Moon Formation\n(B) Earth Formation\n(C) Solar System\n(D) None of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.921875, "loss": 4.08034610748291 }, { "id": "identity_34219", "image": "PMC-VQA/images/PMC7911842_diagnostics-11-00169-f001_375905.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which modality is shown in the leftmost plot? \nChoices:\n(A) contrast-enhanced T1 \n(B) T2 \n(C) T1 \n(D) Flair " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.046875, "loss": 4.319212436676025 }, { "id": "identity_310731", "image": "IconQA/images/59373.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a turtle. What is the third image?\nChoices:\n(A) sub\n(B) turtle\n(C) fish" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.765625, "loss": 4.7428812980651855 }, { "id": "identity_212348", "image": "VQA2.0/images/385803.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many obstacles with purple figures can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 14.8671875, "loss": 4.44666051864624 }, { "id": "identity_53658", "image": "DocVQA/images/jhly0000_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the Account Manager?" }, { "from": "gpt", "value": "The answer is G. N. Kuruc, Jr" } ], "clip_score": 22.53125, "loss": 5.1109299659729 }, { "id": "identity_121658", "image": "Super-CLEVR/images/superCLEVR_new_000239.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is positioned closest to the bottom edge of the image?" }, { "from": "gpt", "value": "The answer is Sedan" } ], "clip_score": 26.015625, "loss": 4.5836968421936035 }, { "id": "identity_1421", "image": "A-OKVQA/images/516414.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What yellow items sits on the boat?\nChoices:\n(A) stockings\n(B) bananas\n(C) net\n(D) mustard" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.109375, "loss": 4.145954608917236 }, { "id": "identity_80541", "image": "GeoQA+/images/5527.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,直线a∥b,直角三角板ABC的直角顶点C在直线b上,若∠1=54°,求∠CAB的度数为()" }, { "from": "gpt", "value": "The answer is Error in given information or image" } ], "clip_score": 27.90625, "loss": 4.54707670211792 }, { "id": "identity_31990", "image": "A-OKVQA/images/51702.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what sort of setting do these people find themselves?\nChoices:\n(A) mall\n(B) desert\n(C) urban\n(D) rural" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.1328125, "loss": 4.309980869293213 }, { "id": "identity_16501", "image": "TabMWP/images/4927.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is the function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.296875, "loss": 4.437016487121582 }, { "id": "identity_32281", "image": "PMC-VQA/images/PMC8105406_Fig1_435784.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the hip radiograph (C) show at 5 years of age? \nChoices:\n(A) Normal hip development \n(B) Hip dysplasia with a flat and irregular acetabular roof \n(C) Fractured hip \n(D) Dislocated hip joint. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.78125, "loss": 4.2022199630737305 }, { "id": "identity_260018", "image": "Super-CLEVR/images/superCLEVR_new_000069.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is in front of the green rubber object and behind the matte bicycle?" }, { "from": "gpt", "value": "The answer is yellow" } ], "clip_score": 32.125, "loss": 4.27667236328125 }, { "id": "identity_401", "image": "A-OKVQA/images/576652.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What's the name of the spot the player is standing on?\nChoices:\n(A) home grass\n(B) out field\n(C) ball field\n(D) pitcher's mound" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.0, "loss": 4.326001167297363 }, { "id": "identity_283725", "image": "VQA2.0/images/167559.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what is the knife pointed?" }, { "from": "gpt", "value": "The answer is toward medicine cabinet" } ], "clip_score": 25.328125, "loss": 4.320988178253174 }, { "id": "identity_217482", "image": "A-OKVQA/images/29525.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of transportation of the person on the far right of the image?" }, { "from": "gpt", "value": "The answer is Car" } ], "clip_score": 18.359375, "loss": 4.437539577484131 }, { "id": "identity_266932", "image": "AI2D/images/1792.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What factor from the food web diagram would lead to an increase in the frog population?\nChoices:\n(A) Increase in plants\n(B) Increase in insects\n(C) Decrease in plants\n(D) Decrease in insects" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.359375, "loss": 4.011224269866943 }, { "id": "identity_281421", "image": "A-OKVQA/images/542681.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which company is a sponsor of this event among the following choices: Amazon, Cinemax, HBO, or Garnier?\nChoices:\n(A) amazon\n(B) cinemax\n(C) hbo\n(D) garnier" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.0, "loss": 4.0007805824279785 }, { "id": "identity_281578", "image": "PMC-VQA/images/PMC5333163_f3_133608.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the transparent north-south longitudinal projection display?\nChoices:\n(A) Stratigraphy \n(B) Fossils \n(C) Magma \n(D) Minerals" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.890625, "loss": 4.977210521697998 }, { "id": "identity_330510", "image": "ScienceQA/images/8962.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Des Moines\n(B) Cedar Rapids\n(C) Davenport\n(D) Frankfort" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.109375, "loss": 4.342202663421631 }, { "id": "identity_15778", "image": "AI2D/images/3314.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of outermost part of a cell?\nChoices:\n(A) centriole\n(B) cell membrane\n(C) ribosome\n(D) mitochondria" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.75, "loss": 4.319937705993652 }, { "id": "identity_295308", "image": "A-OKVQA/images/281725.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which winter activity are the people engaging?\nChoices:\n(A) ice skating\n(B) luge\n(C) snowboarding\n(D) ice hockey" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.625, "loss": 4.009017467498779 }, { "id": "identity_285057", "image": "ScienceQA/images/16407.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Maryland?\nChoices:\n(A) Augusta\n(B) Harrisburg\n(C) Annapolis\n(D) Newport" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.28125, "loss": 4.110729217529297 }, { "id": "identity_157350", "image": "TQA/images/question_images/tides_145.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary difference between image (a) and image (b)?" }, { "from": "gpt", "value": "The answer is Direction of ocean bulging" } ], "clip_score": 32.15625, "loss": 5.002450942993164 }, { "id": "identity_299050", "image": "Super-CLEVR/images/superCLEVR_new_000883.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there an equal number of minivans in front of the matte car as there are fighters?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 25.65625, "loss": 5.009727478027344 }, { "id": "identity_294309", "image": "A-OKVQA/images/150410.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is located behind the man among the following options: a cardboard box, a wrestling ring, a basketball, or a replica airplane?\nChoices:\n(A) cardboard box\n(B) wrestling ring\n(C) basketball\n(D) replica airplane" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.921875, "loss": 4.000283241271973 }, { "id": "identity_327278", "image": "DocVQA/images/nykh0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is this letter addressed to?" }, { "from": "gpt", "value": "The answer is Miss Bev Rosen" } ], "clip_score": 29.40625, "loss": 4.924447536468506 }, { "id": "identity_201330", "image": "Super-CLEVR/images/superCLEVR_new_001906.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the smallest vehicle?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 19.859375, "loss": 4.043776512145996 }, { "id": "identity_15103", "image": "A-OKVQA/images/575222.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on the front of the train?\nChoices:\n(A) bear\n(B) clown\n(C) elephant\n(D) zebra" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.140625, "loss": 4.025036811828613 }, { "id": "identity_24303", "image": "PMC-VQA/images/PMC8687640_f01_142757.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the possible diagnosis based on the CT scan? \nChoices:\n(A) Fracture \n(B) Infection \n(C) Tumor \n(D) Arthritis " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.5625, "loss": 5.15906286239624 }, { "id": "identity_332181", "image": "PMC-VQA/images/PMC4956624_Fig26_53972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What diagnostic tool was likely used to make this observation?\nChoices:\n(A) X-ray.\n(B) MRI.\n(C) CT scan.\n(D) Echocardiogram." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.1875, "loss": 4.770447731018066 }, { "id": "identity_222657", "image": "A-OKVQA/images/197918.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dolls can be seen in the background?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.3125, "loss": 4.6405558586120605 }, { "id": "identity_233700", "image": "AI2D/images/488.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the trophic level of the hawk and explain its role in the food chain, considering its interactions with at least two other organisms in the diagram" }, { "from": "gpt", "value": "The answer is Secondary consumer, controls primary consumers, supports producers" } ], "clip_score": 29.109375, "loss": 4.163763523101807 }, { "id": "identity_41484", "image": "DocVQA/images/lfdh0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of Dr. George E. Bunce in this report?" }, { "from": "gpt", "value": "The answer is Consultant to OIR/NIH" } ], "clip_score": 23.796875, "loss": 4.161892414093018 }, { "id": "identity_195495", "image": "VQA2.0/images/51555.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible light sources are there in the room?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.03125, "loss": 4.332977294921875 }, { "id": "identity_222243", "image": "VQA2.0/images/467753.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the woman's shorts?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 14.84375, "loss": 4.61997652053833 }, { "id": "identity_187109", "image": "PMC-VQA/images/PMC9161094_f1_299799.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the intensity of the image?" }, { "from": "gpt", "value": "The answer is High intensity" } ], "clip_score": 17.65625, "loss": 4.050083637237549 }, { "id": "identity_284602", "image": "A-OKVQA/images/206394.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose is the circular silver object on the boat typically used?\nChoices:\n(A) diving\n(B) steering\n(C) sitting\n(D) fishing" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.71875, "loss": 4.711364269256592 }, { "id": "identity_294920", "image": "PMC-VQA/images/PMC6414466_F1_449407.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the left column of the image display?\nChoices:\n(A) L/CD207+ specific staining \n(B) T-cell specific staining \n(C) B-cell specific staining \n(D) Macrophage specific staining" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.8125, "loss": 4.157806396484375 }, { "id": "identity_15447", "image": "Super-CLEVR/images/superCLEVR_new_000377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any small cyan tandem bikes on the right side of the tiny regular bus?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 22.421875, "loss": 5.45701265335083 }, { "id": "identity_16466", "image": "VQA2.0/images/81128.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the man carrying a purse?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 17.21875, "loss": 4.407174110412598 }, { "id": "identity_23259", "image": "IconQA/images/76226.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner less likely to land?\nChoices:\n(A) blue\n(B) white" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.515625, "loss": 4.253844738006592 }, { "id": "identity_203509", "image": "TextVQA/images/29300b671ed492ed.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the chair in the background?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 12.2109375, "loss": 5.432559490203857 }, { "id": "identity_294176", "image": "FigureQA/images/38352.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Blue or Pale Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.171875, "loss": 4.517556667327881 }, { "id": "identity_263647", "image": "AI2D/images/2351.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the letter C among the given choices? Is it:\n(A) Seed\n(B) None of the above\n(C) Fruit\n(D) Flower" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.46875, "loss": 4.6607346534729 }, { "id": "identity_293008", "image": "A-OKVQA/images/436808.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which country can this park be found?\nChoices:\n(A) United States\n(B) Venezuela\n(C) Mexico\n(D) Canada" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.8125, "loss": 4.0030012130737305 }, { "id": "identity_310046", "image": "AI2D/images/1470.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is label c in the diagram?\nChoices:\n(A) central vent\n(B) central vent\n(C) cinder cone crater\n(D) rock fragments" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 34.875, "loss": 4.118136405944824 }, { "id": "identity_328584", "image": "PMC-VQA/images/PMC9290596_papr13055-fig-0001_343188.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the spine is responsible for transmitting pain signals in the axial back?\nChoices:\n(A) Lumbar region\n(B) Sacral region\n(C) Cervical region\n(D) Thoracic region" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.4375, "loss": 4.079233646392822 }, { "id": "identity_295851", "image": "IconQA/images/43467.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) rotate\n(B) shift\n(C) invert" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.1875, "loss": 4.446202754974365 }, { "id": "identity_308861", "image": "DocVQA/images/ylvw0217_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which Project has a target Publication date of 3Q2002?" }, { "from": "gpt", "value": "The answer is Totelle CD-ROM 1 and 2mg" } ], "clip_score": 22.125, "loss": 4.540663242340088 }, { "id": "identity_208655", "image": "PMC-VQA/images/PMC4472651_Fig1_398035.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of artifact is shown in this image?" }, { "from": "gpt", "value": "The answer is No artifact" } ], "clip_score": 22.234375, "loss": 4.345199108123779 }, { "id": "identity_35514", "image": "PMC-VQA/images/PMC6341153_Fig1_425609.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the solid arrow indicate? \nChoices:\n(A) pituitary stalk \n(B) medulla oblongata \n(C) optic chiasm \n(D) pineal gland " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.125, "loss": 5.011399269104004 }, { "id": "identity_307269", "image": "PMC-VQA/images/PMC7819965_f3_347108.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the zoomed section A display?\nChoices:\n(A) Inclusion of C. muridarum inside the OE \n(B) Inclusion of C. muridarum around the OE \n(C) No immunolabelling for C. muridarum \n(D) Inclusion of C. muridarum inside the control mouse" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.0625, "loss": 4.509197235107422 }, { "id": "identity_325080", "image": "ScienceQA/images/9698.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared characteristic of these items?\nChoices:\n(A) sticky\n(B) rough" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.09375, "loss": 4.2911057472229 }, { "id": "identity_286153", "image": "PMC-VQA/images/PMC7484669_F4_251931.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On illness day 2, which lobes are impacted by COVID-19 pneumonia?\nChoices:\n(A) Both upper lobes.\n(B) Right lower lobe and left middle lobe.\n(C) Right middle lobe and left lower lobe.\n(D) Left upper lobe and right lower lobe." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.625, "loss": 4.0594682693481445 }, { "id": "identity_32229", "image": "IconQA/images/60014.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a plane. Which picture is fifth?\nChoices:\n(A) train\n(B) bear\n(C) plane" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.59375, "loss": 4.066300392150879 }, { "id": "identity_33130", "image": "PMC-VQA/images/PMC7281545_cancers-12-01085-f002_187445.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is stained in green? \nChoices:\n(A) macrophages \n(B) DAPI \n(C) RPS27 \n(D) CD68 " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.875, "loss": 5.320865631103516 }, { "id": "identity_311341", "image": "Super-CLEVR/images/superCLEVR_new_000895.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What size is the shiny bicycle on the right side of the large gray thing?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 25.0, "loss": 4.234086513519287 }, { "id": "identity_294605", "image": "A-OKVQA/images/387328.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which direction are the three individuals on the right moving?\nChoices:\n(A) towards\n(B) right\n(C) away\n(D) left" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 11.7421875, "loss": 4.203313827514648 }, { "id": "identity_311266", "image": "DocVQA/images/mkvw0217_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what stage is the 'Totelle Combined Monograph' project?" }, { "from": "gpt", "value": "The answer is Under development by DesignWrite based on parthenon draft" } ], "clip_score": 15.8515625, "loss": 5.085654258728027 }, { "id": "identity_304453", "image": "Super-CLEVR/images/superCLEVR_new_001802.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any bikes like this?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 24.5, "loss": 4.905479907989502 }, { "id": "identity_284201", "image": "PMC-VQA/images/PMC4377159_Fig1_372559.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was discovered in the patient's left neck area?\nChoices:\n(A) Adenocarcinoma\n(B) Hodgkin Lymphoma\n(C) Tonsillitis\n(D) Leukemia" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.6875, "loss": 4.959186553955078 }, { "id": "identity_332905", "image": "A-OKVQA/images/467854.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this person doing?\nChoices:\n(A) smoking cigarette\n(B) plowing field\n(C) stealing horses\n(D) feeding horses" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.953125, "loss": 4.521333694458008 }, { "id": "identity_337005", "image": "A-OKVQA/images/205243.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of boats is this marina designed for?\nChoices:\n(A) sail\n(B) motor boat\n(C) row boat\n(D) yacht" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.15625, "loss": 4.260499954223633 }, { "id": "identity_20494", "image": "ScienceQA/images/9616.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Maryland\n(B) Pennsylvania\n(C) Florida\n(D) Connecticut" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.453125, "loss": 4.112678050994873 }, { "id": "identity_299607", "image": "A-OKVQA/images/523262.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what reason have the people assembled together?\nChoices:\n(A) to pose\n(B) to eat\n(C) to wrestle\n(D) for warmth" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.9453125, "loss": 4.042381763458252 }, { "id": "identity_29562", "image": "ScienceQA/images/13734.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these four objects have in common?\nChoices:\n(A) flexible\n(B) hard\n(C) slippery" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.859375, "loss": 4.121415615081787 }, { "id": "identity_33193", "image": "AI2D/images/3359.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the flower includes the anther and the filament?\nChoices:\n(A) stamen\n(B) petal\n(C) carpel\n(D) sepal" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.5, "loss": 4.064611911773682 }, { "id": "identity_328913", "image": "PMC-VQA/images/PMC7641295_Fig2_290908.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What might the abnormal regions in the lower lobes indicate?\nChoices:\n(A) Emphysema\n(B) Pneumonia\n(C) Tuberculosis\n(D) Asthma" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.875, "loss": 4.3430609703063965 }, { "id": "identity_190924", "image": "PMC-VQA/images/PMC5167692_F1_106251.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which direction are the arrows in the MRI image pointing?" }, { "from": "gpt", "value": "The answer is Left" } ], "clip_score": 33.8125, "loss": 4.4338836669921875 }, { "id": "identity_235056", "image": "TextVQA/images/3e569f2f4c818ed6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the third word in the second line on the left page?" }, { "from": "gpt", "value": "The answer is president" } ], "clip_score": 14.3359375, "loss": 4.116222381591797 }, { "id": "identity_330628", "image": "PMC-VQA/images/PMC4735056_fig4_467986.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the pulmonary infiltration located in the radiograph?\nChoices:\n(A) Right lung\n(B) Left lung\n(C) Bilateral\n(D) Unavailable" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.6875, "loss": 4.01773738861084 }, { "id": "identity_277073", "image": "PMC-VQA/images/PMC5084403_Fig10_88889.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does red fluorescence indicate in terms of cellular structures?\nChoices:\n(A) Endoplasmic reticulum \n(B) Mitochondria \n(C) Nucleus \n(D) Golgi apparatus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.484375, "loss": 4.0153303146362305 }, { "id": "identity_265584", "image": "FigureQA/images/18372.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Magenta or Web Maroon?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.71875, "loss": 4.656370639801025 }, { "id": "identity_277640", "image": "ScienceQA/images/12495.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being pointed out in the highlighted area? \nOptions:\n(A) Fiji\n(B) Kiribati\n(C) Tuvalu\n(D) Tonga" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.859375, "loss": 4.376535892486572 }, { "id": "identity_299354", "image": "AI2D/images/1495.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which stage of the water cycle happens when the air is unable to hold the condensed water anymore due to excessive condensation?\nChoices:\n(A) evaporation\n(B) precipitation\n(C) condensation\n(D) transpiration" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.375, "loss": 4.099184513092041 }, { "id": "identity_132208", "image": "Super-CLEVR/images/superCLEVR_new_001531.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the largest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 22.015625, "loss": 4.495607852935791 }, { "id": "identity_37883", "image": "IconQA/images/43344.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of circles even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.46875, "loss": 4.3830766677856445 }, { "id": "identity_276972", "image": "TQA/images/abc_question_images/parts_neuron_17225.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents the dendrite in the diagram? \n(A) M\n(B) L\n(C) N\n(D) R" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.84375, "loss": 4.213072776794434 }, { "id": "identity_60698", "image": "DVQA/images/bar_train_00182644.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest value for test?" }, { "from": "gpt", "value": "The answer is spread" } ], "clip_score": 20.453125, "loss": 4.069169521331787 }, { "id": "identity_277488", "image": "VizWiz/images/VizWiz_train_00009553.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of this bag?" }, { "from": "gpt", "value": "The answer is black red" } ], "clip_score": 27.515625, "loss": 4.475882053375244 }, { "id": "identity_300594", "image": "A-OKVQA/images/231466.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these waterways similar in function to in other places?\nChoices:\n(A) streams\n(B) lakes\n(C) tarmacs\n(D) streets" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.421875, "loss": 4.172853469848633 }, { "id": "identity_107092", "image": "A-OKVQA/images/250592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible flagpoles are there on the boat?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.296875, "loss": 4.747440338134766 }, { "id": "identity_39202", "image": "DVQA/images/bar_train_00176940.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label has the highest value for 'twenty'?" }, { "from": "gpt", "value": "The answer is relief" } ], "clip_score": 23.4375, "loss": 4.527127742767334 }, { "id": "identity_35787", "image": "A-OKVQA/images/381948.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of transport aircraft flies above?\nChoices:\n(A) cargo\n(B) passenger\n(C) private\n(D) military" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.421875, "loss": 4.05741548538208 }, { "id": "identity_338337", "image": "A-OKVQA/images/406734.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What event will these individuals take part in?\nChoices:\n(A) motorcycle parade\n(B) marathon\n(C) protest\n(D) touring" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.328125, "loss": 4.072600364685059 }, { "id": "identity_328386", "image": "A-OKVQA/images/143569.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which national museum is nearby?\nChoices:\n(A) bar\n(B) temple\n(C) print\n(D) grafton" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.2578125, "loss": 4.041921138763428 }, { "id": "identity_111286", "image": "VQA2.0/images/305710.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing white traditional attire?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 22.6875, "loss": 4.5612287521362305 }, { "id": "identity_328095", "image": "PMC-VQA/images/PMC4538772_fig3_415205.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of scan was used to detect the bone lesions?\nChoices:\n(A) PET scan\n(B) EEG scan\n(C) MRI scan\n(D) Axial CT scan" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.515625, "loss": 4.012446880340576 }, { "id": "identity_7121", "image": "FigureQA/images/57294.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Maroon the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.671875, "loss": 4.860992908477783 }, { "id": "identity_225480", "image": "IconQA/images/33184.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Calculate the total time each friend waited if the bus arrived at 12:00 PM. Who waited the longest?" }, { "from": "gpt", "value": "The answer is Ron" } ], "clip_score": 22.890625, "loss": 4.4405670166015625 }, { "id": "identity_331149", "image": "A-OKVQA/images/251255.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What word best describes this place?\nChoices:\n(A) sunny\n(B) snowy\n(C) rainy\n(D) tsunami" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.03125, "loss": 4.7875471115112305 }, { "id": "identity_64723", "image": "ScienceQA/images/3189.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tertiary consumers are depicted in this food web?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 30.25, "loss": 4.878129005432129 }, { "id": "identity_297578", "image": "Super-CLEVR/images/superCLEVR_new_001017.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer small purple trucks than shiny cars?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 21.140625, "loss": 5.087421417236328 }, { "id": "identity_233219", "image": "PMC-VQA/images/PMC4619722_pone.0140537.g002_437031.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the direction of the distribution of IED indicated by the white arrow, and what is the significance of the color coding in the figure?" }, { "from": "gpt", "value": "The answer is Distribution from occipital to temporal, red/yellow for increased activity" } ], "clip_score": 30.578125, "loss": 4.066092491149902 }, { "id": "identity_288629", "image": "AI2D/images/753.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which object is the arrow in the diagram indicating towards? \nOptions:\n(A) The direction to the nearest shopping mall\n(B) The direction to the Sun\n(C) The Moon\n(D) The Earth" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.375, "loss": 4.265773296356201 }, { "id": "identity_71231", "image": "ScienceQA/images/6426.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which country is located on the northern tip of the highlighted continent?" }, { "from": "gpt", "value": "The answer is Venezuela" } ], "clip_score": 27.265625, "loss": 4.373752117156982 }, { "id": "identity_322112", "image": "PMC-VQA/images/PMC7291456_Fig2_191452.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What technique was employed to obtain the photomicrographs?\nChoices:\n(A) Fluorescence microscopy \n(B) Phase contrast microscopy \n(C) Dark-field microscopy \n(D) Transmission electron microscopy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.4375, "loss": 4.558741569519043 }, { "id": "identity_266729", "image": "FigureQA/images/86691.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orange the color with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.84375, "loss": 4.074650764465332 }, { "id": "identity_266809", "image": "DVQA/images/bar_train_00025232.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To which category does the color mediumpurple belong?" }, { "from": "gpt", "value": "The answer is date" } ], "clip_score": 19.625, "loss": 4.18733024597168 }, { "id": "identity_246249", "image": "A-OKVQA/images/573973.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are on the desk that are used for writing or drawing, and what are they?" }, { "from": "gpt", "value": "The answer is Four pens/pencils" } ], "clip_score": 18.703125, "loss": 4.019139289855957 }, { "id": "identity_23467", "image": "A-OKVQA/images/399258.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why do the horses wear leg coverings?\nChoices:\n(A) parade elevation\n(B) armor\n(C) decoration\n(D) digging tools" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.203125, "loss": 4.0527729988098145 }, { "id": "identity_259903", "image": "VQA2.0/images/178000.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name on the shirt of the player who is directly in front of number 17 and wearing a yellow jersey?" }, { "from": "gpt", "value": "The answer is CUI" } ], "clip_score": 11.484375, "loss": 4.92109489440918 }, { "id": "identity_338290", "image": "PMC-VQA/images/PMC4592368_Fig1_429401.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of imaging is used to create the images in the middle column?\nChoices:\n(A) T1-weighted MPRAGE images\n(B) raw axial T2-weighted FLAIR images\n(C) both T1-weighted MPRAGE and axial T2-weighted FLAIR images\n(D) a different type of imaging technique" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.328125, "loss": 4.399561405181885 }, { "id": "identity_281493", "image": "PMC-VQA/images/PMC6724363_advs1235-fig-0004_33505.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which MRI sequence was utilized to capture the images: T2-weighted, T1-weighted, Diffusion-weighted, or Proton-density-weighted?\nChoices:\n(A) T2-weighted \n(B) T1-weighted \n(C) Diffusion-weighted \n(D) Proton-density-weighted" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.671875, "loss": 4.010411262512207 }, { "id": "identity_287369", "image": "PMC-VQA/images/PMC7973355_Fig21_394867.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, where is the fluid accumulation located?\nChoices:\n(A) Anterior to the bladder\n(B) Within the prostate gland\n(C) Lateral to the bladder\n(D) Behind the bladder" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.59375, "loss": 4.143408298492432 }, { "id": "identity_304396", "image": "A-OKVQA/images/163479.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of water body is shown here?\nChoices:\n(A) pond\n(B) ocean\n(C) lake\n(D) river" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.21875, "loss": 4.385008335113525 }, { "id": "identity_291377", "image": "A-OKVQA/images/310808.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color of the roof of the boat with a few people on it?\nOptions:\n(A) purple\n(B) red\n(C) green\n(D) blue" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.015625, "loss": 4.065870761871338 }, { "id": "identity_299528", "image": "PMC-VQA/images/PMC7171746_Fig1_155181.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which sign is visible in the brain MRI in option A, B, C, or D?\nChoices:\n(A) SCP sign \n(B) PCA sign \n(C) MCP sign \n(D) MCA sign" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.1875, "loss": 4.009183406829834 }, { "id": "identity_275775", "image": "TQA/images/question_images/parts_ocean_floor_9205.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which feature connects the continental slope to the abyssal plain: shoreline, submarine canyon, continental rise, or coastal plain?\nChoices:\n(A) shoreline\n(B) submarine canyon\n(C) continental rise\n(D) coastal plain" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 38.5, "loss": 4.028255462646484 }, { "id": "identity_333041", "image": "PMC-VQA/images/PMC6469836_Fig1_467056.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of sequence was used to capture this image?\nChoices:\n(A) T2w axial \n(B) T1w sagittal \n(C) T1w coronal \n(D) FLAIR coronal" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.0625, "loss": 4.265702247619629 }, { "id": "identity_206766", "image": "VQA2.0/images/534468.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many electronic devices are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.421875, "loss": 4.447542190551758 }, { "id": "identity_2379", "image": "ScienceQA/images/887.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the question that Camille's experiment can best answer.\nChoices:\n(A) Does the humidity level where tomato seeds are planted affect the number of tomato seedlings that grow?\n(B) Do more tomato seedlings grow when they are planted in soil with fertilizer compared to soil without fertilizer?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.796875, "loss": 4.013852119445801 }, { "id": "identity_283671", "image": "PMC-VQA/images/PMC8106577_Fig1_436021.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the SEM image?\nOptions:\n(A) a fully developed gerbera flower head.\n(B) a flower head with natural damage.\n(C) a budding gerbera.\n(D) a gerbera leaf." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.109375, "loss": 4.232190132141113 }, { "id": "identity_301925", "image": "DVQA/images/bar_train_00168561.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are these bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 19.890625, "loss": 4.140566825866699 }, { "id": "identity_297141", "image": "ScienceQA/images/12588.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of South Carolina?\nChoices:\n(A) Jackson\n(B) Charleston\n(C) Columbia\n(D) Trenton" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.015625, "loss": 4.045876979827881 }, { "id": "identity_283464", "image": "TabMWP/images/13379.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.8125, "loss": 4.507072448730469 }, { "id": "identity_146836", "image": "DocVQA/images/kzbn0226_35.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What should be included in the protocol according to EPA?" }, { "from": "gpt", "value": "The answer is strengths and limitations from different streams" } ], "clip_score": 26.234375, "loss": 4.939443588256836 }, { "id": "identity_221178", "image": "PMC-VQA/images/PMC2575188_F2_29522.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color surrounding the indicated tumor?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 27.59375, "loss": 4.459869384765625 }, { "id": "identity_269822", "image": "DVQA/images/bar_train_00095895.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which bar label is located fourth from the left in each group?" }, { "from": "gpt", "value": "The answer is switch" } ], "clip_score": 22.5, "loss": 4.35336446762085 }, { "id": "identity_316752", "image": "A-OKVQA/images/225919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who put this animal in the box?\nChoices:\n(A) hobo\n(B) dorothy\n(C) bike rider\n(D) wicked witch" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.65625, "loss": 4.150886535644531 }, { "id": "identity_293575", "image": "TextVQA/images/951d338fd366d398.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this establishment a store?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 19.09375, "loss": 4.861255168914795 }, { "id": "identity_263384", "image": "PMC-VQA/images/PMC7981177_fig1_396137.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which position is depicted in the image provided, with the options being Extension position, Flexion position, Supine position, and Prone position?\nChoices:\n(A) Extension position \n(B) Flexion position \n(C) Supine position \n(D) Prone position" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.34375, "loss": 4.237013339996338 }, { "id": "identity_264902", "image": "ScienceQA/images/1343.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the following organisms, which one contains matter that was previously part of the phytoplankton?\nChoices:\n(A) sea otter\n(B) kelp bass\n(C) kelp" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.875, "loss": 4.709609031677246 }, { "id": "identity_119203", "image": "VQA2.0/images/344566.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shorts worn by the person in the center?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 18.59375, "loss": 4.797209739685059 }, { "id": "identity_284066", "image": "A-OKVQA/images/312926.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What object is being held by the person wearing green?\nChoices:\n(A) tray\n(B) soda\n(C) baton\n(D) childs hand" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 7.46484375, "loss": 4.376272678375244 }, { "id": "identity_18730", "image": "ScienceQA/images/15014.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Rhode Island?\nChoices:\n(A) Trenton\n(B) Montpelier\n(C) Providence\n(D) Newport" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.390625, "loss": 4.140719890594482 }, { "id": "identity_293714", "image": "PMC-VQA/images/PMC2900746_F0001_68373.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which condition was eliminated by the X-ray imaging? \nChoices:\n(A) Bronchitis \n(B) Lung abscess \n(C) Pneumothorax \n(D) Cystic fibrosis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.71875, "loss": 4.005114555358887 }, { "id": "identity_124329", "image": "PMC-VQA/images/PMC8936940_f2_227285.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the fluorescence on the left side of the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 25.640625, "loss": 4.031650066375732 }, { "id": "identity_27048", "image": "Super-CLEVR/images/superCLEVR_new_001769.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the cruiser and the green object behind the big matte motorbike have the same size?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 35.9375, "loss": 5.037921905517578 }, { "id": "identity_28064", "image": "A-OKVQA/images/56145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the hill above the sea brown with furrows?\nChoices:\n(A) sports field\n(B) rodeo\n(C) farm land\n(D) housing development" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 8.6328125, "loss": 4.184330940246582 }, { "id": "identity_189282", "image": "A-OKVQA/images/187286.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the store with the telephone number visible in the image?" }, { "from": "gpt", "value": "The answer is Kameta" } ], "clip_score": 12.421875, "loss": 4.713778495788574 }, { "id": "identity_23387", "image": "ScienceQA/images/14710.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these three objects have in common?\nChoices:\n(A) bouncy\n(B) opaque\n(C) translucent" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 5.7734375, "loss": 4.424347877502441 }, { "id": "identity_128416", "image": "PMC-VQA/images/PMC6922538_F2_87534.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the medical image?" }, { "from": "gpt", "value": "The answer is Axial plane" } ], "clip_score": 26.875, "loss": 4.020496845245361 }, { "id": "identity_144551", "image": "MapQA/images/map_9778.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a resident range between 1949976-3283992?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 29.3125, "loss": 4.1857991218566895 }, { "id": "identity_186980", "image": "A-OKVQA/images/196418.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many traffic cones are visible in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 22.390625, "loss": 5.081725597381592 }, { "id": "identity_324219", "image": "PMC-VQA/images/PMC5986944_F4_324142.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What structure is shown in the image?\nChoices:\n(A) Lung\n(B) Heart\n(C) Brain\n(D) Liver" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.203125, "loss": 4.654812812805176 }, { "id": "identity_293694", "image": "A-OKVQA/images/554636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of business is being promoted on the building with white letters?\nChoices:\n(A) electronics store\n(B) sports retailer\n(C) food chain\n(D) repair shop" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 9.6640625, "loss": 4.17072057723999 }, { "id": "identity_20079", "image": "PMC-VQA/images/PMC8305520_Fig2_17359.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the pulmonary nodule located?\nChoices:\n(A) Right upper lobe \n(B) Right lower lobe \n(C) Left upper lobe \n(D) Left lower lobe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.4375, "loss": 4.378018856048584 }, { "id": "identity_326265", "image": "A-OKVQA/images/38934.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of structure is located in the top right corner of the train?\nChoices:\n(A) fire\n(B) statue\n(C) lighthouse\n(D) skyscraper" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.015625, "loss": 4.230281829833984 }, { "id": "identity_256245", "image": "IconQA/images/75152.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you select two marbles without looking, how likely is it that both will be black?" }, { "from": "gpt", "value": "The answer is certain" } ], "clip_score": 28.40625, "loss": 4.489036560058594 }, { "id": "identity_289753", "image": "PMC-VQA/images/PMC8425872_acm213355-fig-0020_53993.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of case is represented by the dashed dose-volume histogram lines?\nChoices:\n(A) shielded lung case\n(B) unshielded case\n(C) typical lung case\n(D) atypical lung case" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.640625, "loss": 4.1207709312438965 }, { "id": "identity_334911", "image": "DVQA/images/bar_train_00112888.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the least accuracy?" }, { "from": "gpt", "value": "The answer is means" } ], "clip_score": 32.875, "loss": 4.734355449676514 }, { "id": "identity_213216", "image": "A-OKVQA/images/407528.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the streetlights?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 21.234375, "loss": 4.608053684234619 }, { "id": "identity_302421", "image": "A-OKVQA/images/485413.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of horseback riding style is shown here?\nChoices:\n(A) western\n(B) arabian\n(C) english\n(D) group" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.890625, "loss": 4.012861728668213 }, { "id": "identity_322806", "image": "VizWiz/images/VizWiz_train_00004608.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the poster in the image?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 14.4765625, "loss": 4.326268672943115 }, { "id": "identity_337362", "image": "PMC-VQA/images/PMC8165677_Fig1_456707.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most likely condition of the individual?\nChoices:\n(A) Appendicitis\n(B) Pneumonia\n(C) Malaria\n(D) Sinusitis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.03125, "loss": 4.689095497131348 }, { "id": "identity_126618", "image": "VQA-AS/images/12912.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many portraits are hanging on the wall?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 16.640625, "loss": 4.547605037689209 }, { "id": "identity_275613", "image": "ScienceQA/images/21142.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of West Virginia?\nChoices:\n(A) Laramie\n(B) Charleston\n(C) Tallahassee\n(D) Columbus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.9375, "loss": 4.237138748168945 }, { "id": "identity_218154", "image": "TextVQA/images/851cffc716532542.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many children are wearing a bow in their hair?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.765625, "loss": 4.531031608581543 }, { "id": "identity_27005", "image": "Super-CLEVR/images/superCLEVR_new_000824.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any other thing that has the same shape as the small green metallic object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 17.046875, "loss": 5.032155990600586 }, { "id": "identity_17509", "image": "DVQA/images/bar_train_00116945.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first group of bars from the bottom?" }, { "from": "gpt", "value": "The answer is sand" } ], "clip_score": 21.328125, "loss": 4.470456123352051 }, { "id": "identity_276589", "image": "AI2D/images/2166.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does label C represent in the given options?\nChoices:\n(A) Whitehead\n(B) Fungus overwintering on roots as mycelium\n(C) Runner hyphae on roots\n(D) Severe infection of plant" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.46875, "loss": 4.281870365142822 }, { "id": "identity_328692", "image": "ScienceQA/images/18636.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic fits this item?\nChoices:\n(A) yellow\n(B) translucent" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.046875, "loss": 4.286856174468994 }, { "id": "identity_6315", "image": "PMC-VQA/images/PMC4402572_fig10_379167.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is presented in the image? \nChoices:\n(A) Face mask \n(B) Brain mask \n(C) Heart mask \n(D) Liver mask " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.03125, "loss": 4.226809501647949 }, { "id": "identity_204026", "image": "PMC-VQA/images/PMC5583184_Fig5_211156.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different colors are used to label structures in this image?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 22.265625, "loss": 4.0370588302612305 }, { "id": "identity_30002", "image": "A-OKVQA/images/261487.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who was the most recent player of this sport to be on the cover of Sports Illustrated?\nChoices:\n(A) naomi osaka\n(B) andre agassi\n(C) monica seles\n(D) serena williams" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.25, "loss": 4.092749118804932 }, { "id": "identity_298607", "image": "A-OKVQA/images/366789.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What accessory is the woman on the bicycle wearing?\nChoices:\n(A) hat\n(B) crown\n(C) backpack\n(D) tiara" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.0, "loss": 4.3629560470581055 }, { "id": "identity_281593", "image": "PMC-VQA/images/PMC7306660_FIG2_196315.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the high-intensity uptake observed in the left leg? \nOptions:\n(A) Ankle\n(B) Thigh\n(C) Knee\n(D) Hip" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.921875, "loss": 4.752411842346191 }, { "id": "identity_294941", "image": "VQA2.0/images/174185.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the person in the buggy wearing shoes?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 24.46875, "loss": 4.142457485198975 }, { "id": "identity_283547", "image": "PMC-VQA/images/PMC3552813_f4_180745.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the black arrows in the image provided?\nChoices:\n(A) Areas of unfilled tunneling and resorption space \n(B) Fatty bone marrow changes \n(C) Cartilage bars in cortical bone \n(D) Areas of cortical bone loss" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.5, "loss": 4.924635410308838 }, { "id": "identity_293529", "image": "ScienceQA/images/20657.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Maryland?\n(A) Jefferson City\n(B) Albany\n(C) Annapolis\n(D) Dover" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.1875, "loss": 4.027078628540039 }, { "id": "identity_182547", "image": "VQA2.0/images/219859.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible cyclists are wearing helmets?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 26.015625, "loss": 4.475409984588623 }, { "id": "identity_337974", "image": "A-OKVQA/images/162792.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who was the previous owner of this railway?\nChoices:\n(A) orient express\n(B) amtrak\n(C) greyhound\n(D) cn" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.0, "loss": 4.012956142425537 }, { "id": "identity_185166", "image": "TextVQA/images/08a375b6c5e291be.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the individual's jacket?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 15.2421875, "loss": 4.140521049499512 }, { "id": "identity_139595", "image": "MapQA/images/map_9295.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a percentage range of woods between 21.2% and 32.2%?" }, { "from": "gpt", "value": "The answer is an integer number" } ], "clip_score": 34.34375, "loss": 4.070071220397949 }, { "id": "identity_128639", "image": "TextVQA/images/79c6d4bf9082b322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many street lamps can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.9375, "loss": 4.272751331329346 }, { "id": "identity_12566", "image": "Super-CLEVR/images/superCLEVR_new_000070.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the tiny car left of the rubber road bike in front of the tiny yellow motorbike?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 26.21875, "loss": 4.906739711761475 }, { "id": "identity_47280", "image": "DocVQA/images/nxxw0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name signed at the bottom of the page?" }, { "from": "gpt", "value": "The answer is Miles J" } ], "clip_score": 21.859375, "loss": 5.0413994789123535 }, { "id": "identity_150342", "image": "ScienceQA/images/13649.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many items in the image are primarily made of rubber?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.21875, "loss": 4.456259727478027 }, { "id": "identity_221179", "image": "PMC-VQA/images/PMC2575188_F2_29522.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct areas of high activity are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.71875, "loss": 4.725646018981934 }, { "id": "identity_269852", "image": "Super-CLEVR/images/superCLEVR_new_000724.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the color of the large object behind the blue car match the color of the matte wagon?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 23.984375, "loss": 5.376819610595703 }, { "id": "identity_291170", "image": "ScienceQA/images/16742.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Vermont?\nChoices:\n(A) Cheyenne\n(B) Savannah\n(C) Montpelier\n(D) Burlington" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.578125, "loss": 4.261084079742432 }, { "id": "identity_320884", "image": "ScienceQA/images/15490.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared characteristic of these items?\nChoices:\n(A) sticky\n(B) shiny" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.25, "loss": 4.800821781158447 }, { "id": "identity_32076", "image": "A-OKVQA/images/110251.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is behind the fence?\nChoices:\n(A) cat\n(B) dog\n(C) antelope\n(D) playground" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.515625, "loss": 4.250744819641113 }, { "id": "identity_156292", "image": "ScienceQA/images/20856.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the north pole of the magnets?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 31.46875, "loss": 4.022533416748047 }, { "id": "identity_122823", "image": "PMC-VQA/images/PMC7475899_materials-13-03463-f006_249677.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the matrix in which the fibers are embedded?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 24.28125, "loss": 4.495525360107422 }, { "id": "identity_314753", "image": "ScienceQA/images/2261.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is shown?\nChoices:\n(A) North America\n(B) Europe\n(C) Africa\n(D) Antarctica" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.046875, "loss": 4.4266037940979 }, { "id": "identity_246289", "image": "ScienceQA/images/14657.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the state highlighted in the image and name its capital. Then, list two major landmarks found in this capital city" }, { "from": "gpt", "value": "The answer is Utah State Capitol, Temple Square" } ], "clip_score": 28.984375, "loss": 4.044895648956299 }, { "id": "identity_120949", "image": "VQA2.0/images/450748.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of the olive oil bottle on the table?" }, { "from": "gpt", "value": "The answer is SANTO" } ], "clip_score": 19.046875, "loss": 4.193598747253418 }, { "id": "identity_221793", "image": "A-OKVQA/images/101390.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the person in the doorway holding?" }, { "from": "gpt", "value": "The answer is A sign or cardboard" } ], "clip_score": 15.5078125, "loss": 4.712504863739014 }, { "id": "identity_266456", "image": "TextVQA/images/3658ba6668c2255b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which letters are displayed on the musician's shirt?" }, { "from": "gpt", "value": "The answer is pe" } ], "clip_score": 17.359375, "loss": 4.919018745422363 }, { "id": "identity_14544", "image": "ScienceQA/images/6921.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Connecticut\n(B) North Carolina\n(C) New Hampshire\n(D) Massachusetts" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.921875, "loss": 4.575211524963379 }, { "id": "identity_313038", "image": "IconQA/images/36909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the quantity of diamonds even or odd?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.0, "loss": 4.009395122528076 }, { "id": "identity_278152", "image": "ScienceQA/images/10928.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What category of rock does tillite belong to?\nChoices:\n(A) igneous\n(B) sedimentary\n(C) metamorphic" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.671875, "loss": 4.055895805358887 }, { "id": "identity_308303", "image": "Super-CLEVR/images/superCLEVR_new_001040.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the shiny object that is the same color as the small metal cruiser?" }, { "from": "gpt", "value": "The answer is minivan" } ], "clip_score": 27.796875, "loss": 4.240171909332275 }, { "id": "identity_126756", "image": "VizWiz/images/VizWiz_train_00017689.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many light sources are visible in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 24.453125, "loss": 4.162956237792969 }, { "id": "identity_205169", "image": "A-OKVQA/images/536720.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shorts the young girl is wearing?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 12.453125, "loss": 4.27538537979126 }, { "id": "identity_22859", "image": "PMC-VQA/images/PMC3032248_F0002_85963.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the foreign body located in the image? \nChoices:\n(A) Bone \n(B) Plastic \n(C) Glass \n(D) Metal " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.140625, "loss": 4.811391353607178 }, { "id": "identity_4934", "image": "A-OKVQA/images/225183.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the best reason for these police to ride these animals?\nChoices:\n(A) speed\n(B) save energy\n(C) height advantage\n(D) mobility" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.9375, "loss": 4.801485061645508 }, { "id": "identity_12355", "image": "FigureQA/images/53265.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Purple the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.953125, "loss": 4.089036464691162 }, { "id": "identity_58316", "image": "DVQA/images/bar_train_00114935.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object has the highest preference in the category of mouth?" }, { "from": "gpt", "value": "The answer is plug" } ], "clip_score": 27.1875, "loss": 4.651092529296875 }, { "id": "identity_1859", "image": "Super-CLEVR/images/superCLEVR_new_000958.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a metallic thing of the same size as the bus?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 24.734375, "loss": 5.163455486297607 }, { "id": "identity_315278", "image": "A-OKVQA/images/321026.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of route is depicted here?\nChoices:\n(A) freeway\n(B) raceway\n(C) nature path\n(D) railway" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.125, "loss": 4.493156433105469 }, { "id": "identity_148355", "image": "DocVQA/images/rnjf0226_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What do participants caution against in terms of promoting therapy strategies?" }, { "from": "gpt", "value": "The answer is promoting against metformin" } ], "clip_score": 21.984375, "loss": 4.265758037567139 }, { "id": "identity_294173", "image": "Super-CLEVR/images/superCLEVR_new_001750.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the number of small tandem bikes behind the purple airplane equal to the number of large gray metal articulated buses?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 28.03125, "loss": 4.5427021980285645 }, { "id": "identity_314572", "image": "TabMWP/images/23743.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the table. Then answer the question. At a price of $745, is there a shortage or something else?\nChoices:\n(A) shortage\n(B) surplus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.421875, "loss": 4.235557556152344 }, { "id": "identity_265863", "image": "Super-CLEVR/images/superCLEVR_new_000696.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the gray car and the cyan chopper share the same material composition?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 26.65625, "loss": 5.569263458251953 }, { "id": "identity_281641", "image": "TextVQA/images/aaea7b120c0aa2d8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the magazine located in the bottom right corner?" }, { "from": "gpt", "value": "The answer is birds" } ], "clip_score": 15.0703125, "loss": 4.565766334533691 }, { "id": "identity_26681", "image": "A-OKVQA/images/373153.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of animal is shown?\nChoices:\n(A) domestic\n(B) aquatic\n(C) wild\n(D) stuffed" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.921875, "loss": 4.379734992980957 }, { "id": "identity_254682", "image": "TextVQA/images/3b52bb753374d022.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the primary activities taking place in this location, and what evidence supports your conclusion?" }, { "from": "gpt", "value": "The answer is Aircraft maintenance, assembly, or testing activities" } ], "clip_score": 30.140625, "loss": 4.0165228843688965 }, { "id": "identity_294512", "image": "PlotQA/images/84921.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name or heading of the X-axis?" }, { "from": "gpt", "value": "The answer is Years" } ], "clip_score": 18.890625, "loss": 4.1698198318481445 }, { "id": "identity_34879", "image": "A-OKVQA/images/507087.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which music group would be able to use all of these boards without sharing?\nChoices:\n(A) cream\n(B) nsync\n(C) backstreet boys\n(D) spice girls" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.640625, "loss": 4.079399108886719 }, { "id": "identity_332822", "image": "PMC-VQA/images/PMC7330812_rcr2619-fig-0002_203688.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the cause of the issue seen in the Chest CT?\nChoices:\n(A) Blood clot\n(B) Tissue growth\n(C) Procedural complications\n(D) Infection" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.953125, "loss": 4.63957405090332 }, { "id": "identity_125065", "image": "A-OKVQA/images/468471.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pillows are on the bed?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.5, "loss": 4.110129356384277 }, { "id": "identity_111834", "image": "A-OKVQA/images/538581.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of flotation device is the dog on the left wearing?" }, { "from": "gpt", "value": "The answer is A green safety vest" } ], "clip_score": 30.28125, "loss": 4.602547645568848 }, { "id": "identity_11356", "image": "ScienceQA/images/5560.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following organisms is the secondary consumer in this food web?\nChoices:\n(A) lichen\n(B) bear sedge\n(C) Arctic fox" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.6875, "loss": 4.124192237854004 }, { "id": "identity_335173", "image": "Super-CLEVR/images/superCLEVR_new_000612.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the thing behind the green utility bike and in front of the shiny chopper?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 29.84375, "loss": 4.416565418243408 }, { "id": "identity_43440", "image": "DocVQA/images/fxwg0023_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the retailer who signed the agreement?" }, { "from": "gpt", "value": "The answer is Manager" } ], "clip_score": 19.515625, "loss": 4.405635356903076 }, { "id": "identity_315192", "image": "VizWiz/images/VizWiz_train_00023629.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this thing?" }, { "from": "gpt", "value": "The answer is dell desktop" } ], "clip_score": 30.375, "loss": 5.022325038909912 }, { "id": "identity_276552", "image": "VizWiz/images/VizWiz_train_00017077.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the name of this flower?" }, { "from": "gpt", "value": "The answer is variety desert sage most similar to salvia gracias hybrid sage" } ], "clip_score": 29.890625, "loss": 4.9722065925598145 }, { "id": "identity_29640", "image": "PMC-VQA/images/PMC7728654_Fig2_318208.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the image in d represent? \nChoices:\n(A) Initial scan before treatment \n(B) Scan during treatment \n(C) Scan after treatment \n(D) No relation to treatment " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.828125, "loss": 4.639244079589844 }, { "id": "identity_298424", "image": "A-OKVQA/images/445325.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what category would you place the activity for which these are utilized?\nChoices:\n(A) work\n(B) science\n(C) sports\n(D) school" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.6015625, "loss": 4.407122611999512 }, { "id": "identity_24721", "image": "A-OKVQA/images/328216.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people hoping to cross?\nChoices:\n(A) train tracks\n(B) pirate swords\n(C) rooftops\n(D) river" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.640625, "loss": 4.124829292297363 }, { "id": "identity_32799", "image": "A-OKVQA/images/266200.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The person under the orange umbrella is whom?\nChoices:\n(A) dog walker\n(B) police officer\n(C) queen\n(D) life guard" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.234375, "loss": 4.397273540496826 }, { "id": "identity_30410", "image": "PMC-VQA/images/PMC7309522_F1_197109.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the focus of the MRI scan shown in the images? \nChoices:\n(A) COVID-19 symptoms \n(B) Brain inflammation \n(C) Spinal cord injury \n(D) Lung cancer " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.5, "loss": 4.479013919830322 }, { "id": "identity_319883", "image": "PMC-VQA/images/PMC7215337_ijms-21-02817-f007_166978.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is seen in this area?\nChoices:\n(A) Organized lymphoid tissue \n(B) Degenerated lymphoid tissue \n(C) Adipose tissue \n(D) None of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.59375, "loss": 4.722285270690918 }, { "id": "identity_289973", "image": "TextVQA/images/e54c260f1bd6f6d1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which name is shown on the banner?" }, { "from": "gpt", "value": "The answer is telemoucto" } ], "clip_score": 15.8828125, "loss": 4.783827304840088 }, { "id": "identity_330580", "image": "PMC-VQA/images/PMC9146591_F2_295859.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this X-ray indicate about the abdomen?\nChoices:\n(A) Large bowel obstruction proximal to the transverse colon\n(B) No bowel obstruction\n(C) Small bowel obstruction distal to the transverse colon\n(D) Small and large bowel obstruction distal to the transverse colon" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.4375, "loss": 4.20809268951416 }, { "id": "identity_319371", "image": "TextVQA/images/e5a3b9f29d8abcb9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is on the shirt of the person on the right?" }, { "from": "gpt", "value": "The answer is amn" } ], "clip_score": 14.640625, "loss": 5.095739364624023 }, { "id": "identity_35743", "image": "A-OKVQA/images/250194.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the street?\nChoices:\n(A) suzuran\n(B) mulberry\n(C) yancy\n(D) green" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.359375, "loss": 4.153087615966797 }, { "id": "identity_113228", "image": "A-OKVQA/images/411642.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible in the dugout?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.9375, "loss": 4.8430633544921875 }, { "id": "identity_274300", "image": "PMC-VQA/images/PMC7298131_F2_193612.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the caption indicate with solid arrows pointing to it?\nChoices:\n(A) dilated bronchi \n(B) stomach \n(C) liver \n(D) lung nodules" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.578125, "loss": 4.464334964752197 }, { "id": "identity_21024", "image": "A-OKVQA/images/297419.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What animal is near the flowers?\nChoices:\n(A) dog\n(B) rooster\n(C) cat\n(D) cow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.046875, "loss": 4.112314224243164 }, { "id": "identity_219105", "image": "PMC-VQA/images/PMC6494259_F1_473982.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the lesion on the left or right side of the brain?" }, { "from": "gpt", "value": "The answer is Left side" } ], "clip_score": 31.734375, "loss": 4.076189994812012 }, { "id": "identity_300082", "image": "ScienceQA/images/10111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What property fits this item?\nChoices:\n(A) rough\n(B) transparent" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.21875, "loss": 4.74408483505249 }, { "id": "identity_20588", "image": "PMC-VQA/images/PMC5562887_Fig5_205055.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the distance between the thick yellow arrows illustrate in this image? \nChoices:\n(A) The thickness of L2. \n(B) The distance between the inner and outer borders. \n(C) The hyperechoic layers of the bronchial wall. \n(D) The airway lumen. " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.640625, "loss": 4.2172746658325195 }, { "id": "identity_333074", "image": "A-OKVQA/images/306030.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the brand company of this laptop headquartered?\nChoices:\n(A) italy\n(B) india\n(C) japan\n(D) china" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.453125, "loss": 4.055944442749023 }, { "id": "identity_278287", "image": "Super-CLEVR/images/superCLEVR_new_000045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the color of the bicycle and the shiny wagon identical?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 26.9375, "loss": 5.447951793670654 }, { "id": "identity_283779", "image": "A-OKVQA/images/398494.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the type of kitchen in the following options:\n(A) kitchenette\n(B) galley\n(C) peninsula kitchen\n(D) island kitchen" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.71875, "loss": 4.53790283203125 }, { "id": "identity_265592", "image": "A-OKVQA/images/141586.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the brand of the hat advertised above the dugout? Is it:\n(A) zephyr\n(B) new era\n(C) brixton\n(D) kangol" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.625, "loss": 4.246047496795654 }, { "id": "identity_285442", "image": "PMC-VQA/images/PMC3884609_fig1_257219.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the lesion observed on B-mode US located among the following choices: Right kidney, Left kidney, Liver, or Pancreas?\nChoices:\n(A) Right kidney \n(B) Left kidney \n(C) Liver \n(D) Pancreas" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.1875, "loss": 4.141688823699951 }, { "id": "identity_23031", "image": "A-OKVQA/images/386853.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are all these phones here?\nChoices:\n(A) stolen\n(B) being charged\n(C) owner rich\n(D) for sale" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.0625, "loss": 4.102809906005859 }, { "id": "identity_23441", "image": "A-OKVQA/images/251252.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of bike is this?\nChoices:\n(A) scooter\n(B) motorbike\n(C) bicycle\n(D) vespa" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.3046875, "loss": 4.388134002685547 }, { "id": "identity_112840", "image": "VQA2.0/images/481222.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the catcher's mitt?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 23.40625, "loss": 4.308191776275635 }, { "id": "identity_332060", "image": "A-OKVQA/images/412760.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are these creatures found?\nChoices:\n(A) mountains\n(B) beach\n(C) desert\n(D) arctic" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.6875, "loss": 4.065357208251953 }, { "id": "identity_3198", "image": "IconQA/images/88484.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of stars even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.046875, "loss": 4.3342485427856445 }, { "id": "identity_218709", "image": "TextVQA/images/0380f44cccbadc1c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many street lights are visible in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 19.390625, "loss": 4.570891857147217 }, { "id": "identity_214135", "image": "VQA2.0/images/77195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many whole pizzas can be seen on the table?" }, { "from": "gpt", "value": "The answer is Zero" } ], "clip_score": 29.9375, "loss": 4.122889518737793 }, { "id": "identity_296592", "image": "PMC-VQA/images/PMC4658336_f0015_447443.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the image shown?\nChoices:\n(A) A protein complex\n(B) A microorganism\n(C) A 3D printer\n(D) A chemical compound." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.59375, "loss": 4.095485210418701 }, { "id": "identity_297202", "image": "A-OKVQA/images/522135.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which tool is located on the bookshelf to the left of the desk with a monitor on it? \n(A) saw\n(B) hammer\n(C) chisel\n(D) screwdriver" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.359375, "loss": 4.381117343902588 }, { "id": "identity_334510", "image": "PMC-VQA/images/PMC7803468_Fig5_341099.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is observed in this area?\nChoices:\n(A) Blood vessels\n(B) Sonographic air bronchogram\n(C) Intestinal gas\n(D) Muscle tissue" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.078125, "loss": 4.56636905670166 }, { "id": "identity_193913", "image": "VQA2.0/images/166903.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible windows are there on the building in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.28125, "loss": 4.424248695373535 }, { "id": "identity_110945", "image": "Super-CLEVR/images/superCLEVR_new_001983.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest motorbike in the image?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 18.203125, "loss": 4.248333930969238 }, { "id": "identity_37580", "image": "PMC-VQA/images/PMC7417738_Fig2_232399.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which gyrus is shown in the image? \nChoices:\n(A) Temporal gyrus \n(B) Frontal gyrus \n(C) Parietal gyrus \n(D) Occipital gyrus " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.890625, "loss": 4.537527561187744 }, { "id": "identity_125390", "image": "A-OKVQA/images/333605.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the majority of the parking meters?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 23.0625, "loss": 5.0403151512146 }, { "id": "identity_265305", "image": "A-OKVQA/images/327234.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which activity is he currently engaged in? Is it:\n(A) skiing\n(B) surfing\n(C) hiking\n(D) running" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.3125, "loss": 4.419198036193848 }, { "id": "identity_31054", "image": "ScienceQA/images/6300.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) bouncy\n(B) blue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.5078125, "loss": 4.265213966369629 }, { "id": "identity_25780", "image": "ScienceQA/images/12720.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these three objects have in common?\nChoices:\n(A) salty\n(B) sweet\n(C) scratchy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.3828125, "loss": 4.390515327453613 }, { "id": "identity_31713", "image": "PMC-VQA/images/PMC2365560_pone-0002125-g002_21766.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique is used to visualize the tumors? \nChoices:\n(A) CT scan \n(B) MRI \n(C) X-ray \n(D) PET scan " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.0625, "loss": 4.09390926361084 }, { "id": "identity_261476", "image": "Super-CLEVR/images/superCLEVR_new_000986.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the large object that is made of the same material as the small wagon?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 18.515625, "loss": 4.188785076141357 }, { "id": "identity_303689", "image": "TQA/images/abc_question_images/muscle_fiber_17088.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What connects the muscle to the bone?\nChoices:\n(A) MITOCHONDRION\n(B) TENDON\n(C) FIBER\n(D) TISSUE" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.359375, "loss": 4.1585564613342285 }, { "id": "identity_298666", "image": "A-OKVQA/images/304493.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for him extending his arm out?\nChoices:\n(A) for balance\n(B) to signal\n(C) to catch\n(D) to wave" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.796875, "loss": 4.4686384201049805 }, { "id": "identity_312602", "image": "ScienceQA/images/13075.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the place shown?\nChoices:\n(A) North Carolina\n(B) Virginia\n(C) Connecticut\n(D) Georgia" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.53125, "loss": 4.257371425628662 }, { "id": "identity_218613", "image": "Super-CLEVR/images/superCLEVR_new_001766.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly in front of the golden car?" }, { "from": "gpt", "value": "The answer is A bus" } ], "clip_score": 29.859375, "loss": 4.212421417236328 }, { "id": "identity_301320", "image": "AI2D/images/2582.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the sun rays in this diagram?\nChoices:\n(A) M\n(B) E\n(C) F\n(D) none of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.9375, "loss": 4.1177191734313965 }, { "id": "identity_315696", "image": "DocVQA/images/yynk0079_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which entity has created this policy?" }, { "from": "gpt", "value": "The answer is pilot insurance agency" } ], "clip_score": 25.15625, "loss": 4.271905422210693 }, { "id": "identity_325566", "image": "PMC-VQA/images/PMC5303254_Fig3_126212.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color of the panel in the picture?\nChoices:\n(A) Blue\n(B) Green\n(C) Yellow\n(D) Red" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 7.51171875, "loss": 4.315319061279297 }, { "id": "identity_303344", "image": "ScienceQA/images/13930.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Frankfort\n(B) Jefferson City\n(C) Minneapolis\n(D) Saint Paul" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.921875, "loss": 4.166957855224609 }, { "id": "identity_318103", "image": "PMC-VQA/images/PMC7251435_fig3_178850.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the artifacts located in the image?\nChoices:\n(A) Ventricles \n(B) Tumors \n(C) Arteries \n(D) None of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.09375, "loss": 4.545523643493652 }, { "id": "identity_30390", "image": "VQA2.0/images/567199.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the child have a hat on?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 18.65625, "loss": 4.057043075561523 }, { "id": "identity_24328", "image": "PMC-VQA/images/PMC6819398_Fig8_57937.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the bone destruction seen in the patient's body?\nChoices:\n(A) Right femur \n(B) Left tibia \n(C) Fifth lumbar vertebra \n(D) Right shoulder blade" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.234375, "loss": 4.155086517333984 }, { "id": "identity_331969", "image": "PMC-VQA/images/PMC5362993_Fig2_144559.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the asterisk indicate in these scans?\nChoices:\n(A) Lung metastasis\n(B) Mediastinal adenopathy\n(C) Hilar vessels\n(D) Mediastinal vessels" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.75, "loss": 4.910020351409912 }, { "id": "identity_107251", "image": "Super-CLEVR/images/superCLEVR_new_001356.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly in front of the green bicycle?" }, { "from": "gpt", "value": "The answer is Aeroplane" } ], "clip_score": 31.859375, "loss": 4.2236714363098145 }, { "id": "identity_26533", "image": "A-OKVQA/images/411138.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The plants are growing in what type of outdoor structure?\nChoices:\n(A) barn\n(B) greenhouse\n(C) nursery\n(D) sunroom" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.0625, "loss": 4.284109115600586 }, { "id": "identity_10002", "image": "PMC-VQA/images/PMC4109997_pone-0103283-g001_308584.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the closed arrow in B indicate? \nChoices:\n(A) Axons, \n(B) Terminal field in hindbrain, \n(C) gX, \n(D) Control vagal ganglia. " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.796875, "loss": 4.105724334716797 }, { "id": "identity_284117", "image": "PMC-VQA/images/PMC4579293_F10_425971.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the lesion depicted in the image? Is it:\n(A) Green\n(B) Red\n(C) Blue\n(D) Yellow" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.046875, "loss": 4.1907196044921875 }, { "id": "identity_189814", "image": "A-OKVQA/images/561570.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrian crossing signals are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 26.53125, "loss": 4.275806427001953 }, { "id": "identity_338296", "image": "A-OKVQA/images/412247.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which athlete plays a sport similar to what this person is doing?\nChoices:\n(A) serena williams\n(B) cheryl swoopes\n(C) jennie finch\n(D) alex morgan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.546875, "loss": 4.063052177429199 }, { "id": "identity_335138", "image": "TQA/images/question_images/velocity_time_graphs_8210.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the red line indicate in this diagram?\nChoices:\n(A) acceleration\n(B) deceleration\n(C) increasing speed\n(D) steady speed" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.4375, "loss": 4.392800331115723 }, { "id": "identity_334870", "image": "A-OKVQA/images/545987.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this person doing?\nChoices:\n(A) selling phone\n(B) checking phone\n(C) paying bill\n(D) getting help" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.109375, "loss": 4.606991767883301 }, { "id": "identity_322154", "image": "PMC-VQA/images/PMC7847261_DMM046995F3_356415.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the arrows indicate in this image?\nChoices:\n(A) Destruction of crypts\n(B) Fibrosis\n(C) Influx of inflammatory cells\n(D) Edema" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.28125, "loss": 4.186741352081299 }, { "id": "identity_259959", "image": "A-OKVQA/images/90393.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How can you determine the season based on the type of trees shown in the foreground and their current state?" }, { "from": "gpt", "value": "The answer is early spring" } ], "clip_score": 14.5703125, "loss": 4.6221747398376465 }, { "id": "identity_279959", "image": "AI2D/images/3279.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, which structure is represented by A?\nChoices:\n(A) mitochondrion\n(B) cell wall\n(C) cell membrane\n(D) cytoplasm" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.15625, "loss": 4.359434127807617 }, { "id": "identity_197442", "image": "Super-CLEVR/images/superCLEVR_new_001429.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the car located to the left of the yellow motorcycle?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 21.71875, "loss": 4.671922206878662 }, { "id": "identity_294290", "image": "A-OKVQA/images/546664.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for his current posture?\nChoices:\n(A) falling\n(B) tired\n(C) slipping\n(D) maintain balance" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 11.0625, "loss": 4.589463710784912 }, { "id": "identity_133574", "image": "A-OKVQA/images/399512.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible vehicles are parked on the side of the street?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 20.859375, "loss": 4.7376909255981445 }, { "id": "identity_268221", "image": "PMC-VQA/images/PMC3852808_fig3_249325.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the condition displayed in the image from the following options:\n(A) Pre-stenotic Dilation\n(B) Pre-hypertensive Constriction\n(C) Post-stenotic Dilation\n(D) Post-hypertensive Constriction" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.484375, "loss": 4.713699817657471 }, { "id": "identity_193124", "image": "TextVQA/images/5edf5a48928af699.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the aircraft in the foreground with the propellers?" }, { "from": "gpt", "value": "The answer is Miss Conduct\" B-17G" } ], "clip_score": 29.1875, "loss": 4.106653213500977 }, { "id": "identity_211206", "image": "A-OKVQA/images/425497.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any snow visible on the snowboard in the foreground?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 24.984375, "loss": 4.036372184753418 }, { "id": "identity_255796", "image": "A-OKVQA/images/395339.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are three distinct visual elements in the image that indicate the type of shop people are congregated in front of?" }, { "from": "gpt", "value": "The answer is Sign, second-hand items, community event" } ], "clip_score": 20.421875, "loss": 4.180349349975586 }, { "id": "identity_267547", "image": "TextVQA/images/b995fd81107bc80e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which laptop model is this?" }, { "from": "gpt", "value": "The answer is pabilion" } ], "clip_score": 25.90625, "loss": 4.664759159088135 }, { "id": "identity_306892", "image": "FigureQA/images/92768.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.609375, "loss": 4.002999305725098 }, { "id": "identity_296735", "image": "PMC-VQA/images/PMC5601148_F5_216212.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of images is being referred to in the caption? \nOptions:\n(A) X-ray images\n(B) CT images\n(C) MRI images\n(D) Ultrasound images" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.890625, "loss": 4.099668979644775 }, { "id": "identity_38024", "image": "IconQA/images/52030.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner more likely to land?\nChoices:\n(A) pink\n(B) white" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.953125, "loss": 4.079319953918457 }, { "id": "identity_319625", "image": "PMC-VQA/images/PMC5409997_f0001_158114.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of image is depicted in the caption?\nChoices:\n(A) X-Ray\n(B) MRI\n(C) CT Scan\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.1875, "loss": 4.639519214630127 }, { "id": "identity_187761", "image": "A-OKVQA/images/292752.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats are in the water excluding the large white sailboat?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 26.5625, "loss": 4.582916259765625 }, { "id": "identity_295570", "image": "A-OKVQA/images/102461.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being observed by the officers present here? \nChoices:\n(A) party\n(B) horse race\n(C) candled cake\n(D) protest" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.546875, "loss": 4.319996356964111 }, { "id": "identity_21338", "image": "TextVQA/images/caedb5851c642d53.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: according to the text, what is hard?" }, { "from": "gpt", "value": "The answer is response design" } ], "clip_score": 14.984375, "loss": 4.2209320068359375 }, { "id": "identity_272464", "image": "PMC-VQA/images/PMC1266401_F1_3693.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is the thrombus found among the following choices: A) Aortic valve B) Tricuspid valve C) Pulmonary valve D) Mitral valve?\nChoices:\n(A) Aortic valve \n(B) Tricuspid valve \n(C) Pulmonary valve \n(D) Mitral valve" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.046875, "loss": 4.009012699127197 }, { "id": "identity_292387", "image": "A-OKVQA/images/283290.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which power enables the man to become airborne?\nChoices:\n(A) water pump\n(B) mental\n(C) solar\n(D) magic" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.65625, "loss": 4.277866363525391 }, { "id": "identity_195340", "image": "A-OKVQA/images/659.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is shown in the image?" }, { "from": "gpt", "value": "The answer is Train" } ], "clip_score": 23.828125, "loss": 4.051267147064209 }, { "id": "identity_21236", "image": "PMC-VQA/images/PMC8302983_fig1_15775.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the condition of the mitral prosthesis shown in the image? \nChoices:\n(A) Normal Functioning \n(B) Significant Regurgitation \n(C) Significant Stenosis \n(D) Calcification " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.53125, "loss": 4.178040504455566 }, { "id": "identity_12402", "image": "DVQA/images/bar_train_00179493.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What dataset does the steelblue color represent?" }, { "from": "gpt", "value": "The answer is length" } ], "clip_score": 24.0625, "loss": 4.097368240356445 }, { "id": "identity_176747", "image": "IconQA/images/94845.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the flower's center?" }, { "from": "gpt", "value": "The answer is Light green" } ], "clip_score": 27.875, "loss": 5.586910247802734 }, { "id": "identity_270998", "image": "ScienceQA/images/9830.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the passage discussing the weather or the climate? \nOptions:\n(A) weather\n(B) climate" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 8.1484375, "loss": 4.253270149230957 }, { "id": "identity_8918", "image": "A-OKVQA/images/272807.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What types of bikes are these?\nChoices:\n(A) electric\n(B) cruiser\n(C) children's\n(D) mountain" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.109375, "loss": 4.0354485511779785 }, { "id": "identity_338221", "image": "ScienceQA/images/277.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Frankfort\n(B) Nashville\n(C) Louisville\n(D) Lexington" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.71875, "loss": 4.268553733825684 }, { "id": "identity_277258", "image": "IconQA/images/62327.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of squares an even or odd number?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.125, "loss": 4.009254455566406 }, { "id": "identity_325071", "image": "TQA/images/question_images/hydrocarbons_7052.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If two carbon atoms in the hydrocarbon shown here were connected by a double bond, what would happen?\nChoices:\n(A) cyclic hydrocarbon\n(B) unsaturated hydrocarbon\n(C) saturated hydrocarbon\n(D) nothing would change" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.046875, "loss": 4.593874931335449 }, { "id": "identity_280594", "image": "FigureQA/images/71703.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Peru the country with the largest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.296875, "loss": 4.226944923400879 }, { "id": "identity_309357", "image": "ScienceQA/images/2702.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During this period, thermal energy moved from () to ().\nChoices:\n(A) each battery . . . the surroundings\n(B) the surroundings . . . each battery" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.1875, "loss": 4.034143924713135 }, { "id": "identity_287299", "image": "ScienceQA/images/12301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which direction is the push directed in relation to the bottom magnet?\nChoices:\n(A) away from the bottom magnet\n(B) toward the bottom magnet" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.28125, "loss": 4.0360307693481445 }, { "id": "identity_126350", "image": "PMC-VQA/images/PMC7355588_jcm-09-01934-f002_212446.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many hyperreflective foci are indicated by arrowheads in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.203125, "loss": 4.450126647949219 }, { "id": "identity_67893", "image": "TQA/images/question_images/sun_layers_6309.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct zones or layers are labeled within the sun's structure?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 29.9375, "loss": 4.48842191696167 }, { "id": "identity_300203", "image": "Super-CLEVR/images/superCLEVR_new_000986.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the large object that is made of the same material as the small wagon?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 18.921875, "loss": 4.1671576499938965 }, { "id": "identity_257259", "image": "DocVQA/images/zxyg0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the names of the committee members mentioned in the first paragraph of the report?" }, { "from": "gpt", "value": "The answer is Drs. Oiso, Patwardan, Ranalingaswami, and Venger" } ], "clip_score": 24.40625, "loss": 4.389872074127197 }, { "id": "identity_304505", "image": "A-OKVQA/images/231379.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is used to make the sheet laid on by the person?\nChoices:\n(A) nylon\n(B) leather\n(C) wool\n(D) cotton" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.1953125, "loss": 4.2756853103637695 }, { "id": "identity_269665", "image": "IconQA/images/43413.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture follows the first image of a fish?\nChoices:\n(A) turtle\n(B) fish\n(C) sub" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.109375, "loss": 4.446866512298584 }, { "id": "identity_20050", "image": "A-OKVQA/images/354027.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the young boy holding the older woman's hand?\nChoices:\n(A) for guidance\n(B) for protection\n(C) for fun\n(D) for play" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 4.08203125, "loss": 4.079113960266113 }, { "id": "identity_333313", "image": "A-OKVQA/images/387328.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which direction are the three individuals on the right moving?\nChoices:\n(A) towards\n(B) right\n(C) away\n(D) left" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 11.7421875, "loss": 4.203313827514648 }, { "id": "identity_8791", "image": "A-OKVQA/images/489452.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The man about to catch the frisbee wears what color of shirt?\nChoices:\n(A) blue\n(B) black\n(C) grey\n(D) white" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.296875, "loss": 4.215317249298096 }, { "id": "identity_103834", "image": "VizWiz/images/VizWiz_train_00000927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the gauge's frame made of?" }, { "from": "gpt", "value": "The answer is Metal" } ], "clip_score": 25.4375, "loss": 4.491730690002441 }, { "id": "identity_12816", "image": "FigureQA/images/41055.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.046875, "loss": 4.168339729309082 }, { "id": "identity_32298", "image": "PMC-VQA/images/PMC4920187_BIO017384F2_44740.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrowhead point to in images I-K? \nChoices:\n(A) thio-T-positive SC35-negative particle \n(B) a thio-T and SC35-positive particle \n(C) nucleus \n(D) nucleolus " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.515625, "loss": 4.993019104003906 }, { "id": "identity_327302", "image": "ScienceQA/images/12921.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared characteristic of these two items?\nChoices:\n(A) colorful\n(B) stretchy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.359375, "loss": 4.547268867492676 }, { "id": "identity_309648", "image": "TextVQA/images/bba20d711a767294.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first line on the top of this page?" }, { "from": "gpt", "value": "The answer is on a watch sent home to" } ], "clip_score": 17.53125, "loss": 4.337279796600342 }, { "id": "identity_334006", "image": "ScienceQA/images/14760.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the following statement about the solar system accurate or inaccurate?\nThe volume of Uranus is less than one-tenth of the volume of Saturn.\nChoices:\n(A) accurate\n(B) inaccurate" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.484375, "loss": 4.272819995880127 }, { "id": "identity_301628", "image": "PMC-VQA/images/PMC6842499_Fig2_65273.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 2. Which vertebra was the focus of this procedure?\nChoices:\n(A) C5. \n(B) C6. \n(C) C7. \n(D) C8." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.6875, "loss": 4.254829406738281 }, { "id": "identity_309575", "image": "TQA/images/question_images/human_system_respiratory_3759.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part links the nasal and oral cavities to the lungs?\nChoices:\n(A) trachea\n(B) alveoli\n(C) bronchi\n(D) nostril" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.515625, "loss": 4.187750339508057 }, { "id": "identity_24450", "image": "A-OKVQA/images/311208.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which mode of transport here is inanimate?\nChoices:\n(A) train\n(B) horse\n(C) bike\n(D) car" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.25, "loss": 4.016074180603027 }, { "id": "identity_331027", "image": "ScienceQA/images/14571.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this place?\nChoices:\n(A) Baton Rouge\n(B) Phoenix\n(C) Honolulu\n(D) Hilo" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.0625, "loss": 4.146717071533203 }, { "id": "identity_295751", "image": "Super-CLEVR/images/superCLEVR_new_001582.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the truck behind the red rubber car the same color as the rubber motorbike on the left side of the large gray wagon?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 21.21875, "loss": 4.972491264343262 }, { "id": "identity_287838", "image": "TQA/images/abc_question_images/parts_microscope_17184.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter is located below S in the given choices: N, W, U, or R?\nChoices:\n(A) N\n(B) W\n(C) U\n(D) R" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.0625, "loss": 4.259092807769775 }, { "id": "identity_192190", "image": "A-OKVQA/images/178379.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many riders are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.984375, "loss": 4.297362327575684 }, { "id": "identity_323501", "image": "IconQA/images/16562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the quantity of flowers even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.265625, "loss": 4.150252342224121 }, { "id": "identity_322039", "image": "ScienceQA/images/17828.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic is shared by these three items?\nChoices:\n(A) sour\n(B) blue\n(C) salty" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.1953125, "loss": 4.54677677154541 }, { "id": "identity_261808", "image": "FigureQA/images/29112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orange not the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.3125, "loss": 4.139226913452148 }, { "id": "identity_311363", "image": "A-OKVQA/images/32818.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these benches used for?\nChoices:\n(A) washed ashore\n(B) resting\n(C) for sale\n(D) surfing" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.984375, "loss": 4.183272361755371 }, { "id": "identity_300131", "image": "Super-CLEVR/images/superCLEVR_new_000486.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What size is the rubber thing that is the same color as the big metallic object?" }, { "from": "gpt", "value": "The answer is small" } ], "clip_score": 20.953125, "loss": 4.301294803619385 }, { "id": "identity_338129", "image": "TQA/images/abc_question_images/seasons_10672.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which one indicates the Northern Hemisphere winter?\nChoices:\n(A) W\n(B) R\n(C) C\n(D) S" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.59375, "loss": 4.295605659484863 }, { "id": "identity_298328", "image": "A-OKVQA/images/349199.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the boy holding up his shoe?\nChoices:\n(A) disposing sand\n(B) avoiding water\n(C) beating people\n(D) tying shoe" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.78125, "loss": 4.543117046356201 }, { "id": "identity_13271", "image": "FigureQA/images/29408.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Khaki less than Mint?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.265625, "loss": 4.087133884429932 }, { "id": "identity_297632", "image": "TextVQA/images/708ba2e146e19132.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which brand of beer is in the green bottle?" }, { "from": "gpt", "value": "The answer is mark twain" } ], "clip_score": 24.765625, "loss": 4.414865016937256 }, { "id": "identity_335295", "image": "PMC-VQA/images/PMC7340009_F5_206330.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of microscopy was used to view the cells?\nChoices:\n(A) Bright-field microscopy \n(B) TEM \n(C) Confocal laser scanning microscopy \n(D) Fluorescence microscopy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.5, "loss": 4.253864765167236 }, { "id": "identity_282902", "image": "IconQA/images/40599.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) rotate\n(B) shift\n(C) invert" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.4375, "loss": 4.464934349060059 }, { "id": "identity_104422", "image": "PMC-VQA/images/PMC5831712_Fig2_282087.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the yellow lines in the image?" }, { "from": "gpt", "value": "The answer is Outlining regions of interest or treatment planning" } ], "clip_score": 21.28125, "loss": 4.4205546379089355 }, { "id": "identity_118838", "image": "A-OKVQA/images/357684.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the arena mentioned on the advertisement?" }, { "from": "gpt", "value": "The answer is Energie Arena" } ], "clip_score": 19.828125, "loss": 4.248050212860107 }, { "id": "identity_338433", "image": "A-OKVQA/images/27616.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this place?\nChoices:\n(A) garage\n(B) car show\n(C) car dealer\n(D) auto shop" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.6171875, "loss": 4.5859222412109375 }, { "id": "identity_333140", "image": "IconQA/images/58270.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the quantity of circles even or odd?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.640625, "loss": 4.089901447296143 }, { "id": "identity_222169", "image": "A-OKVQA/images/281177.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flags are visible in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 16.765625, "loss": 4.279679775238037 }, { "id": "identity_270048", "image": "PMC-VQA/images/PMC3579993_Fig19_187327.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the mediastinum is the lesion located in the patient? \n(A) Posterior mediastinum\n(B) Anterior mediastinum\n(C) Lateral mediastinum\n(D) Superior mediastinum" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.453125, "loss": 4.209382057189941 }, { "id": "identity_284604", "image": "VQA-AS/images/14669.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the boy have company?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.875, "loss": 4.0278801918029785 }, { "id": "identity_124912", "image": "A-OKVQA/images/173161.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles are visible in the background?" }, { "from": "gpt", "value": "The answer is Three vehicles" } ], "clip_score": 14.796875, "loss": 4.733935832977295 }, { "id": "identity_113296", "image": "Super-CLEVR/images/superCLEVR_new_000564.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is located closest to the bottom edge of the image?" }, { "from": "gpt", "value": "The answer is A train" } ], "clip_score": 23.3125, "loss": 4.07568883895874 }, { "id": "identity_248259", "image": "VQA2.0/images/419599.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: how many people are wearing headgear in the image?" }, { "from": "gpt", "value": "The answer is one" } ], "clip_score": 17.28125, "loss": 4.088679313659668 }, { "id": "identity_196464", "image": "PMC-VQA/images/PMC6747042_f1_38423.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scale bar at the bottom right of the image indicating?" }, { "from": "gpt", "value": "The answer is Reference for size measurement" } ], "clip_score": 21.5625, "loss": 4.509439945220947 }, { "id": "identity_277762", "image": "TextVQA/images/f16b2a8ec0ff1561.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the number that comes after 21?" }, { "from": "gpt", "value": "The answer is duda" } ], "clip_score": 9.1015625, "loss": 4.433504581451416 }, { "id": "identity_10643", "image": "TQA/images/question_images/skin_cross_section_7579.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the diagram, which part refers to the outer layer of the skin?\nChoices:\n(A) hypodermis\n(B) pore\n(C) epidermis\n(D) dermis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.125, "loss": 4.024605751037598 }, { "id": "identity_13227", "image": "AI2D/images/2710.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time of day is letter P\nChoices:\n(A) midnight\n(B) noon\n(C) sunset\n(D) sunrise" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.921875, "loss": 4.111104488372803 }, { "id": "identity_137266", "image": "DocVQA/images/fzjf0226_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the strategic objective associated with clinical study TL-OPI-507?" }, { "from": "gpt", "value": "The answer is Adolescent data/use" } ], "clip_score": 25.453125, "loss": 4.294354438781738 }, { "id": "identity_133161", "image": "Super-CLEVR/images/superCLEVR_new_000119.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the vehicle that is the same type as the bus?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 19.609375, "loss": 4.593311786651611 }, { "id": "identity_265582", "image": "FigureQA/images/70790.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Navy Blue or Dodger Blue?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.625, "loss": 4.479561805725098 }, { "id": "identity_316064", "image": "PMC-VQA/images/PMC2707627_pone-0006314-g006_41342.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the imaging technique used to visualize this?\nChoices:\n(A) CT scan\n(B) X-ray\n(C) MRI\n(D) PET scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.578125, "loss": 4.144533634185791 }, { "id": "identity_327075", "image": "A-OKVQA/images/29709.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of place is depicted?\nChoices:\n(A) exercise\n(B) boarding\n(C) waiting\n(D) dining" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.046875, "loss": 4.2278666496276855 }, { "id": "identity_106911", "image": "Super-CLEVR/images/superCLEVR_new_001742.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the vehicle located between the green car and the silver car?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 22.5625, "loss": 4.556125640869141 }, { "id": "identity_17566", "image": "FigureQA/images/90809.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Maroon the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.15625, "loss": 4.313024044036865 }, { "id": "identity_18309", "image": "VizWiz/images/VizWiz_train_00009523.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of dog is this?" }, { "from": "gpt", "value": "The answer is dog" } ], "clip_score": 23.6875, "loss": 4.424073696136475 }, { "id": "identity_46296", "image": "DocVQA/images/kggn0226_36.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the study code indicated on the document?" }, { "from": "gpt", "value": "The answer is PiM" } ], "clip_score": 19.1875, "loss": 5.729956150054932 }, { "id": "identity_20976", "image": "PMC-VQA/images/PMC6295577_F2_411968.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the arrow pointing towards in the image?\nChoices:\n(A) An area of necrosis \n(B) An area of inflammation \n(C) An area of fibrosis \n(D) An area of normal morphology " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.140625, "loss": 4.858119010925293 }, { "id": "identity_149717", "image": "DocVQA/images/tllg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many main sections does the document have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.21875, "loss": 4.460701942443848 }, { "id": "identity_332794", "image": "IconQA/images/95828.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this shape?\nChoices:\n(A) slide\n(B) flip\n(C) turn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.140625, "loss": 4.366522789001465 }, { "id": "identity_11377", "image": "TQA/images/abc_question_images/waves_17678.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the crest\nChoices:\n(A) V\n(B) E\n(C) L\n(D) T" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.96875, "loss": 4.113694667816162 }, { "id": "identity_232677", "image": "ScienceQA/images/5380.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the visual characteristics that indicate the clipboard is made of wood, and how do these characteristics differentiate it from concrete?" }, { "from": "gpt", "value": "The answer is wood" } ], "clip_score": 29.5625, "loss": 5.2618865966796875 }, { "id": "identity_70466", "image": "ScienceQA/images/18354.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of this animal?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 20.875, "loss": 4.520230293273926 }, { "id": "identity_336890", "image": "A-OKVQA/images/548957.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the round fruit beneath these apples?\nChoices:\n(A) red\n(B) green\n(C) blue\n(D) orange" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.640625, "loss": 4.05012845993042 }, { "id": "identity_263586", "image": "TextVQA/images/84b3fc5b08f24dba.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What items are available for sale at the stall located on the left?" }, { "from": "gpt", "value": "The answer is bun mang vit" } ], "clip_score": 28.265625, "loss": 4.931756019592285 }, { "id": "identity_336097", "image": "ScienceQA/images/10629.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this a mineral?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.40625, "loss": 4.237040996551514 }, { "id": "identity_294877", "image": "VizWiz/images/VizWiz_train_00011516.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the last picture I took, you mentioned something about strawberry ice cream. It's a picture from an Avon book, showing someone holding an ice cream." }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 35.28125, "loss": 4.397514820098877 }, { "id": "identity_307170", "image": "AI2D/images/3086.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the hair-like structure on the surface of this called?\nChoices:\n(A) capsule\n(B) pilus\n(C) ribosome\n(D) cell wall" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.5, "loss": 4.144354820251465 }, { "id": "identity_16910", "image": "IconQA/images/38120.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can you trace a circle with this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.796875, "loss": 4.312286376953125 }, { "id": "identity_330259", "image": "IconQA/images/103557.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a barn. Which image is fourth?\nChoices:\n(A) tractor\n(B) sun\n(C) barn" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.046875, "loss": 4.144725799560547 }, { "id": "identity_185221", "image": "PMC-VQA/images/PMC6214924_Fig4_390574.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is highlighted in green in the image?" }, { "from": "gpt", "value": "The answer is The right eye" } ], "clip_score": 22.109375, "loss": 4.058035373687744 }, { "id": "identity_298902", "image": "PMC-VQA/images/PMC4245492_Fig1_339455.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in lane 3 of the image?\nChoices:\n(A) SATB1 \n(B) SATB2 \n(C) Molecular weight marker \n(D) Negative control lysate" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.8359375, "loss": 5.035972595214844 }, { "id": "identity_319777", "image": "TextVQA/images/4ec44c66f6a886d5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this machine?" }, { "from": "gpt", "value": "The answer is shopmaster" } ], "clip_score": 22.859375, "loss": 4.6777191162109375 }, { "id": "identity_306079", "image": "A-OKVQA/images/581458.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of train is depicted here?\nChoices:\n(A) oil\n(B) coal\n(C) solar\n(D) elevated" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.546875, "loss": 4.182267189025879 }, { "id": "identity_289842", "image": "A-OKVQA/images/14321.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which object is located to the left of the motorcycles among the following choices: dog, boat, cat, or person?\nChoices:\n(A) dog\n(B) boat\n(C) cat\n(D) person" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.53125, "loss": 4.295759201049805 }, { "id": "identity_329360", "image": "A-OKVQA/images/298151.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of this nut?\nChoices:\n(A) peanut\n(B) walnut\n(C) cashew\n(D) pistachio" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.1875, "loss": 4.392210483551025 }, { "id": "identity_141519", "image": "DVQA/images/bar_train_00014377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset has the highest accuracy for the 'evil' algorithm?" }, { "from": "gpt", "value": "The answer is mirror" } ], "clip_score": 34.8125, "loss": 4.370104789733887 }, { "id": "identity_335078", "image": "PMC-VQA/images/PMC7132119_fig0005_145183.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the liver was affected by this? \nChoices:\n(A) Left Hepatic Vein \n(B) Right Hepatic Vein \n(C) Middle Hepatic Vein \n(D) Inferior Vena Cava" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.28125, "loss": 4.252770900726318 }, { "id": "identity_317010", "image": "FigureQA/images/20684.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Purple the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.859375, "loss": 4.693522930145264 }, { "id": "identity_208185", "image": "VQA2.0/images/292146.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bicycle frame that is third from the right in the front row?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 25.0625, "loss": 4.248592376708984 }, { "id": "identity_299837", "image": "AI2D/images/737.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Towards which celestial body does gravity pull objects on Earth? \n(A) star\n(B) moon\n(C) sun\n(D) saturn" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.484375, "loss": 4.364738464355469 }, { "id": "identity_8583", "image": "ScienceQA/images/20807.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Pennsylvania?\nChoices:\n(A) Pittsburgh\n(B) Harrisburg\n(C) Hartford\n(D) Nashville" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.15625, "loss": 4.407228946685791 }, { "id": "identity_262428", "image": "A-OKVQA/images/184431.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the vehicle shown on the right?\nChoices:\n(A) van\n(B) train\n(C) bike\n(D) car" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.9609375, "loss": 4.336772918701172 }, { "id": "identity_305066", "image": "PMC-VQA/images/PMC6450846_Fig2_461817.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique used to create these MIP images?\nChoices:\n(A) CT scan\n(B) X-ray\n(C) PET scan\n(D) MRI scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.1875, "loss": 4.384981632232666 }, { "id": "identity_26434", "image": "PMC-VQA/images/PMC7993516_Fig1_399517.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the spinal condition in this patient? \nChoices:\n(A) Convex scoliosis \n(B) Kyphosis \n(C) Congenital scoliosis \n(D) Lumbar lordosis " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.71875, "loss": 4.17877197265625 }, { "id": "identity_4940", "image": "AI2D/images/454.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Using the food web below, identify which of the following choices shows the correct transfer of energy.\nChoices:\n(A) From squid to elephant seal\n(B) From krill to zooplankton\n(C) From killer whale to elephant seal\n(D) From penguin to krill" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.46875, "loss": 4.112545013427734 }, { "id": "identity_286222", "image": "PMC-VQA/images/PMC6097448_Fig1_355565.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which enhancement pattern is observed in the CT images of the neck? \nOptions:\n(A) peripheral enhancement pattern \n(B) central enhancement pattern \n(C) rim enhancement pattern \n(D) non-enhanced pattern" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.046875, "loss": 4.065057277679443 }, { "id": "identity_36308", "image": "A-OKVQA/images/199969.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What language must someone speak in order to understand what items are offered?\nChoices:\n(A) english\n(B) spanish\n(C) french\n(D) italian" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.359375, "loss": 4.005053997039795 }, { "id": "identity_4727", "image": "FigureQA/images/59381.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Peru the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.0, "loss": 4.14768648147583 }, { "id": "identity_316017", "image": "PMC-VQA/images/PMC3141654_F11_103036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the white dotted line indicate?\nChoices:\n(A) The progress of fusion\n(B) The progress of inversion\n(C) The progress of lateral subdivision\n(D) The progress of developing brain" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.203125, "loss": 4.879554748535156 }, { "id": "identity_34322", "image": "PMC-VQA/images/PMC8107798_f1_436419.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of staining was used in the image?\nChoices:\n(A) Fluorescent staining \n(B) Hematoxylin-Eosin staining \n(C) DAB staining \n(D) Iodine staining " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.796875, "loss": 4.851877212524414 }, { "id": "identity_319897", "image": "PMC-VQA/images/PMC6423896_F1_453476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was observed at the surgical site?\nChoices:\n(A) Enhancement of a cerebellum \n(B) Enhancement of an occipital lobe \n(C) Enhancement of a frontotemporal lobe \n(D) Enhancement of a parietal lobe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.84375, "loss": 4.458585262298584 }, { "id": "identity_302438", "image": "TextVQA/images/003bed7fa4de9fd9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this black sign?" }, { "from": "gpt", "value": "The answer is lil siam" } ], "clip_score": 18.828125, "loss": 4.049455165863037 }, { "id": "identity_37976", "image": "PMC-VQA/images/PMC4355500_Fig2_365821.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the ECG feature seen in the proband? \nChoices:\n(A) Right ventricular dilation \n(B) Left ventricular noncompaction \n(C) Left ventricular hypertrophy \n(D) Atrial fibrillation " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.140625, "loss": 4.057438850402832 }, { "id": "identity_296254", "image": "PMC-VQA/images/PMC7346301_FIG1_208850.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the enhancing mass observed in the MRI located?\nChoices:\n(A) Left frontal lobe\n(B) Right frontal lobe\n(C) Left lateral ventricle\n(D) Right lateral ventricle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.84375, "loss": 4.51463508605957 }, { "id": "identity_252171", "image": "TextVQA/images/272833ec5655d8de.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify three distinct elements in the image that indicate the art style is neopop and explain how each element contributes to this classification" }, { "from": "gpt", "value": "The answer is Textual evidence, visual style, cultural references" } ], "clip_score": 19.96875, "loss": 4.212205410003662 }, { "id": "identity_5736", "image": "AI2D/images/4844.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of eruption does not form a cloud?\nChoices:\n(A) Strombolian\n(B) Pelean\n(C) Hawaiian\n(D) Plinian" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.8125, "loss": 4.2491679191589355 }, { "id": "identity_195263", "image": "PMC-VQA/images/PMC3655489_fig2_204832.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the radiolucency observed on the left or right side of the patient's jaw?" }, { "from": "gpt", "value": "The answer is Right side" } ], "clip_score": 28.9375, "loss": 4.120883941650391 }, { "id": "identity_212776", "image": "VQA2.0/images/285296.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible on the field wearing a cap?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 18.40625, "loss": 5.006112098693848 }, { "id": "identity_127948", "image": "TextVQA/images/00922ab06cc71238.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape to the left of the word \"INVADER\"?" }, { "from": "gpt", "value": "The answer is Arrow" } ], "clip_score": 24.4375, "loss": 4.672841548919678 }, { "id": "identity_283667", "image": "A-OKVQA/images/264805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which setting on his camera phone could potentially harm his eyes when activated?\nChoices:\n(A) flash\n(B) lightbulb\n(C) speaker\n(D) camera" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.9453125, "loss": 4.424374580383301 }, { "id": "identity_4483", "image": "ScienceQA/images/6673.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the question that Leo's experiment can best answer.\nChoices:\n(A) Do steel nails rust in fewer days when submerged in a large volume of liquid compared to a small volume?\n(B) Do steel nails take fewer days to rust in water compared to vinegar?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.953125, "loss": 4.106174945831299 }, { "id": "identity_282329", "image": "PMC-VQA/images/PMC8268606_Fig1_5131.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location are the paravermal lesions typically found?\nChoices:\n(A) Left hemisphere\n(B) Right hemisphere\n(C) Both hemispheres\n(D) Frontal lobe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.234375, "loss": 4.339922904968262 }, { "id": "identity_69296", "image": "TQA/images/abc_question_images/cycle_water_11499.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label \"T\" signify in this diagram?" }, { "from": "gpt", "value": "The answer is Runoff" } ], "clip_score": 24.21875, "loss": 4.189168453216553 }, { "id": "identity_294892", "image": "TextVQA/images/37b20486199a4dc4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me the name of this team?" }, { "from": "gpt", "value": "The answer is free gaza" } ], "clip_score": 16.609375, "loss": 5.377803802490234 }, { "id": "identity_335650", "image": "PMC-VQA/images/PMC6291948_Fig1_411066.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the opacity found?\nChoices:\n(A) right lung\n(B) middle lung\n(C) left lung\n(D) both lungs" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.515625, "loss": 4.672740936279297 }, { "id": "identity_288163", "image": "AI2D/images/3589.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, what does G represent?\nChoices:\n(A) prostate gland\n(B) pubic bone\n(C) prepuce\n(D) urinary bladder" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.484375, "loss": 4.359582901000977 }, { "id": "identity_11632", "image": "FigureQA/images/35163.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Blue the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.625, "loss": 4.570825576782227 }, { "id": "identity_243558", "image": "VQA2.0/images/397129.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the indicators in the image that suggest it is not cold out, and what is the likely season based on these indicators?" }, { "from": "gpt", "value": "The answer is It is likely spring or fall" } ], "clip_score": 12.796875, "loss": 4.529555320739746 }, { "id": "identity_305422", "image": "ScienceQA/images/20958.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Knoxville\n(B) Jersey City\n(C) Trenton\n(D) Dover" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.34375, "loss": 4.544713020324707 }, { "id": "identity_300320", "image": "PMC-VQA/images/PMC6334377_fig3_423309.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which vertebrate is mentioned in the caption?\nChoices:\n(A) Cervical\n(B) Thoracic\n(C) Lumbar\n(D) Sacral" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.90625, "loss": 4.236848831176758 }, { "id": "identity_216518", "image": "A-OKVQA/images/238385.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible street lamps are there in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 28.125, "loss": 4.77192497253418 }, { "id": "identity_22278", "image": "PMC-VQA/images/PMC5717843_Fig4_249361.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is seen in the right sphenoid sinus?\nChoices:\n(A) Homogeneous signal \n(B) Low signal \n(C) High signal \n(D) Heterogeneous signal " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.21875, "loss": 4.297415256500244 }, { "id": "identity_284120", "image": "PMC-VQA/images/PMC9318489_FIG2_352586.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the pulmonary infiltrates located? \nOptions:\n(A) Unilateral\n(B) Bilateral\n(C) Peripheral\n(D) Central" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.46875, "loss": 4.162806510925293 }, { "id": "identity_296652", "image": "Super-CLEVR/images/superCLEVR_new_001264.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the object that is positioned in front of the bus and behind the large cyan metallic thing?" }, { "from": "gpt", "value": "The answer is scooter" } ], "clip_score": 26.75, "loss": 4.065958023071289 }, { "id": "identity_295290", "image": "Super-CLEVR/images/superCLEVR_new_000795.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer rubber articulated buses in front of the large matte car than the number of big objects to the left of the metallic bus?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 33.5, "loss": 4.974045753479004 }, { "id": "identity_329877", "image": "PMC-VQA/images/PMC5379728_Fig1_149081.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique used to capture this image?\nChoices:\n(A) CT scan\n(B) MRI\n(C) X-ray\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.65625, "loss": 4.369797706604004 }, { "id": "identity_191744", "image": "PMC-VQA/images/PMC5126839_Fig3_100226.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label at the top of the image?" }, { "from": "gpt", "value": "The answer is h" } ], "clip_score": 15.5703125, "loss": 4.105796813964844 }, { "id": "identity_311469", "image": "PMC-VQA/images/PMC7921074_F3_380342.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the white arrow on this point to?\nChoices:\n(A) Posterior elevation\n(B) Longitudinal elevation\n(C) Narrow glabrous patch\n(D) Anterior elevation" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.40625, "loss": 4.76023006439209 }, { "id": "identity_244532", "image": "A-OKVQA/images/123555.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the direction of the bus route, and what does the sign beside the green light forbid?" }, { "from": "gpt", "value": "The answer is North, left turns" } ], "clip_score": 23.390625, "loss": 5.931435585021973 }, { "id": "identity_42176", "image": "DVQA/images/bar_train_00000161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group has the lowest summed value across all categories?" }, { "from": "gpt", "value": "The answer is worry" } ], "clip_score": 20.40625, "loss": 4.0313496589660645 }, { "id": "identity_31614", "image": "A-OKVQA/images/119614.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of cat is this?\nChoices:\n(A) persian\n(B) orange tabby\n(C) bengal\n(D) maine coon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.125, "loss": 4.25004243850708 }, { "id": "identity_281133", "image": "A-OKVQA/images/393630.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options could provide protection from the sun? \n(A) bench\n(B) boat\n(C) tree shade\n(D) jackets" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.765625, "loss": 4.702746391296387 }, { "id": "identity_286943", "image": "A-OKVQA/images/427091.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what way is the child in the seating crossing the street showing improvement?\nChoices:\n(A) being pulled\n(B) mechanized wheelchair\n(C) being pushed\n(D) sheer will" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.609375, "loss": 4.139321327209473 }, { "id": "identity_1043", "image": "VQA-AS/images/10209.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the man taller than the tree?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 19.53125, "loss": 4.026184558868408 }, { "id": "identity_286464", "image": "ScienceQA/images/11047.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following factors served as an independent variable in this experiment?\nChoices:\n(A) the length of the filtering straw\n(B) the number of Giardia that remained in the water" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.890625, "loss": 4.231178283691406 }, { "id": "identity_13941", "image": "ScienceQA/images/10105.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Mature fern plants have flat leaves. What are fern leaves called?\nChoices:\n(A) fronds\n(B) spore cases" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.46875, "loss": 4.0337982177734375 }, { "id": "identity_190806", "image": "PMC-VQA/images/PMC4651706_fig1_445703.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which side of the body is the imaged leg located?" }, { "from": "gpt", "value": "The answer is Right" } ], "clip_score": 27.265625, "loss": 4.645987033843994 }, { "id": "identity_292652", "image": "PMC-VQA/images/PMC8946311_cancers-14-01509-f002_230142.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the meaning of the black arrow in the legends of the diagram?\nChoices:\n(A) Tumor cells \n(B) TILs \n(C) Negative cells \n(D) Unknown cells" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.484375, "loss": 4.550309658050537 }, { "id": "identity_191599", "image": "PMC-VQA/images/PMC5378863_ccr3853-fig-0004_148735.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the condition of the right eye as compared to the left eye in the image?" }, { "from": "gpt", "value": "The answer is The right eye appears normal with no thickening observed" } ], "clip_score": 26.6875, "loss": 4.557319164276123 }, { "id": "identity_194718", "image": "VQA2.0/images/52943.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the team uniform for the player in the foreground?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 21.015625, "loss": 4.163531303405762 }, { "id": "identity_288225", "image": "PMC-VQA/images/PMC7706203_Fig2_311534.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the type of section displayed in the MRI image:\n(A) Sagittal section,\n(B) Coronal section,\n(C) Axial section,\n(D) Oblique section" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.0625, "loss": 4.593588352203369 }, { "id": "identity_279888", "image": "PMC-VQA/images/PMC7532635_Fig1_265536.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is represented by the white arrows in the ultrasound image? Is it:\n(A) Blood vessels\n(B) Nerves\n(C) Muscles\n(D) Tissues" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.46875, "loss": 5.340873718261719 }, { "id": "identity_113517", "image": "VQA-AS/images/15288.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many books can be seen on the shelf?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 16.53125, "loss": 4.271238803863525 }, { "id": "identity_268417", "image": "AI2D/images/3346.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the structures that protrude upwards on its head?\nChoices:\n(A) none of the above\n(B) antenna\n(C) labrum\n(D) thorax" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.140625, "loss": 4.287497043609619 }, { "id": "identity_338275", "image": "A-OKVQA/images/581183.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these animals doing?\nChoices:\n(A) foraging\n(B) performing\n(C) mating\n(D) stampeding" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.109375, "loss": 4.354210376739502 }, { "id": "identity_106487", "image": "Super-CLEVR/images/superCLEVR_new_000232.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly to the right of the green car?" }, { "from": "gpt", "value": "The answer is Sports Car" } ], "clip_score": 21.0625, "loss": 4.88538122177124 }, { "id": "identity_261865", "image": "ScienceQA/images/2441.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Vermont?\nChoices:\n(A) Raleigh\n(B) Burlington\n(C) Montpelier\n(D) Providence" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.4375, "loss": 4.14291524887085 }, { "id": "identity_63596", "image": "ScienceQA/images/17671.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many planets in the table have a rock composition?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.125, "loss": 4.643893241882324 }, { "id": "identity_295782", "image": "VQA2.0/images/129914.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the rocks form a straight line?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 12.8515625, "loss": 4.302430629730225 }, { "id": "identity_281661", "image": "A-OKVQA/images/417373.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of screen is located furthest to the left in this cluttered room? Is it a crt, projector, phone lcd, or computer lcd?\nChoices:\n(A) crt\n(B) projector\n(C) phone lcd\n(D) computer lcd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.828125, "loss": 4.325926780700684 }, { "id": "identity_314817", "image": "ScienceQA/images/5265.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) New Orleans\n(B) Raleigh\n(C) Baton Rouge\n(D) Frankfort" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.6875, "loss": 4.602232933044434 }, { "id": "identity_292337", "image": "ScienceQA/images/20629.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In Amsterdam, which month typically has the lowest average temperature?\nChoices:\n(A) January\n(B) February\n(C) November" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.375, "loss": 4.263972282409668 }, { "id": "identity_334807", "image": "PMC-VQA/images/PMC4156216_f4-ol-08-04-1713_318275.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the imaging suggest had happened in this area?\nChoices:\n(A) Infection\n(B) Hemorrhage\n(C) Abscess\n(D) Tumor relapse" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.203125, "loss": 4.607221603393555 }, { "id": "identity_307921", "image": "DVQA/images/bar_train_00027910.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of this group of bars?" }, { "from": "gpt", "value": "The answer is rate" } ], "clip_score": 21.421875, "loss": 4.362542152404785 }, { "id": "identity_288775", "image": "PMC-VQA/images/PMC7503285_ijms-21-06163-f007_257393.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which channel is located in the femur metaepiphysis?\n(A) sagittal channel\n(B) longitudinal channel\n(C) transverse channel\n(D) circular channel" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.921875, "loss": 4.277263641357422 }, { "id": "identity_270653", "image": "FigureQA/images/69628.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly describes Indigo's smoothness level: (A) no or (B) yes?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.734375, "loss": 4.0718560218811035 }, { "id": "identity_145494", "image": "MapQA/images/map_12046.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have numbers in the range of 836-963?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 29.734375, "loss": 4.184401035308838 }, { "id": "identity_284687", "image": "A-OKVQA/images/340440.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the room being so small?\nChoices:\n(A) temporary arrangement\n(B) college dorm\n(C) small tenants\n(D) low rent" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.421875, "loss": 4.515083312988281 }, { "id": "identity_311170", "image": "ScienceQA/images/7682.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Boston\n(B) Augusta\n(C) Cambridge\n(D) Plymouth" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.453125, "loss": 4.281794548034668 }, { "id": "identity_260331", "image": "TextVQA/images/003ef1b57db7ce3d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the items in the pockets of the two men and how do they relate to the central theme of the cartoon?" }, { "from": "gpt", "value": "The answer is The items symbolize misguided self-support schemes, mocking their efforts" } ], "clip_score": 19.59375, "loss": 4.032702445983887 }, { "id": "identity_283247", "image": "PMC-VQA/images/PMC8405323_fig2_48743.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible in the radiographs?\nChoices:\n(A) The original surgical site \n(B) Dislodged screws \n(C) Postoperative changes after anterior fusion \n(D) Postoperative changes after posterior fusion." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.0625, "loss": 4.494605541229248 }, { "id": "identity_286913", "image": "PMC-VQA/images/PMC3633951_pone-0061900-g003_200304.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the arrow heads represent in the given options?\nChoices:\n(A) Overlaid fluorescence \n(B) Bright field images \n(C) Focus of the chimeric blastocysts \n(D) ICMs location" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.640625, "loss": 4.837225914001465 }, { "id": "identity_269017", "image": "DVQA/images/bar_train_00067929.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Among all the datasets, which algorithm has the lowest total accuracy?" }, { "from": "gpt", "value": "The answer is pair" } ], "clip_score": 31.953125, "loss": 4.322206497192383 }, { "id": "identity_65298", "image": "TQA/images/question_images/parts_microscope_7202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which component connects the base and barrel of the microscope?" }, { "from": "gpt", "value": "The answer is Arm" } ], "clip_score": 32.1875, "loss": 4.550856590270996 }, { "id": "identity_300711", "image": "Super-CLEVR/images/superCLEVR_new_001247.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a vehicle behind the green object that is in front of the aircraft and the scooter?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 28.984375, "loss": 5.183123588562012 }, { "id": "identity_141289", "image": "DocVQA/images/rlvm0227_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the ESD project leader for this project?" }, { "from": "gpt", "value": "The answer is Martin Redding" } ], "clip_score": 19.390625, "loss": 4.217269420623779 }, { "id": "identity_308651", "image": "IconQA/images/40193.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner more likely to land on?\nChoices:\n(A) yellow\n(B) white\n(C) neither; white and yellow are equally likely" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.46875, "loss": 4.053333282470703 }, { "id": "identity_267791", "image": "VQA2.0/images/458374.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What designs are present on the planes?" }, { "from": "gpt", "value": "The answer is color" } ], "clip_score": 19.21875, "loss": 4.74030065536499 }, { "id": "identity_26224", "image": "Super-CLEVR/images/superCLEVR_new_000156.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the car that is the same color as the fighter?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 27.578125, "loss": 4.921229362487793 }, { "id": "identity_281445", "image": "A-OKVQA/images/166069.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for him having his shirt off?\nChoices:\n(A) stolen\n(B) confused\n(C) warm weather\n(D) can't find" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.6171875, "loss": 4.622598171234131 }, { "id": "identity_7738", "image": "Super-CLEVR/images/superCLEVR_new_001609.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a tiny green object that is to the left of the big purple bus; what is its shape?" }, { "from": "gpt", "value": "The answer is suv" } ], "clip_score": 30.1875, "loss": 4.34219217300415 }, { "id": "identity_300934", "image": "AI2D/images/2493.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If A is Larvae, what is F?\nChoices:\n(A) Larvae\n(B) Adult\n(C) Egg\n(D) Pupae" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.890625, "loss": 4.117280960083008 }, { "id": "identity_187100", "image": "VQA2.0/images/387264.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of beverage is in the glass on the table?" }, { "from": "gpt", "value": "The answer is Wine or spirit" } ], "clip_score": 16.65625, "loss": 4.0820465087890625 }, { "id": "identity_296280", "image": "PMC-VQA/images/PMC6387555_Fig2_440103.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be seen in the image provided?\nChoices:\n(A) Abnormal lung parenchyma \n(B) Prominent pulmonary vasculature \n(C) Heart \n(D) Lungs" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.296875, "loss": 4.33773136138916 }, { "id": "identity_306326", "image": "A-OKVQA/images/92721.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of vehicle are they?\nChoices:\n(A) van\n(B) bus\n(C) truck\n(D) train" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.6875, "loss": 4.019207954406738 }, { "id": "identity_297279", "image": "PMC-VQA/images/PMC7410867_Fig2_229887.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the small short arrows in the MR images?\nChoices:\n(A) Internal necrosis \n(B) Marked enhancement \n(C) Subcutaneous soft tissue thickening \n(D) Internal bleeding" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.984375, "loss": 4.682956695556641 }, { "id": "identity_31492", "image": "PMC-VQA/images/PMC6536547_Fig8_485626.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the higher magnification view of the rectangular dotted area in the center of the image? \nChoices:\n(A) Bacteria \n(B) Fungi \n(C) Plant cells \n(D) None of the above. " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.859375, "loss": 4.286573886871338 }, { "id": "identity_213617", "image": "A-OKVQA/images/70927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of barrier is separating the animals from the background area?" }, { "from": "gpt", "value": "The answer is A white horizontal bar fence" } ], "clip_score": 22.859375, "loss": 4.414732933044434 }, { "id": "identity_199296", "image": "PMC-VQA/images/PMC7763342_brainsci-10-00963-f002_328088.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What direction is the aneurysm pointing?" }, { "from": "gpt", "value": "The answer is Superiorly" } ], "clip_score": 27.546875, "loss": 4.5008416175842285 }, { "id": "identity_199366", "image": "PMC-VQA/images/PMC7814755_hbm25261-fig-0002_345124.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the structure directly below the blue area?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 18.9375, "loss": 4.10477876663208 }, { "id": "identity_259194", "image": "TextVQA/images/75abd89790812733.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boxes with the Dole logo are visible in the image, and what other items are placed on or around these boxes?" }, { "from": "gpt", "value": "The answer is Two boxes, yellow crate on one box" } ], "clip_score": 13.6484375, "loss": 4.957443714141846 }, { "id": "identity_276642", "image": "A-OKVQA/images/141426.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which species of animal are the largest mammals present here?\nChoices:\n(A) bovine\n(B) ovine\n(C) porcine\n(D) equine" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.65625, "loss": 4.29190731048584 }, { "id": "identity_29242", "image": "TabMWP/images/28055.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is the function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.65625, "loss": 4.487978458404541 }, { "id": "identity_286979", "image": "PMC-VQA/images/PMC7854338_FIG3_358225.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the blue arrow in the image provided?\nChoices:\n(A) Lateral ventricles \n(B) 3rd ventricle \n(C) Sub ependymal Giant Cell Astrocytomas \n(D) Tumor" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.71875, "loss": 4.300891399383545 }, { "id": "identity_1193", "image": "FigureQA/images/62424.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Bubblegum have the minimum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.703125, "loss": 4.1197590827941895 }, { "id": "identity_22502", "image": "PMC-VQA/images/PMC8201232_molecules-26-03451-f002_467501.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the sample being imaged in the TEM images? \nChoices:\n(A) Bacteria \n(B) A metal alloy \n(C) A plant cell \n(D) None of the above " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.5625, "loss": 4.4040327072143555 }, { "id": "identity_282280", "image": "A-OKVQA/images/496617.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the current occupancy status of this room?\nChoices:\n(A) partial\n(B) one person\n(C) full\n(D) empty" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.765625, "loss": 4.456302642822266 }, { "id": "identity_208617", "image": "A-OKVQA/images/315065.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the roof of the building?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 12.5234375, "loss": 4.0438761711120605 }, { "id": "identity_17895", "image": "DVQA/images/bar_train_00140852.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the fourth bar from the left in each group?" }, { "from": "gpt", "value": "The answer is verse" } ], "clip_score": 13.8828125, "loss": 4.197564125061035 }, { "id": "identity_266810", "image": "TQA/images/question_images/geologic_time_6923.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the period that follows the Triassic period in the geological timescale?\nChoices:\n(A) jurassic\n(B) quaternary\n(C) cretaceous\n(D) tertiary" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.5625, "loss": 4.5737810134887695 }, { "id": "identity_303532", "image": "AI2D/images/2154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the adult medusa form shown?\nChoices:\n(A) c\n(B) a\n(C) d\n(D) b" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.9375, "loss": 4.132950782775879 }, { "id": "identity_317787", "image": "IconQA/images/27218.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this symbol?\nChoices:\n(A) turn\n(B) flip\n(C) slide" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.75, "loss": 4.282193183898926 }, { "id": "identity_20850", "image": "PMC-VQA/images/PMC8977489_F5_242901.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which side of the body is shown in the images? \nChoices:\n(A) Left \n(B) Right \n(C) Both \n(D) Cannot be determined " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.234375, "loss": 4.10488224029541 }, { "id": "identity_308531", "image": "FigureQA/images/68572.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.6640625, "loss": 4.014693737030029 }, { "id": "identity_324304", "image": "PMC-VQA/images/PMC6819398_Fig8_57937.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the bone destruction observed in the patient's body?\nChoices:\n(A) Right femur\n(B) Left tibia\n(C) Fifth lumbar vertebra\n(D) Right shoulder blade" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.546875, "loss": 4.258053779602051 }, { "id": "identity_587", "image": "PMC-VQA/images/PMC8020193_FIG1_409560.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where in the image is the spontaneous pneumomediastinum located?\nChoices:\n(A) Base of the right neck \n(B) Base of the left neck \n(C) Center of the chest \n(D) Base of the spine" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.84375, "loss": 4.477785110473633 }, { "id": "identity_329257", "image": "PMC-VQA/images/PMC3834309_pone-0080939-g005_244798.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of image is this?\nChoices:\n(A) CT Scan\n(B) PET Scan\n(C) MRI\n(D) X-Ray" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.25, "loss": 4.547740459442139 }, { "id": "identity_145432", "image": "DocVQA/images/jlxn0226_41.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last word in the document?" }, { "from": "gpt", "value": "The answer is motion" } ], "clip_score": 19.546875, "loss": 5.929093360900879 }, { "id": "identity_270365", "image": "IconQA/images/61099.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture is the fourth one if the first picture is a tractor, and the choices are sun, tractor, and barn?\nChoices:\n(A) sun\n(B) tractor\n(C) barn" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.15625, "loss": 4.091444969177246 }, { "id": "identity_186120", "image": "A-OKVQA/images/33208.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cars can be seen in the image?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 18.09375, "loss": 4.849929332733154 }, { "id": "identity_319784", "image": "Super-CLEVR/images/superCLEVR_new_000533.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a big brown shiny fighter to the left of a large motorbike that is left of a large blue shiny motorbike that is on the left side of a big green rubber car?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 30.9375, "loss": 4.912107944488525 }, { "id": "identity_9001", "image": "A-OKVQA/images/578137.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the blue bin on the left for?\nChoices:\n(A) biohazardous waste\n(B) compost\n(C) garbage\n(D) recycling" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.5625, "loss": 4.370055675506592 }, { "id": "identity_125416", "image": "A-OKVQA/images/286376.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary graphic element on the skateboard in the center?" }, { "from": "gpt", "value": "The answer is A wolf" } ], "clip_score": 25.484375, "loss": 4.2642035484313965 }, { "id": "identity_336419", "image": "DocVQA/images/fqhm0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of Channing H. Lushbough?" }, { "from": "gpt", "value": "The answer is director and executive secretary" } ], "clip_score": 20.5, "loss": 4.4217329025268555 }, { "id": "identity_322775", "image": "AI2D/images/3860.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the plant leaf part indicated by the letter C in the diagram?\nChoices:\n(A) margin\n(B) vein\n(C) leaf stalk\n(D) lamina" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.78125, "loss": 4.400258541107178 }, { "id": "identity_213037", "image": "PMC-VQA/images/PMC6826538_genes-10-00762-f002_60464.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many triangular objects can be counted in the main image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 16.90625, "loss": 4.3556036949157715 }, { "id": "identity_326114", "image": "PMC-VQA/images/PMC8702715_F1_147743.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of thrombus is indicated by the dotted arrow in the image?\nChoices:\n(A) white thrombus,\n(B) red thrombus,\n(C) black thrombus,\n(D) blue thrombus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.484375, "loss": 4.0715484619140625 }, { "id": "identity_22470", "image": "A-OKVQA/images/421810.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What allows the red kites to fly?\nChoices:\n(A) tails\n(B) doldrums\n(C) poles\n(D) string" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.0625, "loss": 4.3035173416137695 }, { "id": "identity_278316", "image": "AI2D/images/3543.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is considered the outer part of the cell?\n(A) Plasma membrane\n(B) Microvilli\n(C) Lysosome\n(D) Mitochondrion" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.4375, "loss": 4.0936737060546875 }, { "id": "identity_30288", "image": "IconQA/images/103557.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a barn. Which picture is fourth?\nChoices:\n(A) tractor\n(B) sun\n(C) barn" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.5, "loss": 4.1453447341918945 }, { "id": "identity_182162", "image": "A-OKVQA/images/132141.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 14.609375, "loss": 4.424459457397461 }, { "id": "identity_214082", "image": "Super-CLEVR/images/superCLEVR_new_001014.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels does the blue object have?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 19.9375, "loss": 4.058200836181641 }, { "id": "identity_321150", "image": "A-OKVQA/images/437776.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of surface does this blue vehicle run on?\nChoices:\n(A) air current\n(B) road\n(C) water\n(D) rail" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.3046875, "loss": 4.22208309173584 }, { "id": "identity_137827", "image": "DocVQA/images/fmvj0223_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size group of the company?" }, { "from": "gpt", "value": "The answer is Decile 4" } ], "clip_score": 14.9765625, "loss": 4.196164131164551 }, { "id": "identity_312482", "image": "DocVQA/images/xggn0226_16.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the surgeon listed in the form?" }, { "from": "gpt", "value": "The answer is Kindsfater" } ], "clip_score": 22.21875, "loss": 4.799780368804932 }, { "id": "identity_336071", "image": "IconQA/images/78279.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, how likely is it that you will get a black one?\nChoices:\n(A) certain\n(B) unlikely\n(C) probable\n(D) impossible" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.390625, "loss": 4.318922996520996 }, { "id": "identity_198288", "image": "A-OKVQA/images/170683.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is depicted on the yellow sign pointing to the left?" }, { "from": "gpt", "value": "The answer is Taxi" } ], "clip_score": 26.640625, "loss": 4.610825061798096 }, { "id": "identity_285199", "image": "PMC-VQA/images/PMC3524754_F2_173978.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which body part is depicted in the cross-sectional image shown?\nChoices:\n(A) Forearm\n(B) Thigh\n(C) Head\n(D) Foot" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.03125, "loss": 4.191987991333008 }, { "id": "identity_292620", "image": "PMC-VQA/images/PMC6445604_F4_460291.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the white-dashed lines in the OCT B-scans?\nChoices:\n(A) Healthy retina \n(B) Abnormal blood vessels \n(C) Choroidal hypertransmission signifying GA \n(D) Normal optic nerve" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.65625, "loss": 4.560478210449219 }, { "id": "identity_302980", "image": "FigureQA/images/13155.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the minimum for Seafoam?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.9375, "loss": 4.260894775390625 }, { "id": "identity_29909", "image": "TextVQA/images/03e77cd31f9d6306.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the name of the man wearing green in the front?" }, { "from": "gpt", "value": "The answer is benedita" } ], "clip_score": 13.9921875, "loss": 5.639461517333984 }, { "id": "identity_954", "image": "PMC-VQA/images/PMC7325026_Fig2_201874.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which view is shown for the Munchkin cat? \nChoices:\n(A) Anterior view \n(B) Ventral view \n(C) Lateral view \n(D) Dorsal view " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.453125, "loss": 4.658991813659668 }, { "id": "identity_321066", "image": "PMC-VQA/images/PMC8299116_F1_14550.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of lesions are shown in these MRI images?\nChoices:\n(A) Hyperintense\n(B) Hypointense\n(C) Isointense\n(D) None of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.09375, "loss": 4.314357280731201 }, { "id": "identity_267034", "image": "AI2D/images/73.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organism benefits from the carbon dioxide released by fish among the following choices: animals, people, plants, or dirt?\nChoices:\n(A) animals\n(B) People\n(C) Plants\n(D) dirt" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.859375, "loss": 4.304205894470215 }, { "id": "identity_114967", "image": "VQA2.0/images/81188.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 15.3515625, "loss": 4.431061744689941 }, { "id": "identity_135582", "image": "DocVQA/images/rryk0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the initials at the bottom of the letter?" }, { "from": "gpt", "value": "The answer is ENC:st" } ], "clip_score": 15.25, "loss": 5.244481563568115 }, { "id": "identity_19000", "image": "FigureQA/images/4543.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gray the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.140625, "loss": 4.176340103149414 }, { "id": "identity_265552", "image": "FigureQA/images/88953.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Dark Slate or Mint?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.28125, "loss": 4.098369121551514 }, { "id": "identity_311554", "image": "A-OKVQA/images/442322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What play is the best outcome for the batter?\nChoices:\n(A) home run\n(B) foul\n(C) walk\n(D) strike out" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.703125, "loss": 5.194601058959961 }, { "id": "identity_190451", "image": "A-OKVQA/images/37209.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the car parked on the left side of the street?" }, { "from": "gpt", "value": "The answer is Silver" } ], "clip_score": 19.46875, "loss": 4.0338850021362305 }, { "id": "identity_116341", "image": "TextVQA/images/3b3fb77c202ef6eb.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full name of the author of the book \"Writing Clear Essays\"?" }, { "from": "gpt", "value": "The answer is RB. Donald" } ], "clip_score": 17.296875, "loss": 5.313989162445068 }, { "id": "identity_284365", "image": "A-OKVQA/images/437516.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which item, typically worn by serious skateboarders, is the skateboarder not wearing in this scenario? \n(A) safety gear \n(B) chucks \n(C) shorts \n(D) sunglasses" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.59375, "loss": 4.406942844390869 }, { "id": "identity_24676", "image": "A-OKVQA/images/59367.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where would you most likely see those long orange things?\nChoices:\n(A) classroom\n(B) doctor's office\n(C) pool\n(D) cafe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.6875, "loss": 4.245268821716309 }, { "id": "identity_295598", "image": "PMC-VQA/images/PMC4000658_fig2_284438.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the type of image represented by image A from the following options: (A) CT, (B) PET, (C) MR, (D) X-ray." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.03125, "loss": 4.243800163269043 }, { "id": "identity_325735", "image": "PMC-VQA/images/PMC6919442_F1_86269.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the solid nodules located?\nChoices:\n(A) Right upper lobe\n(B) Right middle lobe\n(C) Right lower lobe\n(D) Left upper lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.78125, "loss": 4.923592567443848 }, { "id": "identity_320417", "image": "PMC-VQA/images/PMC8278359_FIG1_8842.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did these images show?\nChoices:\n(A) Diagnosis of the injury \n(B) Fractures \n(C) Fistula \n(D) None of the above" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.859375, "loss": 4.341461181640625 }, { "id": "identity_263320", "image": "AI2D/images/3714.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label that corresponds to the small intestine:\n(A) O\n(B) N\n(C) L\n(D) T" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.03125, "loss": 4.568488597869873 }, { "id": "identity_207389", "image": "TextVQA/images/7e1c64432aef42fb.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many human figures are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 12.8515625, "loss": 4.506094932556152 }, { "id": "identity_22810", "image": "PMC-VQA/images/PMC4464273_f3_395633.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of animal was used for this study? \nChoices:\n(A) Rat \n(B) Guinea pig \n(C) Mouse \n(D) Rabbit " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.53125, "loss": 4.141632080078125 }, { "id": "identity_318608", "image": "ScienceQA/images/11461.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this a mineral or a rock?\nChoices:\n(A) mineral\n(B) rock" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.734375, "loss": 4.066300868988037 }, { "id": "identity_292112", "image": "PMC-VQA/images/PMC7882111_acm213151-fig-0006_367662.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the left, middle, and right figures in this image?\nChoices:\n(A) Dose distribution of axial plane in the lung \n(B) Dose distribution of axial plane in the heart \n(C) Dose distribution of axial plane in the lens and parotid gland level \n(D) Dose distribution of a head CT scan." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.84375, "loss": 4.734671115875244 }, { "id": "identity_269733", "image": "TQA/images/question_images/cycle_rock_6729.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the process through which igneous rocks are formed?\nChoices:\n(A) by cooling magma and lava\n(B) through melting of the igneous rock\n(C) by applying heat and pressure to sedimentary rock\n(D) through compaction of the sedimentary rock" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.703125, "loss": 4.025540828704834 }, { "id": "identity_330153", "image": "PMC-VQA/images/PMC5121968_Fig1_98781.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What anatomical region is shown in the midline sagittal image?\nChoices:\n(A) Shoulder joint\n(B) Knee joint\n(C) Hip joint\n(D) Elbow joint" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.125, "loss": 4.9441328048706055 }, { "id": "identity_313529", "image": "ScienceQA/images/7687.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of this colony?\nChoices:\n(A) Delaware\n(B) New Hampshire\n(C) Illinois\n(D) Pennsylvania" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.96875, "loss": 4.138180732727051 }, { "id": "identity_264436", "image": "AI2D/images/4276.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which layer is identified as F in the given options: seal, sandstone, gas-rich shale, or coalbed methane?\nChoices:\n(A) seal\n(B) sandstone\n(C) gas-rich shale\n(D) coalbed methane" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.0, "loss": 4.293931007385254 }, { "id": "identity_201368", "image": "A-OKVQA/images/75299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of parking is prohibited according to the sign in the image?" }, { "from": "gpt", "value": "The answer is Two-way traffic keep right" } ], "clip_score": 17.0625, "loss": 4.4535675048828125 }, { "id": "identity_62533", "image": "FigureQA/images/49377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color represents the second highest value?" }, { "from": "gpt", "value": "The answer is Medium Aqua" } ], "clip_score": 22.03125, "loss": 4.298017501831055 }, { "id": "identity_278940", "image": "TQA/images/abc_question_images/layers_of_atmosphere_18101.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the letters P, M, J, and W, which letter is positioned vertically the highest?\nChoices:\n(A) P\n(B) M\n(C) J\n(D) W" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.109375, "loss": 4.102748394012451 }, { "id": "identity_33300", "image": "A-OKVQA/images/149679.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How is doing what the photo taker is doing with their leg considered?\nChoices:\n(A) spiritual\n(B) slightly rude\n(C) dangerous\n(D) perfectly normal" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.828125, "loss": 4.183777809143066 }, { "id": "identity_334488", "image": "AI2D/images/3820.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part in the diagram is the endosperm?\nChoices:\n(A) C\n(B) A\n(C) B\n(D) D" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.71875, "loss": 4.392661094665527 }, { "id": "identity_257814", "image": "VQA2.0/images/42441.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is the horse pulling, and how many people are riding in it?" }, { "from": "gpt", "value": "The answer is Cart or carriage, three people" } ], "clip_score": 29.65625, "loss": 4.378704071044922 }, { "id": "identity_70044", "image": "ScienceQA/images/7817.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary function of the objects shown in the image?" }, { "from": "gpt", "value": "The answer is Providing entertainment or mechanical utility" } ], "clip_score": 17.390625, "loss": 4.776309490203857 }, { "id": "identity_13049", "image": "VQA-RAD/images/synpic32970.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the lesion located?" }, { "from": "gpt", "value": "The answer is right lower lateral lung field" } ], "clip_score": 29.640625, "loss": 4.256995677947998 }, { "id": "identity_274712", "image": "ScienceQA/images/12371.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the passage discussing the weather or the climate?\nChoices:\n(A) climate\n(B) weather" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 9.203125, "loss": 4.738409996032715 }, { "id": "identity_264876", "image": "FigureQA/images/41913.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Firebrick or Crimson?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.421875, "loss": 4.6340765953063965 }, { "id": "identity_226748", "image": "Super-CLEVR/images/superCLEVR_new_001379.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is the same material as the tiny cyan thing and is located closest to the bicycle?" }, { "from": "gpt", "value": "The answer is gray" } ], "clip_score": 31.6875, "loss": 4.689591407775879 }, { "id": "identity_267327", "image": "FigureQA/images/69098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Aqua the toughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.375, "loss": 4.29598331451416 }, { "id": "identity_302196", "image": "A-OKVQA/images/325184.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At which elevation are the animals compared to those observing them?\nChoices:\n(A) same\n(B) lower\n(C) changing\n(D) higher" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.21875, "loss": 4.297451019287109 }, { "id": "identity_310979", "image": "A-OKVQA/images/220390.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this place most likely located?\nChoices:\n(A) nepal\n(B) indonesia\n(C) bangladesh\n(D) tibet" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.734375, "loss": 4.190070152282715 }, { "id": "identity_9741", "image": "IconQA/images/50752.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a moon. Which picture is fifth?\nChoices:\n(A) rocket\n(B) star\n(C) moon" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.234375, "loss": 4.565484523773193 }, { "id": "identity_233337", "image": "VQA2.0/images/347037.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day is it in the photo, and what evidence supports this conclusion?" }, { "from": "gpt", "value": "The answer is Early morning or late evening" } ], "clip_score": 21.296875, "loss": 4.166546821594238 }, { "id": "identity_25810", "image": "PMC-VQA/images/PMC3885112_RRT079F3_257329.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is clearly visible in the computed tomography image? \nChoices:\n(A) Contour of the gross tumor volume \n(B) Muscles in the tongue \n(C) Applicator points in the tongue \n(D) Bone structure of the jaw " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.78125, "loss": 4.741412162780762 }, { "id": "identity_265059", "image": "VizWiz/images/VizWiz_train_00004313.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 15.3203125, "loss": 4.096186637878418 }, { "id": "identity_311833", "image": "PMC-VQA/images/PMC7842001_Fig1_355063.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most likely diagnosis based on these findings?\nChoices:\n(A) osteoarthritis\n(B) rheumatoid arthritis\n(C) gout\n(D) systemic lupus erythematosus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.53125, "loss": 4.827314376831055 }, { "id": "identity_127162", "image": "A-OKVQA/images/444390.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pendant lights are hanging above the kitchen island?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.390625, "loss": 4.274095058441162 }, { "id": "identity_188026", "image": "VQA2.0/images/270636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many colors are visible on the kite?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.484375, "loss": 4.088733673095703 }, { "id": "identity_316479", "image": "ScienceQA/images/7351.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Wichita\n(B) Lansing\n(C) Kansas City\n(D) Topeka" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.0625, "loss": 4.004120349884033 }, { "id": "identity_272450", "image": "VizWiz/images/VizWiz_train_00006416.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What can be found on this page?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 10.59375, "loss": 4.144673824310303 }, { "id": "identity_332446", "image": "PMC-VQA/images/PMC7270124_Fig3_183801.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the mass lesion located in patient 40?\nChoices:\n(A) Right insular lobe and inferior parietal lobe\n(B) Left insular lobe and inferior parietal lobe\n(C) Right frontal gyrus and temporal lobe\n(D) Left frontal gyrus and temporal lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.609375, "loss": 4.188303470611572 }, { "id": "identity_5678", "image": "VizWiz/images/VizWiz_train_00005289.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Please describe this page, thank you." }, { "from": "gpt", "value": "The answer is clothing sales ad" } ], "clip_score": 19.546875, "loss": 4.02742862701416 }, { "id": "identity_338257", "image": "PMC-VQA/images/PMC6376132_Fig3_436395.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of image was used to measure this? \nChoices:\n(A) CT scan, \n(B) MRI T2WI, \n(C) X-ray, \n(D) PET scan." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.984375, "loss": 4.62770414352417 }, { "id": "identity_105374", "image": "TextVQA/images/006c36098228cd2c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the symbol to the right of the sign indicating?" }, { "from": "gpt", "value": "The answer is Flammable material" } ], "clip_score": 19.453125, "loss": 4.320123672485352 }, { "id": "identity_223754", "image": "A-OKVQA/images/575406.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels are visible on the vehicle in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.734375, "loss": 4.1705121994018555 }, { "id": "identity_153794", "image": "TQA/images/abc_question_images/waves_17679.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the point marked with the label 'K'" }, { "from": "gpt", "value": "The answer is Trough" } ], "clip_score": 24.09375, "loss": 4.442759990692139 }, { "id": "identity_7545", "image": "A-OKVQA/images/203702.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are they so distracted by the zebra?\nChoices:\n(A) is noisy\n(B) is unusual\n(C) is attacking\n(D) is famous" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.84375, "loss": 4.145247459411621 }, { "id": "identity_204864", "image": "VizWiz/images/VizWiz_train_00005812.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of closure does the shirt have at the neckline?" }, { "from": "gpt", "value": "The answer is String tie closure" } ], "clip_score": 20.578125, "loss": 4.261751651763916 }, { "id": "identity_19550", "image": "FigureQA/images/3222.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Periwinkle the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.3828125, "loss": 4.223267555236816 }, { "id": "identity_301983", "image": "ScienceQA/images/6672.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which one is the fish below?\nChoices:\n(A) salmon\n(B) bison" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.921875, "loss": 4.033790588378906 }, { "id": "identity_294439", "image": "A-OKVQA/images/8746.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind his possession of the ball?\nChoices:\n(A) showing off\n(B) losing control\n(C) stole it\n(D) curious" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.3203125, "loss": 4.602563381195068 }, { "id": "identity_127010", "image": "PMC-VQA/images/PMC3948907_Fig2_273076.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vertebrae can be seen in the CT scan?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 30.328125, "loss": 4.456370830535889 }, { "id": "identity_186476", "image": "TextVQA/images/05640d9d140ec1d4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the establishment's signboard in the background to the left?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 16.78125, "loss": 4.036383628845215 }, { "id": "identity_125660", "image": "TextVQA/images/b81c007d8b5e6a7a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many clover-shaped balloons are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.40625, "loss": 4.201996803283691 }, { "id": "identity_290025", "image": "PMC-VQA/images/PMC3466236_pone-0047230-g003_159344.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In relation to the coronal section, what is the orientation of the sagittal sections?\nChoices:\n(A) Vertical\n(B) Parallel\n(C) Perpendicular\n(D) Diagonal" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.59375, "loss": 4.742560863494873 }, { "id": "identity_330427", "image": "DocVQA/images/qkjk0079_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the position of this person?" }, { "from": "gpt", "value": "The answer is director promotion" } ], "clip_score": 20.265625, "loss": 5.116011142730713 }, { "id": "identity_280356", "image": "ScienceQA/images/7921.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of cells are eggs, considering they are stored inside the pistil?\nChoices:\n(A) female cells\n(B) male cells" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.3125, "loss": 4.3365583419799805 }, { "id": "identity_282644", "image": "PMC-VQA/images/PMC6476892_Fig3_469233.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is revealed by the CT angiography scan?\nChoices:\n(A) Patent left EIA and CFA. \n(B) Successful repair of left EIA. \n(C) Patent right EIA and CFA. \n(D) Successful repair of right EIA." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.90625, "loss": 4.076416969299316 }, { "id": "identity_331709", "image": "A-OKVQA/images/21036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this blue float resemble?\nChoices:\n(A) lobster\n(B) horse\n(C) cow\n(D) egg" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.078125, "loss": 4.125697135925293 }, { "id": "identity_7806", "image": "ScienceQA/images/1072.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these four objects have in common?\nChoices:\n(A) soft\n(B) smooth\n(C) shiny" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.21875, "loss": 4.195328712463379 }, { "id": "identity_16488", "image": "ScienceQA/images/16378.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do moss plants produce seeds?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.15625, "loss": 4.114158630371094 }, { "id": "identity_279473", "image": "A-OKVQA/images/571842.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which animal is represented by the blue balloon on the right? Is it a whale, dog, unicorn, or dolphin?\nChoices:\n(A) whale\n(B) dog\n(C) unicorn\n(D) dolphin" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.5546875, "loss": 4.934731483459473 }, { "id": "identity_4136", "image": "AI2D/images/3116.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the above diagram display?\nChoices:\n(A) Papaya leaf\n(B) Plant species\n(C) Animal cell\n(D) Star formation" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.484375, "loss": 4.5943450927734375 }, { "id": "identity_282703", "image": "A-OKVQA/images/480005.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: To which location is the individual transporting the vegetables on the boat?\nChoices:\n(A) throwing away\n(B) to market\n(C) home\n(D) church" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.984375, "loss": 4.6735453605651855 }, { "id": "identity_145698", "image": "DocVQA/images/xyfg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the sender of the letter?" }, { "from": "gpt", "value": "The answer is Horace L. Sipple" } ], "clip_score": 22.96875, "loss": 4.064165115356445 }, { "id": "identity_287468", "image": "PMC-VQA/images/PMC3681988_pone-0066137-g022_211303.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the micrograph, which specific region of the brain is depicted?\nChoices:\n(A) Mid-brain\n(B) Posterior region\n(C) Anterior region\n(D) Unidentifiable region" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.078125, "loss": 4.493470668792725 }, { "id": "identity_273908", "image": "A-OKVQA/images/207548.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a possible outcome of sitting on this bench? \n(A) experience seasickness\n(B) get sunburned\n(C) get lost\n(D) have sandy feet" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.234375, "loss": 4.663259029388428 }, { "id": "identity_299087", "image": "PMC-VQA/images/PMC3858590_Fig2_250830.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What radiographic finding is present in the right breast?\nChoices:\n(A) Decreased breast density\n(B) Increased breast density\n(C) Mass in the upper outer breast\n(D) Calcifications in the lower inner breast" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.3125, "loss": 4.5015363693237305 }, { "id": "identity_300925", "image": "VQA2.0/images/455252.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does this look like?" }, { "from": "gpt", "value": "The answer is dragon" } ], "clip_score": 12.984375, "loss": 4.151212215423584 }, { "id": "identity_204257", "image": "TextVQA/images/96969f648613e3f7.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pink numbered tags are visible on the map?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 25.515625, "loss": 4.099967002868652 }, { "id": "identity_155388", "image": "ScienceQA/images/6093.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the texture of the surface the animal is standing on?" }, { "from": "gpt", "value": "The answer is Rough, likely ground or land" } ], "clip_score": 22.34375, "loss": 4.412092208862305 }, { "id": "identity_127335", "image": "TextVQA/images/005cdc41f47be514.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the signboard of the jeweler being advertised?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 28.15625, "loss": 4.04669189453125 }, { "id": "identity_147958", "image": "DocVQA/images/qndd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last name of the third student mentioned in the document?" }, { "from": "gpt", "value": "The answer is Collins" } ], "clip_score": 19.0625, "loss": 4.375351428985596 }, { "id": "identity_210160", "image": "A-OKVQA/images/388382.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible on the balcony of the building on the right?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.796875, "loss": 4.567267417907715 }, { "id": "identity_324514", "image": "PMC-VQA/images/PMC4289560_Fig1_349859.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which parts of the eye exhibit hyperfluorescence?\nChoices:\n(A) Cornea\n(B) Iris\n(C) Retinal vessels\n(D) Choroid" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.171875, "loss": 4.447963237762451 }, { "id": "identity_275863", "image": "AI2D/images/535.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the primary food source of spiders?\nChoices:\n(A) seed eating birds\n(B) herbivorous insects\n(C) rabbits\n(D) squirrels" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.34375, "loss": 4.524043083190918 }, { "id": "identity_286774", "image": "A-OKVQA/images/138347.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the available options for using the bathroom in this location?\nChoices:\n(A) behind tree\n(B) male restroom\n(C) outhouse\n(D) female restroom" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 5.671875, "loss": 4.077576637268066 }, { "id": "identity_326731", "image": "IconQA/images/65513.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, how likely is it that you will get a black one?\nChoices:\n(A) impossible\n(B) probable\n(C) certain\n(D) unlikely" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.390625, "loss": 4.152984619140625 }, { "id": "identity_278062", "image": "PlotQA/images/121487.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which difference is greater: the information index in Uruguay between 2013 and 2014 or the information index in the Philippines between 2013 and 2014?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.046875, "loss": 4.260004997253418 }, { "id": "identity_29603", "image": "DVQA/images/bar_train_00181430.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has lowest accuracy for any dataset?" }, { "from": "gpt", "value": "The answer is output" } ], "clip_score": 32.9375, "loss": 4.1505889892578125 }, { "id": "identity_283986", "image": "IconQA/images/81862.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the shape of this object from the following options: sphere, cylinder, cone, cube.\nChoices:\n(A) sphere\n(B) cylinder\n(C) cone\n(D) cube" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.0625, "loss": 4.184902667999268 }, { "id": "identity_248911", "image": "A-OKVQA/images/437180.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Considering the context of the image, what are the possible reasons for the presence of the blue bottle, and which one is the most likely based on the activities shown?" }, { "from": "gpt", "value": "The answer is thirst" } ], "clip_score": 15.0859375, "loss": 6.313424587249756 }, { "id": "identity_108988", "image": "Super-CLEVR/images/superCLEVR_new_001061.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the bus located behind the blurred area?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 24.6875, "loss": 5.090961933135986 }, { "id": "identity_31834", "image": "PMC-VQA/images/PMC4630249_Fig6_439868.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which side of the patient's parotid gland has the cystic mass? \nChoices:\n(A) Left \n(B) Right \n(C) Both \n(D) Not mentioned " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.109375, "loss": 4.027236461639404 }, { "id": "identity_264331", "image": "PMC-VQA/images/PMC7939567_F0002_386113.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the MRI scan show as the result?\nChoices:\n(A) Right thalamus infarction \n(B) No abnormality detected in either thalamus \n(C) Left thalamus infarction \n(D) Spinal cord injury" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.859375, "loss": 4.112441539764404 }, { "id": "identity_188909", "image": "VQA2.0/images/81749.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the train at platform 4?" }, { "from": "gpt", "value": "The answer is Dark blue or navy" } ], "clip_score": 29.453125, "loss": 4.078122615814209 }, { "id": "identity_286055", "image": "A-OKVQA/images/3366.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What event or situation do the horse and rider depicted in the image belong to?\nChoices:\n(A) runaway horse\n(B) parade\n(C) rodeo roundup\n(D) escape" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.796875, "loss": 4.416686058044434 }, { "id": "identity_32113", "image": "ScienceQA/images/11622.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During this time, thermal energy was transferred from () to ().\nChoices:\n(A) the surroundings . . . each refrigerator\n(B) each refrigerator . . . the surroundings" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.1875, "loss": 4.105684757232666 }, { "id": "identity_290827", "image": "TextVQA/images/672dc94dc228aaff.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What word is located below the symbol in the bottom left corner?" }, { "from": "gpt", "value": "The answer is vibration" } ], "clip_score": 11.4140625, "loss": 4.704381942749023 }, { "id": "identity_55359", "image": "DocVQA/images/xggn0226_39.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of film copies were received?" }, { "from": "gpt", "value": "The answer is Originals" } ], "clip_score": 20.703125, "loss": 5.013565540313721 }, { "id": "identity_134196", "image": "DocVQA/images/klvh0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of transmission used for this report?" }, { "from": "gpt", "value": "The answer is STD" } ], "clip_score": 23.9375, "loss": 4.68670129776001 }, { "id": "identity_288263", "image": "PMC-VQA/images/PMC6020204_Fig2_333684.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to uncover the tumor in Meckel's cave?\nOptions:\n(A) CT scan\n(B) PET scan\n(C) T1-weighted axial images\n(D) T2-weighted axial images" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.34375, "loss": 4.23146915435791 }, { "id": "identity_305508", "image": "PMC-VQA/images/PMC7683662_Fig3_303608.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the bone is being replaced here?\nChoices:\n(A) Periosteum \n(B) Bone marrow \n(C) Bony trabeculae \n(D) Cortical bone" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.296875, "loss": 4.521144866943359 }, { "id": "identity_297515", "image": "A-OKVQA/images/377589.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On what type of surface are these individuals seated? \nOptions:\n(A) ash\n(B) sand\n(C) marble\n(D) concrete" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.8203125, "loss": 4.543771266937256 }, { "id": "identity_305081", "image": "DocVQA/images/hgcg0024_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the agenda for this meeting?" }, { "from": "gpt", "value": "The answer is Review of Procedure Note" } ], "clip_score": 26.359375, "loss": 4.261528491973877 }, { "id": "identity_263581", "image": "TQA/images/question_images/earth_moon_phases_138.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which phase of the moon follows the waxing crescent phase? Is it:\n(A) waning gibbous\n(B) first quarter\n(C) waxing gibbous\n(D) full moon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.90625, "loss": 4.252737998962402 }, { "id": "identity_116187", "image": "VQA2.0/images/330402.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many animals are visible in the enclosure?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.734375, "loss": 4.063739776611328 }, { "id": "identity_292213", "image": "A-OKVQA/images/296797.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what way are individuals being transported to this location?\nChoices:\n(A) taxi\n(B) cable car\n(C) steam train\n(D) mule" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.859375, "loss": 4.15350866317749 }, { "id": "identity_114026", "image": "VQA2.0/images/267734.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the vehicle directly reflected in the mirror?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 28.25, "loss": 4.2710371017456055 }, { "id": "identity_132637", "image": "VQA2.0/images/368490.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible people are wearing glasses?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 15.9453125, "loss": 5.373510360717773 }, { "id": "identity_239777", "image": "VQA2.0/images/172370.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are three indicators in the image that suggest the person is not a good snowboarder?" }, { "from": "gpt", "value": "The answer is Sitting posture, assistance, stationary" } ], "clip_score": 26.8125, "loss": 4.461539268493652 }, { "id": "identity_277478", "image": "PMC-VQA/images/PMC5646889_F2_228583.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to capture the images among the following options: computed tomography, magnetic resonance imaging, positron emission tomography, or X-ray?\nChoices:\n(A) computed tomography \n(B) magnetic resonance imaging \n(C) positron emission tomography \n(D) X-ray" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.5625, "loss": 4.166700839996338 }, { "id": "identity_217294", "image": "VQA2.0/images/39580.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are holding umbrellas?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.828125, "loss": 4.3137712478637695 }, { "id": "identity_321645", "image": "PMC-VQA/images/PMC8425862_advs2784-fig-0007_53973.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the red arrow indicate in this image?\nChoices:\n(A) White matter \n(B) Grey matter \n(C) Endothelial cells \n(D) Basal lamina membrane" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.65625, "loss": 4.2470879554748535 }, { "id": "identity_33989", "image": "IconQA/images/74273.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of triangles even or odd?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.28125, "loss": 4.123892784118652 }, { "id": "identity_24616", "image": "ScienceQA/images/18992.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During this time, thermal energy was transferred from () to ().\nChoices:\n(A) the surroundings . . . each refrigerator\n(B) each refrigerator . . . the surroundings" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.484375, "loss": 4.069811820983887 }, { "id": "identity_288322", "image": "TextVQA/images/7e12d937e6e90065.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: According to the clock on the stove, what is the current time?" }, { "from": "gpt", "value": "The answer is thursdays until 9pm" } ], "clip_score": 29.484375, "loss": 4.2804646492004395 }, { "id": "identity_293360", "image": "VQA2.0/images/409681.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of hat is the person closest to wearing?" }, { "from": "gpt", "value": "The answer is sun" } ], "clip_score": 14.703125, "loss": 4.7526140213012695 }, { "id": "identity_287367", "image": "PMC-VQA/images/PMC8399726_Fig4_46470.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the type of image displayed in the figure?\nOptions:\n(A) Sagittal T1-weighted image\n(B) Axial T2-weighted image\n(C) Coronal T2-weighted image\n(D) Sagittal T2-weighted image" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.3125, "loss": 4.007329940795898 }, { "id": "identity_198097", "image": "VQA2.0/images/203036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are wearing a vest in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.09375, "loss": 4.589714050292969 }, { "id": "identity_1370", "image": "TextVQA/images/5129f8041aa92388.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the title of the second section on the first page?" }, { "from": "gpt", "value": "The answer is hypocrifis" } ], "clip_score": 13.5078125, "loss": 4.511645793914795 }, { "id": "identity_121327", "image": "Super-CLEVR/images/superCLEVR_new_000941.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is both a vehicle and located to the left of the purple bicycle?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 30.96875, "loss": 4.238144874572754 }, { "id": "identity_264479", "image": "PMC-VQA/images/PMC9005803_F1_252228.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the yellow arrow in the image?\nChoices:\n(A) The normal ventricle \n(B) Brain atrophy \n(C) Enlarged ventricle in acute stage \n(D) Changes after cocktail therapy." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.875, "loss": 4.196331977844238 }, { "id": "identity_338194", "image": "IconQA/images/26701.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on?\nChoices:\n(A) white\n(B) yellow" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.078125, "loss": 4.036679267883301 }, { "id": "identity_336560", "image": "PMC-VQA/images/PMC4100749_F1_306570.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the lesion present in this area?\nChoices:\n(A) Hypodense \n(B) Hematoma \n(C) Hyperdense \n(D) Hyperintense" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.296875, "loss": 4.46014404296875 }, { "id": "identity_307822", "image": "AI2D/images/3077.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of cell is shown in the diagram?\nChoices:\n(A) animal cell\n(B) Eukaryotic cell\n(C) plant cell\n(D) lysosome" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.203125, "loss": 4.012093544006348 }, { "id": "identity_289387", "image": "PMC-VQA/images/PMC5617943_diagnostics-07-00043-f001_221034.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the pattern of pulmonary uptake shown in the image. Is it:\n(A) Focal\n(B) Diffuse\n(C) Peripheral\n(D) Central" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.515625, "loss": 4.2907257080078125 }, { "id": "identity_288858", "image": "PMC-VQA/images/PMC5271150_Fig7_120090.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the left side of the schematic diagram depict?\nChoices:\n(A) 3D representation of a dendrimer \n(B) A building \n(C) 2D representation of a predator \n(D) 2D representation of a dendrimer containing two generations" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.5390625, "loss": 4.189018249511719 }, { "id": "identity_65084", "image": "ScienceQA/images/1178.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the geographical feature directly east of the orange dot?" }, { "from": "gpt", "value": "The answer is Alabama" } ], "clip_score": 32.90625, "loss": 4.655555725097656 }, { "id": "identity_126224", "image": "VQA-AS/images/17774.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trees are visible in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 19.296875, "loss": 4.175372123718262 }, { "id": "identity_273307", "image": "AI2D/images/3661.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the variable j represent?\nChoices:\n(A) veins\n(B) left ventricle\n(C) aorta\n(D) none of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.625, "loss": 4.342594623565674 }, { "id": "identity_285312", "image": "PMC-VQA/images/PMC6952038_FIG1_97204.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to capture this image? \nOptions:\n(A) MRI\n(B) X-Ray\n(C) CT scan\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.421875, "loss": 4.221735954284668 }, { "id": "identity_31952", "image": "IconQA/images/91826.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a boat. Which picture is ninth?\nChoices:\n(A) bucket\n(B) crab\n(C) boat" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.1875, "loss": 4.14174222946167 }, { "id": "identity_315925", "image": "ScienceQA/images/3969.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Salem\n(B) Helena\n(C) Knoxville\n(D) Honolulu" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.828125, "loss": 4.5211029052734375 }, { "id": "identity_27806", "image": "VizWiz/images/VizWiz_train_00015623.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of purse is this?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 25.015625, "loss": 4.003606796264648 }, { "id": "identity_35863", "image": "IconQA/images/28199.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of fish even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.3125, "loss": 4.1250691413879395 }, { "id": "identity_60041", "image": "DocVQA/images/gsph0078_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the names of the RJR participants mentioned in the invoice?" }, { "from": "gpt", "value": "The answer is Phillips, Fackler, Kelm, Jerich" } ], "clip_score": 26.34375, "loss": 4.307442665100098 }, { "id": "identity_192336", "image": "VQA2.0/images/305916.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many waves can be seen breaking in the ocean?" }, { "from": "gpt", "value": "The answer is Three waves" } ], "clip_score": 17.671875, "loss": 4.4996795654296875 }, { "id": "identity_6202", "image": "TabMWP/images/16586.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is the function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.359375, "loss": 4.297904014587402 }, { "id": "identity_323416", "image": "A-OKVQA/images/8019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the person with the object hope for here?\nChoices:\n(A) wind\n(B) dog\n(C) snow\n(D) rain" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.734375, "loss": 4.752554893493652 }, { "id": "identity_189942", "image": "PMC-VQA/images/PMC5745994_Fig1_257987.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the hand in the image?" }, { "from": "gpt", "value": "The answer is Supine position" } ], "clip_score": 25.859375, "loss": 4.450528621673584 }, { "id": "identity_44878", "image": "DocVQA/images/jggn0226_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of motion for flexion as indicated on the form?" }, { "from": "gpt", "value": "The answer is -5" } ], "clip_score": 16.34375, "loss": 4.0472893714904785 }, { "id": "identity_264098", "image": "A-OKVQA/images/43256.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is located behind the orange cabinet?\nChoices:\n(A) window\n(B) wall\n(C) lamp\n(D) blind" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.203125, "loss": 4.0504231452941895 }, { "id": "identity_82777", "image": "Geometry3K/images/1703.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle BFC?" }, { "from": "gpt", "value": "The answer is m \\angle 2 + 64" } ], "clip_score": 33.40625, "loss": 4.373436450958252 }, { "id": "identity_24430", "image": "A-OKVQA/images/51501.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is woman using an umbrella?\nChoices:\n(A) decoration\n(B) sun protection\n(C) exercise\n(D) rain" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.890625, "loss": 4.087774753570557 }, { "id": "identity_331994", "image": "IconQA/images/73427.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a bucket. Which image comes second?\nChoices:\n(A) boat\n(B) crab\n(C) bucket" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.84375, "loss": 4.1707329750061035 }, { "id": "identity_97916", "image": "CLEVR-Math/images/CLEVR_train_061841.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the golden sphere represents the sun, which object could represent the Earth in terms of relative size?" }, { "from": "gpt", "value": "The answer is The red cube" } ], "clip_score": 24.390625, "loss": 4.097790718078613 }, { "id": "identity_197414", "image": "A-OKVQA/images/458308.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the horse's blanket?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 21.1875, "loss": 4.139297962188721 }, { "id": "identity_291348", "image": "TextVQA/images/d8399cc5e867952d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the name of the hotel?" }, { "from": "gpt", "value": "The answer is republican hotel" } ], "clip_score": 16.4375, "loss": 4.3457159996032715 }, { "id": "identity_279527", "image": "IconQA/images/9559.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is it possible to draw a circle using this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.421875, "loss": 4.115785121917725 }, { "id": "identity_187823", "image": "TextVQA/images/4966c6ae95fc4ca7.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many propellers are visible on the aircraft?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.25, "loss": 4.240707874298096 }, { "id": "identity_77488", "image": "TQA/images/question_images/faults_1754.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of motion is depicted in the third image?" }, { "from": "gpt", "value": "The answer is Horizontal motion" } ], "clip_score": 26.546875, "loss": 4.290464401245117 }, { "id": "identity_276338", "image": "ScienceQA/images/11735.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the following animals, which one is adapted to camouflage among green leaves?\nChoices:\n(A) spiny orb-weaver\n(B) huntsman spider" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.65625, "loss": 4.269547462463379 }, { "id": "identity_277035", "image": "AI2D/images/3314.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is the outermost part of a cell?\n(A) centriole\n(B) cell membrane\n(C) ribosome\n(D) mitochondria" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.28125, "loss": 4.276915550231934 }, { "id": "identity_7037", "image": "A-OKVQA/images/197959.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the round item seen on the floor and plugged into the wall clean?\nChoices:\n(A) shoes\n(B) floors\n(C) dishes\n(D) walls" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.0703125, "loss": 4.479030609130859 }, { "id": "identity_305376", "image": "PlotQA/images/42912.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which place was the government consumption in 1960 the lowest?" }, { "from": "gpt", "value": "The answer is Benin" } ], "clip_score": 26.34375, "loss": 4.195777416229248 }, { "id": "identity_1206", "image": "PMC-VQA/images/PMC8815180_Fig2_184364.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being ligated in this image?\nChoices:\n(A) Left hepatic vein \n(B) Right hepatic vein \n(C) Short hepatic veins \n(D) Hepatic pedicle" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.78125, "loss": 4.070168495178223 }, { "id": "identity_282952", "image": "ScienceQA/images/20865.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic best describes this object?\nChoices:\n(A) scratchy\n(B) shiny" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.421875, "loss": 4.59032678604126 }, { "id": "identity_216131", "image": "TextVQA/images/00eee630d7cc202b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the container directly behind the green EUCON container?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 26.328125, "loss": 4.8775506019592285 }, { "id": "identity_329885", "image": "ScienceQA/images/16742.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Cheyenne\n(B) Savannah\n(C) Montpelier\n(D) Burlington" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.984375, "loss": 4.1295366287231445 }, { "id": "identity_292904", "image": "IconQA/images/82333.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this shape have an open or closed structure?\nChoices:\n(A) open\n(B) closed" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.6875, "loss": 4.136432647705078 }, { "id": "identity_37764", "image": "A-OKVQA/images/216930.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is he in the air above the skateboard?\nChoices:\n(A) bouncing\n(B) showing off\n(C) confused\n(D) falling" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.78125, "loss": 4.591177940368652 }, { "id": "identity_265511", "image": "AI2D/images/4061.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter in the diagram corresponds to the chromosphere?\nChoices:\n(A) G\n(B) F\n(C) E\n(D) H" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.5625, "loss": 4.303290367126465 }, { "id": "identity_36026", "image": "A-OKVQA/images/29894.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the man holding the racket ready to do?\nChoices:\n(A) duck\n(B) dip\n(C) dodge\n(D) hit ball" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.1875, "loss": 4.269193649291992 }, { "id": "identity_207063", "image": "A-OKVQA/images/58143.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many light sources are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.796875, "loss": 4.452395439147949 }, { "id": "identity_314591", "image": "PMC-VQA/images/PMC5348553_Fig6_139017.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the arrowhead indicating in the image?\nChoices:\n(A) A normal cell\n(B) A cancer cell\n(C) An infected cell with mitochondrial staining\n(D) An infected cell with caspase-6 staining" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.171875, "loss": 4.6070876121521 }, { "id": "identity_208267", "image": "VizWiz/images/VizWiz_train_00011475.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many printed materials are on the table?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.25, "loss": 4.74629545211792 }, { "id": "identity_267508", "image": "PMC-VQA/images/PMC6425224_F4_453946.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which side of the sacrum is the lesion located? \nOptions:\n(A) Right\n(B) Left\n(C) Anterior\n(D) Posterior" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.390625, "loss": 4.418875694274902 }, { "id": "identity_154825", "image": "ScienceQA/images/8616.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of clothing is depicted in the image?" }, { "from": "gpt", "value": "The answer is A hat" } ], "clip_score": 31.40625, "loss": 4.0309858322143555 }, { "id": "identity_338518", "image": "A-OKVQA/images/51328.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is not allowed to be done near this?\n\nChoices:\n(A) singing\n(B) parking\n(C) eating\n(D) walking" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.015625, "loss": 4.642773151397705 }, { "id": "identity_309405", "image": "A-OKVQA/images/162848.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on this device?\nChoices:\n(A) butterfly\n(B) cat\n(C) egg\n(D) keys" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 11.28125, "loss": 4.064221382141113 }, { "id": "identity_7584", "image": "A-OKVQA/images/446307.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At what locale do the people stand?\nChoices:\n(A) forest station\n(B) train depot\n(C) market\n(D) music studios" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.890625, "loss": 4.541009902954102 }, { "id": "identity_55216", "image": "FigureQA/images/56344.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color represents the highest value on the graph?" }, { "from": "gpt", "value": "The answer is Pas Aquino" } ], "clip_score": 26.765625, "loss": 7.317657470703125 }, { "id": "identity_107987", "image": "Super-CLEVR/images/superCLEVR_new_000529.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly in front of the green matte bus?" }, { "from": "gpt", "value": "The answer is A train" } ], "clip_score": 28.546875, "loss": 4.442444801330566 }, { "id": "identity_193363", "image": "A-OKVQA/images/138347.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many portable toilets are visible in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 14.578125, "loss": 4.452115058898926 }, { "id": "identity_26370", "image": "VQA2.0/images/544601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What's the table made of?" }, { "from": "gpt", "value": "The answer is wood" } ], "clip_score": 15.234375, "loss": 4.889045715332031 }, { "id": "identity_264552", "image": "TextVQA/images/f06628b6576f1168.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of view does the clock offer?" }, { "from": "gpt", "value": "The answer is grandview" } ], "clip_score": 21.765625, "loss": 4.285897731781006 }, { "id": "identity_218883", "image": "A-OKVQA/images/172874.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many candy canes are visible on the Christmas tree?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 18.34375, "loss": 4.516217231750488 }, { "id": "identity_265821", "image": "FigureQA/images/71840.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Blue Violet or Light Seafoam?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.9609375, "loss": 4.533628463745117 }, { "id": "identity_29883", "image": "VizWiz/images/VizWiz_train_00005219.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of wood is this?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 27.75, "loss": 4.029682159423828 }, { "id": "identity_26795", "image": "ScienceQA/images/14289.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) breakable\n(B) bendable" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.40625, "loss": 4.191351890563965 }, { "id": "identity_297189", "image": "A-OKVQA/images/20133.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What function does the pink and white item serve?\nChoices:\n(A) power\n(B) music\n(C) games\n(D) cell service" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.984375, "loss": 4.555635929107666 }, { "id": "identity_270659", "image": "FigureQA/images/73832.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Bubblegum the item with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.5703125, "loss": 4.716176509857178 }, { "id": "identity_27479", "image": "ScienceQA/images/4952.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) Tuvalu\n(B) Kiribati\n(C) Samoa\n(D) Tonga" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.9375, "loss": 4.059225082397461 }, { "id": "identity_155451", "image": "TQA/images/question_images/photosynthesis_4097.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many inputs are shown in the image contributing to the process?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 18.203125, "loss": 4.626087188720703 }, { "id": "identity_267105", "image": "ScienceQA/images/8711.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic is shared by these three objects among the following choices: bumpy, fragile, sweet?\nChoices:\n(A) bumpy\n(B) fragile\n(C) sweet" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.265625, "loss": 4.4198737144470215 }, { "id": "identity_321270", "image": "A-OKVQA/images/289569.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person on the purple sign doing?\nChoices:\n(A) dancing\n(B) praying\n(C) eating\n(D) singing" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.390625, "loss": 4.599061012268066 }, { "id": "identity_287530", "image": "A-OKVQA/images/235687.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which flag elements are depicted on the umbrella in the image?\nChoices:\n(A) nazi\n(B) american\n(C) gay rainbow\n(D) chilean" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 8.5234375, "loss": 4.361624717712402 }, { "id": "identity_323948", "image": "PMC-VQA/images/PMC6806509_Fig2_55187.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which probe was used to obtain this scan of the thorax?\nChoices:\n(A) Linear probe \n(B) Endoscopic ultrasound probe \n(C) Convex probe \n(D) Transvaginal probe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.28125, "loss": 4.009933948516846 }, { "id": "identity_306480", "image": "ScienceQA/images/8577.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Minneapolis\n(B) Des Moines\n(C) Saint Paul\n(D) Indianapolis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.453125, "loss": 4.507691383361816 }, { "id": "identity_288224", "image": "Super-CLEVR/images/superCLEVR_new_001240.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there less blue matte trucks behind the blue object compared to the number of brown metal road bikes?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 25.5625, "loss": 5.095064163208008 }, { "id": "identity_14148", "image": "DVQA/images/bar_train_00114936.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the chart contain stacked bars?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 22.4375, "loss": 4.155363082885742 }, { "id": "identity_202901", "image": "VQA2.0/images/331214.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible chimneys are there on the houses on the left side of the street?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 26.4375, "loss": 5.098521709442139 }, { "id": "identity_11783", "image": "TextVQA/images/351a2d5749219403.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: does this meeting look interesting?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 23.09375, "loss": 4.617661476135254 }, { "id": "identity_20147", "image": "IconQA/images/79857.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of flowers even or odd?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.765625, "loss": 4.1631879806518555 }, { "id": "identity_273630", "image": "A-OKVQA/images/327807.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What product is being sold by the person on the right among the following options: pizza, water, cars, or swords?\nChoices:\n(A) pizza\n(B) water\n(C) cars\n(D) swords" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.9296875, "loss": 4.1997270584106445 }, { "id": "identity_308846", "image": "TQA/images/abc_question_images/optics_lense_types_19157.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the plano-concave part in this lens?\nChoices:\n(A) N\n(B) M\n(C) S\n(D) A" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.125, "loss": 4.171679496765137 }, { "id": "identity_261507", "image": "TQA/images/abc_question_images/ocean_zones_18125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option indicates the location of the continental rise?\nChoices:\n(A) C\n(B) Y\n(C) D\n(D) M" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.5625, "loss": 4.322365760803223 }, { "id": "identity_276051", "image": "AI2D/images/1111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part connects the leaf to the stem?\nChoices:\n(A) Margin\n(B) Petiole\n(C) Petronel\n(D) Midvein" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.1875, "loss": 4.05758810043335 }, { "id": "identity_195707", "image": "PMC-VQA/images/PMC5732013_FIG5_253432.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the Inferior Fronto-Occipital Fasciculus (IFOF) in the brain?" }, { "from": "gpt", "value": "The answer is Visual processing and language function" } ], "clip_score": 32.21875, "loss": 4.3386077880859375 }, { "id": "identity_287396", "image": "PMC-VQA/images/PMC8702715_F1_147743.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, which type of thrombus is represented by the dotted arrow?\nChoices:\n(A) white thrombus,\n(B) red thrombus,\n(C) black thrombus,\n(D) blue thrombus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.515625, "loss": 4.189648151397705 }, { "id": "identity_297973", "image": "IconQA/images/64605.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there a circle as a face in this shape?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.796875, "loss": 4.660470008850098 }, { "id": "identity_192205", "image": "A-OKVQA/images/281725.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible on the ski lift?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.453125, "loss": 4.486659049987793 }, { "id": "identity_261668", "image": "A-OKVQA/images/576652.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following choices represents the location where the player is currently standing? \n(A) home grass\n(B) out field\n(C) ball field\n(D) pitcher's mound" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.875, "loss": 4.3743791580200195 }, { "id": "identity_38606", "image": "PMC-VQA/images/PMC6550046_DMM039065F2_489391.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does each row in the figure represent? \nChoices:\n(A) Glass brain projection \n(B) Statistical analyses \n(C) Transverse section level \n(D) Coordinates relative to Bregma " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.875, "loss": 4.209963798522949 }, { "id": "identity_268555", "image": "TQA/images/abc_question_images/aquifers_16522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label that indicates a well contaminated with sea water:\n(A) J\n(B) M\n(C) H\n(D) X" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.375, "loss": 4.103766918182373 }, { "id": "identity_252353", "image": "TabMWP/images/13532.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Determine the equation of the function represented by the table and verify if it is linear or nonlinear by checking if the rate of change is constant" }, { "from": "gpt", "value": "The answer is The function is linear" } ], "clip_score": 26.25, "loss": 4.1757965087890625 }, { "id": "identity_302777", "image": "PMC-VQA/images/PMC6717321_Fig2_30960.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being assessed in the postoperative radiographs?\nChoices:\n(A) Hip joint space\n(B) Leg length shortening\n(C) Bone density\n(D) Spinal curvature" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.421875, "loss": 4.144896507263184 }, { "id": "identity_326905", "image": "A-OKVQA/images/169026.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of bus is depicted?\nChoices:\n(A) double-decker\n(B) school\n(C) shuttle\n(D) passenger" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.71875, "loss": 4.180370330810547 }, { "id": "identity_317714", "image": "ScienceQA/images/14186.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Jackson\n(B) Charleston\n(C) Columbia\n(D) Baton Rouge" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.765625, "loss": 4.520121097564697 }, { "id": "identity_23445", "image": "DocVQA/images/ptwn0020_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Department?" }, { "from": "gpt", "value": "The answer is field sales" } ], "clip_score": 16.375, "loss": 4.471607208251953 }, { "id": "identity_264946", "image": "A-OKVQA/images/56040.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Under what circumstances would a woman brush her teeth outdoors?\nChoices:\n(A) fun\n(B) camping\n(C) homeless\n(D) nice weather" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.25, "loss": 4.326869964599609 }, { "id": "identity_309994", "image": "A-OKVQA/images/395382.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what way is the boy's attire different from the girl's?\nChoices:\n(A) t-shirt\n(B) denim shorts\n(C) flip flops\n(D) caps" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.375, "loss": 4.275723457336426 }, { "id": "identity_17978", "image": "DVQA/images/bar_train_00021828.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What element does the darkorange color represent?" }, { "from": "gpt", "value": "The answer is affair" } ], "clip_score": 20.859375, "loss": 5.141606330871582 }, { "id": "identity_320199", "image": "VizWiz/images/VizWiz_train_00019764.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does this place look like?" }, { "from": "gpt", "value": "The answer is clean" } ], "clip_score": 21.0, "loss": 4.866311550140381 }, { "id": "identity_206359", "image": "A-OKVQA/images/348790.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many traffic lights are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 29.859375, "loss": 4.2150468826293945 }, { "id": "identity_22469", "image": "DVQA/images/bar_train_00090913.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the largest accuracy summed across all the datasets?" }, { "from": "gpt", "value": "The answer is notion" } ], "clip_score": 32.21875, "loss": 4.673961639404297 }, { "id": "identity_284531", "image": "DVQA/images/bar_train_00153858.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the bars run horizontally?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 21.578125, "loss": 4.173516273498535 }, { "id": "identity_281575", "image": "PMC-VQA/images/PMC6230418_fig2_394787.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the characteristic observed in the left ICA?\nChoices:\n(A) Aneurysm\n(B) Stenosis\n(C) Normal\n(D) Blockage" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.5625, "loss": 4.982598781585693 }, { "id": "identity_293123", "image": "ScienceQA/images/2157.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Vermont?\nChoices:\n(A) Montpelier\n(B) Trenton\n(C) Providence\n(D) Albany" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.84375, "loss": 4.0799689292907715 }, { "id": "identity_57361", "image": "DVQA/images/bar_train_00045079.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store has the least variation in units sold across all items?" }, { "from": "gpt", "value": "The answer is driver" } ], "clip_score": 28.8125, "loss": 4.953459739685059 }, { "id": "identity_239835", "image": "FigureQA/images/87855.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the maximum area under the curve, and what is the difference in the area under the curve between Light Gold and this color?" }, { "from": "gpt", "value": "The answer is Dark Gold, approximately 1" } ], "clip_score": 33.6875, "loss": 4.059887409210205 }, { "id": "identity_125022", "image": "VQA-AS/images/11204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the slide the child is sitting on?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 17.796875, "loss": 5.108214378356934 }, { "id": "identity_320595", "image": "PMC-VQA/images/PMC3789048_f2-ol-06-03-0850_235383.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the size of the tumor in the image?\nChoices:\n(A) small\n(B) medium\n(C) large\n(D) extra-large" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.453125, "loss": 4.551648139953613 }, { "id": "identity_28687", "image": "ScienceQA/images/9311.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Select the fish below.\nChoices:\n(A) piranha\n(B) zebra" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.015625, "loss": 4.4964470863342285 }, { "id": "identity_275459", "image": "TQA/images/abc_question_images/hair_follicles_16987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the hair follicle is represented by the letter W?\nChoices:\n(A) HAIR MATRIX\n(B) SEBACEOUS GLAND\n(C) CUTICLE\n(D) HAIR BULB" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.46875, "loss": 4.350546836853027 }, { "id": "identity_201730", "image": "PMC-VQA/images/PMC4449209_pone.0126817.g004_391313.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structures labeled with 'a' in the image?" }, { "from": "gpt", "value": "The answer is Transporting blood" } ], "clip_score": 21.640625, "loss": 4.4614949226379395 }, { "id": "identity_6615", "image": "ScienceQA/images/857.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Minnesota?\nChoices:\n(A) Bismarck\n(B) Springfield\n(C) Saint Paul\n(D) Minneapolis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.203125, "loss": 4.22128438949585 }, { "id": "identity_25185", "image": "IconQA/images/75860.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a house. Which picture is eighth?\nChoices:\n(A) dog\n(B) paw\n(C) house" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.640625, "loss": 4.19845724105835 }, { "id": "identity_327194", "image": "PMC-VQA/images/PMC7559495_fig3_273731.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique was utilized in the research?\nChoices:\n(A) X-ray\n(B) Magnetic resonance imaging\n(C) Computed tomography\n(D) Positron emission tomography" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.53125, "loss": 4.168366432189941 }, { "id": "identity_229266", "image": "ScienceQA/images/17338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which property do these two objects have in common, and how does this property benefit their typical use?" }, { "from": "gpt", "value": "The answer is Bendable property benefits comfort and flexibility in use" } ], "clip_score": 24.171875, "loss": 4.7000956535339355 }, { "id": "identity_136110", "image": "DocVQA/images/fpbf0055_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any additives in the product?" }, { "from": "gpt", "value": "The answer is None" } ], "clip_score": 17.90625, "loss": 4.562182903289795 }, { "id": "identity_37220", "image": "ScienceQA/images/14491.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of North Carolina?\nChoices:\n(A) Frankfort\n(B) Raleigh\n(C) Austin\n(D) Atlanta" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.921875, "loss": 4.285163879394531 }, { "id": "identity_336918", "image": "A-OKVQA/images/174766.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the person wearing protection?\nChoices:\n(A) elbows\n(B) knees\n(C) chest\n(D) head" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 8.21875, "loss": 4.587306022644043 }, { "id": "identity_286148", "image": "A-OKVQA/images/492132.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which establishment is located in this building? \nOptions:\n(A) candy shop\n(B) airport\n(C) museum\n(D) train depot" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.21875, "loss": 4.447600364685059 }, { "id": "identity_34988", "image": "TabMWP/images/27958.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: An Idaho farmer has been monitoring crop prices over time. In 2004, did canola or potatoes cost more per cwt?\nChoices:\n(A) canola\n(B) potatoes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 42.65625, "loss": 4.135378837585449 }, { "id": "identity_272545", "image": "ScienceQA/images/15366.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic describes this object?\nChoices:\n(A) yellow\n(B) flexible" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 9.4453125, "loss": 4.184315204620361 }, { "id": "identity_185644", "image": "A-OKVQA/images/77602.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the umpire signaling?" }, { "from": "gpt", "value": "The answer is No signal, hands are down" } ], "clip_score": 28.4375, "loss": 4.264173984527588 }, { "id": "identity_222281", "image": "VQA2.0/images/558387.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the building in the background?" }, { "from": "gpt", "value": "The answer is Beige" } ], "clip_score": 12.78125, "loss": 4.268219470977783 }, { "id": "identity_202886", "image": "VizWiz/images/VizWiz_train_00001254.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cables are connected to the monitor?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 29.046875, "loss": 4.777736186981201 }, { "id": "identity_305256", "image": "A-OKVQA/images/233404.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the scarf wrapped around the suitcase on the left?\nChoices:\n(A) red\n(B) yellow\n(C) green\n(D) blue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.875, "loss": 4.048553466796875 }, { "id": "identity_216855", "image": "VQA2.0/images/157809.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is prohibited next to the fire hydrant?" }, { "from": "gpt", "value": "The answer is Stopping is prohibited" } ], "clip_score": 22.53125, "loss": 4.072745323181152 }, { "id": "identity_213463", "image": "PMC-VQA/images/PMC7748165_F1_323125.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the likely reason for performing this endoscopy based on the image?" }, { "from": "gpt", "value": "The answer is Diagnostic investigation or routine surveillance" } ], "clip_score": 30.71875, "loss": 4.37684965133667 }, { "id": "identity_196286", "image": "VQA-AS/images/17691.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many panes are there in the window?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 18.640625, "loss": 4.04421329498291 }, { "id": "identity_245091", "image": "VQA2.0/images/252952.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What types of food does this restaurant sell, and how many different types are listed on the menu board?" }, { "from": "gpt", "value": "The answer is four" } ], "clip_score": 19.296875, "loss": 4.3088483810424805 }, { "id": "identity_325192", "image": "PMC-VQA/images/PMC6430261_Fig5_455714.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in panel (d) of the image?\nChoices:\n(A) Injected activity of the pig 13\n(B) SUVmax of the pig 13\n(C) Location of the osteomyelytic lesion in pig 13\n(D) Size of the osteomyelytic lesion in pig 13" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.03125, "loss": 4.553623676300049 }, { "id": "identity_17565", "image": "DVQA/images/bar_train_00043443.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object is the least preferred in any category?" }, { "from": "gpt", "value": "The answer is flesh" } ], "clip_score": 26.515625, "loss": 4.205264091491699 }, { "id": "identity_271027", "image": "FigureQA/images/90032.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Green Yellow or Orange?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.0, "loss": 4.539912223815918 }, { "id": "identity_263715", "image": "FigureQA/images/81066.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is darker, Dim Gray or Web Gray?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.1015625, "loss": 4.593718528747559 }, { "id": "identity_224295", "image": "TextVQA/images/b5cce7b718870ee6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the device branded \"autodesk\" and how many boats are visible in the background?" }, { "from": "gpt", "value": "The answer is Blue, six" } ], "clip_score": 28.578125, "loss": 4.866799831390381 }, { "id": "identity_282917", "image": "PMC-VQA/images/PMC7601068_cancers-12-02897-f003_287608.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized for the staging process?\nChoices:\n(A) X-ray\n(B) MRI\n(C) PET\n(D) CT scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.015625, "loss": 4.059637069702148 }, { "id": "identity_185876", "image": "A-OKVQA/images/572725.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many steps are visible in the image?" }, { "from": "gpt", "value": "The answer is Six steps" } ], "clip_score": 13.8359375, "loss": 4.2977800369262695 }, { "id": "identity_20771", "image": "PMC-VQA/images/PMC7391099_fig12_222158.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the tissue layer discussed in the caption? \nChoices:\n(A) Epidermis \n(B) Submucosal layers of fascia \n(C) Collagen fibers \n(D) Adipose tissue " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.59375, "loss": 4.140071868896484 }, { "id": "identity_271909", "image": "TQA/images/question_images/skin_cross_section_7579.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, which component represents the outer layer of the skin?\nOptions:\n(A) hypodermis\n(B) pore\n(C) epidermis\n(D) dermis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.109375, "loss": 4.452645301818848 }, { "id": "identity_283793", "image": "IconQA/images/10779.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the first picture is a boat, which picture corresponds to the third position in the sequence? Is it a boat, a bucket, or a crab?\nChoices:\n(A) boat\n(B) bucket\n(C) crab" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.578125, "loss": 4.165528774261475 }, { "id": "identity_306912", "image": "PMC-VQA/images/PMC4696192_Fig1_457881.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colonic loops are visible in this image?\nChoices:\n(A) Ascending and transverse \n(B) Descending and sigmoid \n(C) Dorsal and ventral of the left ascending \n(D) Sigmoid and descending colon" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.03125, "loss": 4.038706302642822 }, { "id": "identity_263628", "image": "A-OKVQA/images/442942.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which group of individuals typically ride on this train? \n(A) elderly only\n(B) salesmen\n(C) mall workers\n(D) tourists" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.5, "loss": 4.005035400390625 }, { "id": "identity_18407", "image": "FigureQA/images/26813.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Forest Green the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.875, "loss": 4.359111309051514 }, { "id": "identity_219592", "image": "PMC-VQA/images/PMC4095734_fig8_305820.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the abbreviation for the structure labeled with the arrow pointing upwards?" }, { "from": "gpt", "value": "The answer is LAA" } ], "clip_score": 13.875, "loss": 4.436330795288086 }, { "id": "identity_147346", "image": "DocVQA/images/pzbg0024_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Will the RJRT fair sponsorship be advertised in The Winston-Salem Journal?" }, { "from": "gpt", "value": "The answer is If appropriate" } ], "clip_score": 27.0, "loss": 5.352626800537109 }, { "id": "identity_133168", "image": "TextVQA/images/0d9c6dd4409d6f2d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the basketball?" }, { "from": "gpt", "value": "The answer is The Rock" } ], "clip_score": 14.6953125, "loss": 5.097606182098389 }, { "id": "identity_21969", "image": "ScienceQA/images/18908.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is this organism's common name?\nChoices:\n(A) Flabellina iodinea\n(B) Spanish shawl nudibranch" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 37.4375, "loss": 4.224587440490723 }, { "id": "identity_16030", "image": "IconQA/images/47032.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you select a marble without looking, how likely is it that you will pick a black one?\nChoices:\n(A) certain\n(B) probable\n(C) unlikely\n(D) impossible" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.9375, "loss": 4.071591377258301 }, { "id": "identity_325176", "image": "A-OKVQA/images/436127.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which state is this place located?\nChoices:\n(A) idaho\n(B) illinois\n(C) ohio\n(D) michigan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 8.2578125, "loss": 4.2727437019348145 }, { "id": "identity_250435", "image": "A-OKVQA/images/344040.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are three distinct features in the image that indicate this event is most likely being held on a college campus, and how do these features collectively support this conclusion?" }, { "from": "gpt", "value": "The answer is Bicycles, institutional buildings, organized event setup indicate a college campus" } ], "clip_score": 18.765625, "loss": 4.198835849761963 }, { "id": "identity_4003", "image": "FigureQA/images/20172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Royal Blue greater than Gray?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.515625, "loss": 4.206762313842773 }, { "id": "identity_17362", "image": "PMC-VQA/images/PMC4349652_pone.0118849.g003_364003.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What staining was used on the nasal mucosa sections? \nChoices:\n(A) Congo Red and Alcian Blue \n(B) Hematoxylin and Eosin \n(C) Periodic Acid-Schiff \n(D) Toluidine Blue " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.890625, "loss": 4.046802997589111 }, { "id": "identity_332643", "image": "A-OKVQA/images/363100.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this place most likely selling?\nChoices:\n(A) wine\n(B) tires\n(C) video games\n(D) gwent cards" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.640625, "loss": 4.2528815269470215 }, { "id": "identity_291569", "image": "IconQA/images/10860.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) turn\n(B) flip\n(C) slide" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.1875, "loss": 4.149244785308838 }, { "id": "identity_30067", "image": "VizWiz/images/VizWiz_train_00017462.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this drink?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 22.0, "loss": 4.11586332321167 }, { "id": "identity_268222", "image": "A-OKVQA/images/520043.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action is recommended for the drivers to take in this scenario?\nChoices:\n(A) hurry up\n(B) be patient\n(C) press horn\n(D) call police" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.703125, "loss": 4.858159065246582 }, { "id": "identity_319127", "image": "TextVQA/images/42a7f9fb024854b0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the word after this?" }, { "from": "gpt", "value": "The answer is landing" } ], "clip_score": 16.59375, "loss": 4.088159561157227 }, { "id": "identity_281196", "image": "A-OKVQA/images/207513.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which section of the sign are these individuals currently painting? \nOptions:\n(A) top\n(B) middle\n(C) base\n(D) stop" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.203125, "loss": 4.179876327514648 }, { "id": "identity_294697", "image": "ScienceQA/images/2917.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) New Hampshire\n(B) Kentucky\n(C) Vermont\n(D) Wisconsin" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.171875, "loss": 4.311257839202881 }, { "id": "identity_288782", "image": "ScienceQA/images/8875.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What common property do these two objects share among the following choices: blue or scratchy?\nChoices:\n(A) blue\n(B) scratchy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.796875, "loss": 4.528072357177734 }, { "id": "identity_289220", "image": "PMC-VQA/images/PMC6735209_fig3_35618.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, where can the soft tissue be found?\nChoices:\n(A) Right inner ear\n(B) Left inner ear\n(C) Nose\n(D) Mouth" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.203125, "loss": 4.2155961990356445 }, { "id": "identity_33990", "image": "A-OKVQA/images/536720.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the short haired woman wearing a khaki shirt?\nChoices:\n(A) fashion\n(B) staying cool\n(C) dress code\n(D) keeping warm" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.046875, "loss": 4.369519233703613 }, { "id": "identity_119422", "image": "TextVQA/images/6566c7a2f29c2385.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the record label that produced the album depicted in the image?" }, { "from": "gpt", "value": "The answer is Bloodshot" } ], "clip_score": 16.6875, "loss": 4.387402057647705 }, { "id": "identity_324346", "image": "TabMWP/images/29453.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Some friends discussed the sizes of their coin collections. What is the range of the amounts?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 35.71875, "loss": 4.212553024291992 }, { "id": "identity_220883", "image": "A-OKVQA/images/439373.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main color of the police officer's helmet?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 20.546875, "loss": 4.627590656280518 }, { "id": "identity_318831", "image": "VQA-AS/images/1593.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What toy is here?" }, { "from": "gpt", "value": "The answer is train" } ], "clip_score": 22.609375, "loss": 4.112735748291016 }, { "id": "identity_304977", "image": "ScienceQA/images/15731.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the highlighted colony?\nChoices:\n(A) Georgia\n(B) South Carolina\n(C) North Carolina\n(D) Florida" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.984375, "loss": 4.4010539054870605 }, { "id": "identity_200756", "image": "PMC-VQA/images/PMC7096069_FIG6_141121.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can the exact level of the spine being imaged be determined from the image?" }, { "from": "gpt", "value": "The answer is Yes, with reference landmarks" } ], "clip_score": 31.34375, "loss": 4.098470211029053 }, { "id": "identity_29690", "image": "A-OKVQA/images/399628.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the round platform shown here do?\nChoices:\n(A) sit still\n(B) turn upsidedown\n(C) nothing\n(D) rotate" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.4140625, "loss": 4.238831520080566 }, { "id": "identity_311556", "image": "ScienceQA/images/15915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What specific humidity level was measured within the outlined area shown?\nChoices:\n(A) 16 grams of water vapor per kilogram of air\n(B) 11 grams of water vapor per kilogram of air\n(C) 21 grams of water vapor per kilogram of air" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.25, "loss": 4.107044219970703 }, { "id": "identity_10919", "image": "FigureQA/images/45651.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Purple the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.359375, "loss": 4.58612060546875 }, { "id": "identity_328015", "image": "PMC-VQA/images/PMC4557590_f4-cln_70p654_420034.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of scan was used to generate this data for analysis?\nChoices:\n(A) CT scan \n(B) MRI scan \n(C) PET scan \n(D) X-ray" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.53125, "loss": 4.090876579284668 }, { "id": "identity_279003", "image": "ScienceQA/images/14186.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of South Carolina?\nChoices:\n(A) Jackson\n(B) Charleston\n(C) Columbia\n(D) Baton Rouge" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.3125, "loss": 4.08255672454834 }, { "id": "identity_26488", "image": "A-OKVQA/images/202004.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What item here makes the horse go forward focusing?\nChoices:\n(A) shoes\n(B) garbage bags\n(C) blinders\n(D) hat" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.171875, "loss": 4.226371765136719 }, { "id": "identity_202060", "image": "PMC-VQA/images/PMC4645184_f6_443921.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color in the background of the image?" }, { "from": "gpt", "value": "The answer is Dark blue-green" } ], "clip_score": 15.6953125, "loss": 4.094256401062012 }, { "id": "identity_217685", "image": "A-OKVQA/images/232262.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is the blue one on the far right?" }, { "from": "gpt", "value": "The answer is A van" } ], "clip_score": 22.921875, "loss": 4.458564758300781 }, { "id": "identity_310380", "image": "DVQA/images/bar_train_00108613.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset is represented by the palegoldenrod color?" }, { "from": "gpt", "value": "The answer is fee" } ], "clip_score": 20.359375, "loss": 4.736422538757324 }, { "id": "identity_108401", "image": "A-OKVQA/images/82740.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number on the obstacle to the right of the horse?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 25.390625, "loss": 4.70313024520874 }, { "id": "identity_276706", "image": "Super-CLEVR/images/superCLEVR_new_000377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any regular-sized buses with small cyan tandem bikes on the right side?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 22.34375, "loss": 5.08779764175415 }, { "id": "identity_219441", "image": "PMC-VQA/images/PMC6571772_pharmaceutics-11-00248-f004_494592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a visible difference in size between the two hemispheres of the brain?" }, { "from": "gpt", "value": "The answer is No visible difference" } ], "clip_score": 26.34375, "loss": 4.233923435211182 }, { "id": "identity_226151", "image": "TextVQA/images/003bed7fa4de9fd9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different business signs are visible in the image, and what are their names?" }, { "from": "gpt", "value": "The answer is four: lil siam, Olive, Soho Spice, Ayuthai" } ], "clip_score": 25.59375, "loss": 4.248220443725586 }, { "id": "identity_131510", "image": "PMC-VQA/images/PMC8379116_Fig3_38406.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different cross-sectional views of the bone are presented?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 29.90625, "loss": 4.035147666931152 }, { "id": "identity_327020", "image": "PMC-VQA/images/PMC7780869_F2_334052.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the ground glass density observed in Patient 2's CT scan?\nChoices:\n(A) Upper lobes\n(B) Middle lobes\n(C) Lower lobes\n(D) Entire lung." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.265625, "loss": 4.260047435760498 }, { "id": "identity_256617", "image": "TextVQA/images/43c427ba34b6c147.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full word that contains the two most visible letters on the left, and what is the color of the clothing of the person standing closest to this word?" }, { "from": "gpt", "value": "The answer is EAS, orange" } ], "clip_score": 9.390625, "loss": 4.090367794036865 }, { "id": "identity_21652", "image": "PMC-VQA/images/PMC8425862_advs2784-fig-0007_53973.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the red arrow indicate in the image? \nChoices:\n(A) White matter \n(B) Grey matter \n(C) Endothelial cells \n(D) Basal lamina membrane " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.65625, "loss": 4.243620872497559 }, { "id": "identity_141097", "image": "DVQA/images/bar_train_00175542.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label has the highest cumulative value for all three bars combined?" }, { "from": "gpt", "value": "The answer is ease" } ], "clip_score": 25.96875, "loss": 4.383841514587402 }, { "id": "identity_300189", "image": "VQA2.0/images/561369.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is this object?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 21.90625, "loss": 4.013484001159668 }, { "id": "identity_228784", "image": "Super-CLEVR/images/superCLEVR_new_000183.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the materials of all the objects that are the same shape as the large green metallic object, and how many such objects are there in total?" }, { "from": "gpt", "value": "The answer is One metallic, one rubber; total two cars" } ], "clip_score": 28.09375, "loss": 4.05224609375 }, { "id": "identity_337127", "image": "Super-CLEVR/images/superCLEVR_new_000992.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any large matte object to the left of a large red object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 13.375, "loss": 5.451535224914551 }, { "id": "identity_28020", "image": "A-OKVQA/images/430125.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What sort of area is the man eating pizza in?\nChoices:\n(A) cow pasture\n(B) midway\n(C) ski lodge\n(D) front yard" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.703125, "loss": 4.373965263366699 }, { "id": "identity_313246", "image": "DVQA/images/bar_train_00106441.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are these bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 21.8125, "loss": 4.159563064575195 }, { "id": "identity_326320", "image": "A-OKVQA/images/129566.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of event are the items inside the white cabinet on the wall typically used for?\nChoices:\n(A) emergency\n(B) educational\n(C) religious celebrations\n(D) party" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.203125, "loss": 4.751298427581787 }, { "id": "identity_298399", "image": "A-OKVQA/images/555833.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which location is the cameraman most likely capturing the picture? \n(A) car rooftop\n(B) palm tree\n(C) building\n(D) mountain" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.578125, "loss": 4.237958908081055 }, { "id": "identity_318557", "image": "PMC-VQA/images/PMC8207792_Fig1_469449.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the MRI indicate in the medial thalami?\nChoices:\n(A) Low signal alteration\n(B) High signal alteration\n(C) No signal alteration\n(D) Only left signal alteration" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.9375, "loss": 4.325713634490967 }, { "id": "identity_329176", "image": "VQA2.0/images/26812.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is in this bowl?" }, { "from": "gpt", "value": "The answer is packets" } ], "clip_score": 19.84375, "loss": 5.4898362159729 }, { "id": "identity_301080", "image": "AI2D/images/4047.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which zone is at the center of this?\nChoices:\n(A) convection\n(B) radiative\n(C) nuclear\n(D) productive" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.59375, "loss": 4.728716850280762 }, { "id": "identity_297298", "image": "DocVQA/images/jxcn0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What outcome is specified in this document?" }, { "from": "gpt", "value": "The answer is OK" } ], "clip_score": 22.8125, "loss": 4.834075927734375 }, { "id": "identity_336274", "image": "A-OKVQA/images/136928.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the likely reason for the person standing near the buses?\nChoices:\n(A) get directions\n(B) travel\n(C) drive\n(D) sight-see" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.390625, "loss": 5.005704879760742 }, { "id": "identity_17043", "image": "IconQA/images/92087.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can you trace a circle with this shape?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.390625, "loss": 4.303699493408203 }, { "id": "identity_334347", "image": "PMC-VQA/images/PMC7566610_Fig7_276673.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which lobes are affected in this image?\nChoices:\n(A) Left upper and lower lobes\n(B) Right upper and lower lobes\n(C) Left lower lobe only\n(D) Right middle and lower lobes." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.4375, "loss": 4.287473201751709 }, { "id": "identity_266769", "image": "TQA/images/question_images/cycle_nitrogen_6708.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the destination of Fossil Fuel Emissions?\nChoices:\n(A) Organic Matter\n(B) Gaseous Losses\n(C) Precipitation\n(D) Bacteria Fixation" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.03125, "loss": 4.224983215332031 }, { "id": "identity_268687", "image": "TextVQA/images/61779df1574525bf.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written in the black part of this device?" }, { "from": "gpt", "value": "The answer is bumper body" } ], "clip_score": 18.796875, "loss": 4.011352062225342 }, { "id": "identity_296579", "image": "VQA-AS/images/12810.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is it possible that this room requires a thorough cleaning?" }, { "from": "gpt", "value": "The answer is i cannot tell" } ], "clip_score": 28.390625, "loss": 4.30761194229126 }, { "id": "identity_316633", "image": "DVQA/images/bar_train_00117344.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are these bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.65625, "loss": 4.0895256996154785 }, { "id": "identity_326219", "image": "PMC-VQA/images/PMC7564854_F0001_275742.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the clavicle is shown in the image?\nChoices:\n(A) Medial end\n(B) Lateral end\n(C) Middle of the clavicle\n(D) None of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.59375, "loss": 4.378116607666016 }, { "id": "identity_296210", "image": "PMC-VQA/images/PMC6639941_Fig3_11403.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the prominent structures visible in the tomography images displayed in B-1 and B-2?\nChoices:\n(A) Vertebral column \n(B) Medial organ \n(C) Lateral organ \n(D) Outlet valve" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.46875, "loss": 4.559327602386475 }, { "id": "identity_9993", "image": "TextVQA/images/1b1a6fab9e5536ee.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the name of this casino?" }, { "from": "gpt", "value": "The answer is golden" } ], "clip_score": 29.671875, "loss": 5.606685638427734 }, { "id": "identity_328808", "image": "A-OKVQA/images/293233.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why do players wear white?\n\nChoices:\n(A) reduces heat\n(B) peace\n(C) unique\n(D) dress code" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.5625, "loss": 4.220335960388184 }, { "id": "identity_286310", "image": "PMC-VQA/images/PMC8100232_F1_434279.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, which areas are depicted in green?\nChoices:\n(A) association fibers\n(B) projection fibers\n(C) subcortical nuclei\n(D) cortical regions." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.03125, "loss": 4.131823539733887 }, { "id": "identity_302499", "image": "Super-CLEVR/images/superCLEVR_new_001742.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the metal object on the right side of the shiny bus shaped like the small green thing?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 26.0, "loss": 4.991233825683594 }, { "id": "identity_271159", "image": "PMC-VQA/images/PMC6360709_Fig1_431564.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the specific plane used for the CT scan in the popliteal region?\nChoices:\n(A) Coronal plane\n(B) Sagittal plane\n(C) Axial plane\n(D) Transverse plane" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.890625, "loss": 4.785421371459961 }, { "id": "identity_268041", "image": "FigureQA/images/83579.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Gold the color with the largest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.3125, "loss": 4.359762668609619 }, { "id": "identity_30764", "image": "A-OKVQA/images/526922.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of problem is happening?\nChoices:\n(A) snowstorm\n(B) traffic jam\n(C) thunderstorm\n(D) house fire" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.4375, "loss": 4.483250617980957 }, { "id": "identity_273855", "image": "PMC-VQA/images/PMC6908538_Fig1_82803.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the main objective of the image displayed?\nChoices:\n(A) To emphasize the contrast between the treatment zones.\n(B) To present the anatomy of the brain.\n(C) To illustrate the impact of radiation therapy on the human brain.\n(D) To exhibit a cross-section of the human brain." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.890625, "loss": 4.188647270202637 }, { "id": "identity_283446", "image": "TQA/images/question_images/erosion_6852.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following materials is typically deposited on the slip off slope: soil, fine material, sand and shingle, or rocks?\nChoices:\n(A) soil\n(B) fine material\n(C) sand and shingle\n(D) rocks" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 36.6875, "loss": 4.0117340087890625 }, { "id": "identity_30533", "image": "A-OKVQA/images/530013.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What position will the person who stands holding the bat vertically play next?\nChoices:\n(A) catcher\n(B) shortstop\n(C) manager\n(D) batter" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.625, "loss": 4.20023250579834 }, { "id": "identity_40036", "image": "DVQA/images/bar_train_00037908.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object has the highest preference in the category evil?" }, { "from": "gpt", "value": "The answer is dinner" } ], "clip_score": 29.046875, "loss": 4.163878440856934 }, { "id": "identity_326625", "image": "TQA/images/question_images/continental_drift_6645.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: When is Earth's landmass referred to as Pangaea?\nChoices:\n(A) 225 million years ago\n(B) 150 million years ago\n(C) Today\n(D) 100 million years ago" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.671875, "loss": 4.009871482849121 }, { "id": "identity_121758", "image": "VQA2.0/images/222629.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cars are parked on the right side of the street?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 20.875, "loss": 4.937896728515625 }, { "id": "identity_264910", "image": "PMC-VQA/images/PMC5811017_pone.0192863.g001_275148.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which staining technique was utilized in this study?\nOptions:\n(A) Von Kossa staining\n(B) H&E staining\n(C) Sirius red staining\n(D) Congo red staining" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.84375, "loss": 4.047872066497803 }, { "id": "identity_296432", "image": "A-OKVQA/images/513053.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind his intense behavior?\nChoices:\n(A) needs rest\n(B) is through\n(C) is running\n(D) stole ball" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.109375, "loss": 4.11986780166626 }, { "id": "identity_140497", "image": "MapQA/images/map_8662.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a percentage of passengers within the range of 53.5%-53.6%?" }, { "from": "gpt", "value": "The answer is Four states" } ], "clip_score": 39.75, "loss": 4.4091362953186035 }, { "id": "identity_318381", "image": "FigureQA/images/86450.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Web Maroon the one with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.9375, "loss": 4.16554594039917 }, { "id": "identity_292923", "image": "DVQA/images/bar_train_00071123.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of bars with values less than 1000000000?" }, { "from": "gpt", "value": "The answer is five" } ], "clip_score": 21.234375, "loss": 4.118563652038574 }, { "id": "identity_283519", "image": "A-OKVQA/images/216406.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the safest location for someone who is prone to overheating in this scenario?\nChoices:\n(A) on chair\n(B) under umbrella\n(C) in sand\n(D) water's edge" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.328125, "loss": 4.885441303253174 }, { "id": "identity_21178", "image": "Super-CLEVR/images/superCLEVR_new_001098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more small purple things than big blue rubber road bikes?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 26.359375, "loss": 5.12031888961792 }, { "id": "identity_226282", "image": "A-OKVQA/images/47813.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are on the court, and what is the gender composition of the players in this men's doubles tennis game?" }, { "from": "gpt", "value": "The answer is Four male players" } ], "clip_score": 30.046875, "loss": 4.621590614318848 }, { "id": "identity_223720", "image": "VizWiz/images/VizWiz_train_00014929.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the best before date of the product in the jar?" }, { "from": "gpt", "value": "The answer is IND 15AE" } ], "clip_score": 26.828125, "loss": 4.33094596862793 }, { "id": "identity_317003", "image": "A-OKVQA/images/391300.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What might the people who use this vehicle be coming back from?\nChoices:\n(A) eating contest\n(B) sales call\n(C) office\n(D) vacation" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.71875, "loss": 4.12406063079834 }, { "id": "identity_215773", "image": "Super-CLEVR/images/superCLEVR_new_000032.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the object that is to the left of the large green bus and in front of the small purple metallic thing?" }, { "from": "gpt", "value": "The answer is Triangular frame with two wheels" } ], "clip_score": 31.71875, "loss": 4.231605052947998 }, { "id": "identity_257642", "image": "PMC-VQA/images/PMC7398198_Fig2_224888.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure represented by the red color in the image, and how does it interact with the surrounding structures?" }, { "from": "gpt", "value": "The answer is Supplies blood to cerebellum, can compress nearby structures" } ], "clip_score": 28.34375, "loss": 4.235020160675049 }, { "id": "identity_311713", "image": "A-OKVQA/images/38645.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is inside this container?\nChoices:\n(A) umbrella\n(B) cat\n(C) baby\n(D) single rose" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.890625, "loss": 4.221245765686035 }, { "id": "identity_18276", "image": "DVQA/images/bar_train_00128115.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group of bars contains the largest valued individual bar in the whole chart?" }, { "from": "gpt", "value": "The answer is tale" } ], "clip_score": 28.140625, "loss": 4.295168399810791 }, { "id": "identity_248836", "image": "DocVQA/images/txdw0217_45.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the second column of the table, and how many studies are listed under the \"Alternative medicines (continued)\" section?" }, { "from": "gpt", "value": "The answer is Effectiveness in Treating Hot Fl, 4" } ], "clip_score": 24.796875, "loss": 4.866252422332764 }, { "id": "identity_322241", "image": "PMC-VQA/images/PMC6418144_Fig1_450835.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the tectal stab lesion made?\nChoices:\n(A) right\n(B) left\n(C) both\n(D) none" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.5, "loss": 4.083707332611084 }, { "id": "identity_258836", "image": "A-OKVQA/images/142002.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the design under the wing and how many points does the star have?" }, { "from": "gpt", "value": "The answer is White, five points" } ], "clip_score": 21.484375, "loss": 4.1374077796936035 }, { "id": "identity_108322", "image": "TextVQA/images/ba7979567646d71e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What phrase is written at the top of the wall, starting with \"100%\"?" }, { "from": "gpt", "value": "The answer is Mind Control Development" } ], "clip_score": 13.8671875, "loss": 6.001326560974121 }, { "id": "identity_4920", "image": "A-OKVQA/images/562241.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What's probably casting the nearby shadow?\nChoices:\n(A) palm tree\n(B) cameraman\n(C) dog\n(D) traffic cone" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.4921875, "loss": 4.660262584686279 }, { "id": "identity_306762", "image": "A-OKVQA/images/379086.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What hazard seems to be happening on the road?\nChoices:\n(A) too hot\n(B) too cold\n(C) water\n(D) slippery" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.28125, "loss": 4.274033546447754 }, { "id": "identity_320918", "image": "PMC-VQA/images/PMC5740132_Fig4_255954.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What types of images are these?\nChoices:\n(A) Coronal slices\n(B) Sagittal slices\n(C) Axial slices\n(D) Volume Rendering" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.09375, "loss": 4.271482944488525 }, { "id": "identity_279625", "image": "AI2D/images/2847.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From where do the Olfactory Nerves originate?\nChoices:\n(A) Olfactory Bulb\n(B) Ovary\n(C) Heart\n(D) Mouth" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.03125, "loss": 4.010500907897949 }, { "id": "identity_325496", "image": "ScienceQA/images/17993.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Pierre\n(B) Des Moines\n(C) Burlington\n(D) Frankfort" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.78125, "loss": 4.483909606933594 }, { "id": "identity_310132", "image": "PMC-VQA/images/PMC5884912_Fig4_297572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the type of thrombus depicted in this image?\nChoices:\n(A) Bone thrombus \n(B) Gastric thrombus \n(C) Tumor thrombus \n(D) Arterial thrombus" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.34375, "loss": 4.646681308746338 }, { "id": "identity_122123", "image": "A-OKVQA/images/575465.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the seats in the stadium?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 20.234375, "loss": 4.162358283996582 }, { "id": "identity_117266", "image": "TextVQA/images/f19b8a32a7c3b124.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the character featured in the image?" }, { "from": "gpt", "value": "The answer is Weltall the 2nd" } ], "clip_score": 21.140625, "loss": 4.1977715492248535 }, { "id": "identity_3849", "image": "PMC-VQA/images/PMC5845433_F1_286245.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What technique was used to generate these reconstructions? \nChoices:\n(A) Magnetic resonance imaging \n(B) Computed tomography \n(C) Positron emission tomography \n(D) X-ray " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.421875, "loss": 4.061009407043457 }, { "id": "identity_30232", "image": "PMC-VQA/images/PMC7373644_figure2_217655.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is marked with the arrowhead in the image? \nChoices:\n(A) The lesion \n(B) The nipple \n(C) The reconstructed plane \n(D) Unable to determine " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.328125, "loss": 4.460079193115234 }, { "id": "identity_275441", "image": "TQA/images/question_images/human_system_respiratory_3585.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which structure connects the larynx to the bronchus among the following options: (A) pharynx, (B) carina of trachea, (C) trachea, (D) bronchi?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.203125, "loss": 4.370458126068115 }, { "id": "identity_184608", "image": "PMC-VQA/images/PMC4973539_Fig1_59354.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many screws are visible in the external fixator?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.9375, "loss": 4.775813102722168 }, { "id": "identity_262769", "image": "TQA/images/question_images/biomes_6571.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the Northernmost parts of the globe, which biome among the following can be found?\nChoices:\n(A) Tundra\n(B) Savanna\n(C) Taiga\n(D) Mountain Zone" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.5625, "loss": 4.041304111480713 }, { "id": "identity_216332", "image": "PMC-VQA/images/PMC2519030_f5_26894.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scale bar length in the image?" }, { "from": "gpt", "value": "The answer is The length indicated on the scale bar" } ], "clip_score": 16.125, "loss": 4.10204553604126 }, { "id": "identity_312863", "image": "DVQA/images/bar_train_00188892.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest total value?" }, { "from": "gpt", "value": "The answer is evil" } ], "clip_score": 19.03125, "loss": 4.134160041809082 }, { "id": "identity_262707", "image": "A-OKVQA/images/339019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What rental service is available next to the black banner?\nChoices:\n(A) umbrellas\n(B) surfing equipment\n(C) swimsuits\n(D) floaties" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.75, "loss": 4.288661956787109 }, { "id": "identity_293176", "image": "PMC-VQA/images/PMC5483223_Fig1_182314.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location was the observed lesion found?\nChoices:\n(A) Maxilla\n(B) Mandible\n(C) Premolar region\n(D) Incisor region" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.265625, "loss": 4.592396259307861 }, { "id": "identity_321763", "image": "DVQA/images/bar_train_00043140.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the smallest total value?" }, { "from": "gpt", "value": "The answer is route" } ], "clip_score": 20.265625, "loss": 4.443782806396484 }, { "id": "identity_14616", "image": "DVQA/images/bar_train_00147773.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the chart contain any negative values?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 24.796875, "loss": 4.095265865325928 }, { "id": "identity_107473", "image": "TextVQA/images/3cf49941dff4140c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main color of the tablecloth?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 18.453125, "loss": 4.148865222930908 }, { "id": "identity_269710", "image": "FigureQA/images/40145.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Steel Blue or Blue Violet?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.015625, "loss": 4.57748556137085 }, { "id": "identity_316946", "image": "PMC-VQA/images/PMC6243351_fig5_398320.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this CT scan of the head show?\nChoices:\n(A) Post-surgical changes from left pterional craniectomy \n(B) Left middle cerebral artery territory infarct \n(C) Right middle cerebral artery territory infarct \n(D) Cerebellar infarct." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.78125, "loss": 4.330271244049072 }, { "id": "identity_305065", "image": "ScienceQA/images/18243.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared characteristic of these three items?\nChoices:\n(A) bouncy\n(B) colorful\n(C) scratchy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.484375, "loss": 4.526517868041992 }, { "id": "identity_267167", "image": "AI2D/images/2304.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being represented in stage A?\nChoices:\n(A) larva\n(B) adult\n(C) nymph\n(D) egg mass" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.125, "loss": 4.905407905578613 }, { "id": "identity_64944", "image": "ScienceQA/images/17338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many colors are used in the rain boots design?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 31.890625, "loss": 4.160132884979248 }, { "id": "identity_8883", "image": "IconQA/images/22976.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you select a marble without looking, how likely is it that you will pick a black one?\nChoices:\n(A) probable\n(B) certain\n(C) unlikely\n(D) impossible" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.296875, "loss": 4.231905460357666 }, { "id": "identity_9049", "image": "ScienceQA/images/4479.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Pennsylvania?\nChoices:\n(A) Philadelphia\n(B) Pittsburgh\n(C) Harrisburg\n(D) Albany" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.0625, "loss": 4.125849723815918 }, { "id": "identity_270242", "image": "AI2D/images/3355.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the specific term for the red part of its body?\nChoices:\n(A) leg\n(B) eye\n(C) elytra\n(D) none of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.1875, "loss": 4.558070659637451 }, { "id": "identity_61403", "image": "MapQA/images/map_2313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color representing the range 73816-76102 on the map?" }, { "from": "gpt", "value": "The answer is a color name" } ], "clip_score": 27.109375, "loss": 4.823298931121826 }, { "id": "identity_306496", "image": "FigureQA/images/90575.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Gold?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.4453125, "loss": 4.408033847808838 }, { "id": "identity_38547", "image": "PMC-VQA/images/PMC6375168_Fig5_435992.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the arrows in the image indicate? \nChoices:\n(A) Right ejaculatory duct \n(B) Urethra \n(C) Left ejaculatory duct \n(D) Prostate " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.140625, "loss": 4.461461544036865 }, { "id": "identity_16619", "image": "PMC-VQA/images/PMC6375371_F1_436076.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the location of the cyst seen on MRI? \nChoices:\n(A) D5-D8 \n(B) L2-L5 \n(C) Cervical spine \n(D) Lower pole. " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.03125, "loss": 4.333344459533691 }, { "id": "identity_61799", "image": "DocVQA/images/qhmk0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To which organization is the letter addressed?" }, { "from": "gpt", "value": "The answer is The Sugar Association" } ], "clip_score": 20.4375, "loss": 4.6008710861206055 }, { "id": "identity_31724", "image": "TQA/images/abc_question_images/human_system_excretory_16110.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter is representative of the kidneys?\nChoices:\n(A) V\n(B) X\n(C) Y\n(D) S" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.4375, "loss": 4.101193904876709 }, { "id": "identity_119204", "image": "VQA2.0/images/344566.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the audience behind the activity area?" }, { "from": "gpt", "value": "The answer is an integer number" } ], "clip_score": 13.7890625, "loss": 4.007808208465576 }, { "id": "identity_31092", "image": "A-OKVQA/images/309662.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the woman holding in her hand?\nChoices:\n(A) baby\n(B) egg\n(C) wine glass\n(D) kitten" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.0, "loss": 4.231690883636475 }, { "id": "identity_287828", "image": "A-OKVQA/images/330726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What function does the metal object in front of the building serve?\nChoices:\n(A) toll booth\n(B) recycling\n(C) art display\n(D) food stand" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.6953125, "loss": 4.553537368774414 }, { "id": "identity_12790", "image": "ScienceQA/images/688.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) yellow\n(B) shiny" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.46875, "loss": 4.336204528808594 }, { "id": "identity_17372", "image": "A-OKVQA/images/283350.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these people doing here?\nChoices:\n(A) hired\n(B) help riders\n(C) awaiting ride\n(D) invited yesterday" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.578125, "loss": 4.282393455505371 }, { "id": "identity_206033", "image": "A-OKVQA/images/385141.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the umbrella the person is holding?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 16.625, "loss": 4.396101951599121 }, { "id": "identity_336679", "image": "TQA/images/abc_question_images/ocean_zones_18126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter indicates pelagic?\nChoices:\n(A) A\n(B) L\n(C) D\n(D) U" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.21875, "loss": 4.398185729980469 }, { "id": "identity_336966", "image": "ScienceQA/images/20869.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option represents a fish?\nChoices:\n(A) woodpecker\n(B) green moray eel\n(C) penguin\n(D) fire salamander" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.84375, "loss": 4.169084072113037 }, { "id": "identity_37454", "image": "A-OKVQA/images/256549.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being sold to the right of the speed limit sign?\nChoices:\n(A) books\n(B) used cars\n(C) toys\n(D) burgers" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.0, "loss": 4.21834659576416 }, { "id": "identity_29385", "image": "PMC-VQA/images/PMC4978392_pone.0160926.g003_60996.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the pathologic grade of the tissue shown in the histologic sections? \nChoices:\n(A) Metastatic tissue \n(B) Normal tissue \n(C) Hyperplastic tissue \n(D) Dysplastic tissue " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.53125, "loss": 4.187551021575928 }, { "id": "identity_189767", "image": "PMC-VQA/images/PMC2811103_F3_55455.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any labels or markers indicating specific brain regions?" }, { "from": "gpt", "value": "The answer is No labels or markers present" } ], "clip_score": 27.453125, "loss": 4.124801158905029 }, { "id": "identity_293915", "image": "A-OKVQA/images/60190.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the object of blue color that is hanging on the wall?\nChoices:\n(A) ladle\n(B) spoon\n(C) measuring cup\n(D) colander" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.6328125, "loss": 4.44114351272583 }, { "id": "identity_336988", "image": "A-OKVQA/images/362090.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What will be the first thing they grab when they stand up?\nChoices:\n(A) dry cleaning\n(B) backpack\n(C) jacket\n(D) cane" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.9296875, "loss": 4.238125324249268 }, { "id": "identity_332154", "image": "A-OKVQA/images/148395.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is next to this vehicle?\nChoices:\n(A) chain\n(B) bison\n(C) cow\n(D) elk" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.3359375, "loss": 4.245450973510742 }, { "id": "identity_272811", "image": "FigureQA/images/34534.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Aqua not the low median?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.953125, "loss": 4.193949222564697 }, { "id": "identity_35505", "image": "DVQA/images/bar_train_00195380.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bars have values smaller than 7?" }, { "from": "gpt", "value": "The answer is five" } ], "clip_score": 22.890625, "loss": 4.124881744384766 }, { "id": "identity_23608", "image": "IconQA/images/53387.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner less likely to land?\nChoices:\n(A) white\n(B) neither; white and pink are equally likely\n(C) pink" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.984375, "loss": 4.389958381652832 }, { "id": "identity_201535", "image": "VQA2.0/images/109076.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible on the field?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.484375, "loss": 4.246830463409424 }, { "id": "identity_68309", "image": "ScienceQA/images/13583.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the warmest air temperature on the map?" }, { "from": "gpt", "value": "The answer is Dark Red" } ], "clip_score": 29.6875, "loss": 4.044279098510742 }, { "id": "identity_22690", "image": "PlotQA/images/64263.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the 4th group of bars from the top ?" }, { "from": "gpt", "value": "The answer is Kenya" } ], "clip_score": 17.546875, "loss": 4.490546703338623 }, { "id": "identity_276398", "image": "TQA/images/question_images/earth_magnetic_field_6784.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of magnetic field is depicted in the figure provided? \nOptions:\n(A) Reverse\n(B) Normal\n(C) Inverse\n(D) Constant" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.109375, "loss": 4.416308403015137 }, { "id": "identity_195215", "image": "TextVQA/images/4654c1d0e8e21af9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the event taking place according to the scoreboard?" }, { "from": "gpt", "value": "The answer is Elvis Night" } ], "clip_score": 23.640625, "loss": 4.7729573249816895 }, { "id": "identity_35", "image": "FigureQA/images/52911.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Slate greater than Dark Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.0, "loss": 4.150917053222656 }, { "id": "identity_261702", "image": "VQA2.0/images/179405.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you clarify that for me?" }, { "from": "gpt", "value": "The answer is so am i" } ], "clip_score": 12.3984375, "loss": 5.2822041511535645 }, { "id": "identity_300127", "image": "FigureQA/images/7786.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Tomato the smallest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.828125, "loss": 4.012140274047852 }, { "id": "identity_271156", "image": "FigureQA/images/42469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color, Blue or Brown, is greater in value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.359375, "loss": 4.001284122467041 }, { "id": "identity_106751", "image": "VQA2.0/images/501698.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible wheels are there in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.21875, "loss": 5.0068793296813965 }, { "id": "identity_324566", "image": "A-OKVQA/images/452459.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who uses this space?\nChoices:\n(A) older adult\n(B) child\n(C) teenager\n(D) young adult" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.625, "loss": 4.339335918426514 }, { "id": "identity_264217", "image": "TQA/images/abc_question_images/parts_worm_17313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents the oviduct among the choices provided? \n(A) T\n(B) U\n(C) H\n(D) V" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.125, "loss": 4.661879539489746 }, { "id": "identity_300938", "image": "DVQA/images/bar_train_00036643.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which bar has the highest value?" }, { "from": "gpt", "value": "The answer is design" } ], "clip_score": 26.84375, "loss": 4.1048583984375 }, { "id": "identity_294489", "image": "A-OKVQA/images/307341.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which individual or entity is the owner of the dogs depicted in the image?\nChoices:\n(A) no one\n(B) farmer\n(C) skiing lady\n(D) pet store" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.375, "loss": 4.190556049346924 }, { "id": "identity_238340", "image": "PMC-VQA/images/PMC7892652_Fig6_370679.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary difference between the images in the bottom row and the images in the top row, and how does this difference impact the interpretation of the PET images?" }, { "from": "gpt", "value": "The answer is Percent difference images quantify discrepancies, aiding in accuracy assessment" } ], "clip_score": 23.765625, "loss": 4.401432514190674 }, { "id": "identity_319663", "image": "FigureQA/images/90045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.984375, "loss": 4.058990478515625 }, { "id": "identity_131431", "image": "VQA-AS/images/1593.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows are there in the room?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.203125, "loss": 4.143071174621582 }, { "id": "identity_330307", "image": "PMC-VQA/images/PMC5593551_F1_214078.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the top panel image show?\nChoices:\n(A) Image after some treatment\n(B) Image before some treatment\n(C) Image of a different area\n(D) Image after a procedure" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.234375, "loss": 4.590882301330566 }, { "id": "identity_54596", "image": "DocVQA/images/jkmm0020_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Outlet Type?" }, { "from": "gpt", "value": "The answer is PACK/CTN/CTS" } ], "clip_score": 21.828125, "loss": 4.214465141296387 }, { "id": "identity_322006", "image": "TQA/images/question_images/parts_microscope_7191.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the wheel under the stage that adjusts light called?\nChoices:\n(A) fine control knob\n(B) iris diaphragm\n(C) objectives\n(D) condenser" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.4375, "loss": 4.365459442138672 }, { "id": "identity_14014", "image": "FigureQA/images/52325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.453125, "loss": 4.027210235595703 }, { "id": "identity_235256", "image": "TabMWP/images/13170.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total cost for a family of four (two adults, one child, and one senior) to visit each of the zoos listed, and which zoo offers the lowest total cost?" }, { "from": "gpt", "value": "The answer is Houston" } ], "clip_score": 39.03125, "loss": 4.971575736999512 }, { "id": "identity_272059", "image": "Super-CLEVR/images/superCLEVR_new_001294.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer shiny choppers than large gray matte airliners?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 20.703125, "loss": 4.959831714630127 }, { "id": "identity_7341", "image": "PMC-VQA/images/PMC2915987_F1_70688.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the appearance of the distal bronchus in the CT image? \nChoices:\n(A) Contracted \n(B) Dilated \n(C) Collapsed \n(D) Clear " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.90625, "loss": 4.083633899688721 }, { "id": "identity_20918", "image": "IconQA/images/92395.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) after four." }, { "from": "gpt", "value": "The answer is quarter" } ], "clip_score": 29.234375, "loss": 4.554707050323486 }, { "id": "identity_223172", "image": "PMC-VQA/images/PMC6861597_fig0003_70168.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image?" }, { "from": "gpt", "value": "The answer is Axial" } ], "clip_score": 24.078125, "loss": 4.20310115814209 }, { "id": "identity_305172", "image": "DVQA/images/bar_train_00172021.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the lowest bar in each group?" }, { "from": "gpt", "value": "The answer is town" } ], "clip_score": 18.28125, "loss": 4.5983476638793945 }, { "id": "identity_113133", "image": "PMC-VQA/images/PMC8373912_Fig4_37009.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many structures with red coloration can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.59375, "loss": 4.388852119445801 }, { "id": "identity_46878", "image": "MapQA/images/map_10585.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color represents the second lowest range of puppets?" }, { "from": "gpt", "value": "The answer is Light orange" } ], "clip_score": 25.96875, "loss": 4.0750861167907715 }, { "id": "identity_210095", "image": "A-OKVQA/images/535312.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs are visible in the bottom left image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.375, "loss": 4.810795307159424 }, { "id": "identity_240298", "image": "Geometry3K/images/75.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the value of \\( x \\) and then determine the measure of \\(\\angle W\\)" }, { "from": "gpt", "value": "The answer is 76" } ], "clip_score": 30.265625, "loss": 4.246374130249023 }, { "id": "identity_71696", "image": "AI2D/images/515.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many primary consumers can you identify in this food web?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 31.6875, "loss": 4.9529829025268555 }, { "id": "identity_278134", "image": "TabMWP/images/8129.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In 2005, which crop cost more per cwt for the Idaho farmer, lentils or onions?\nChoices:\n(A) lentils\n(B) onions" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 38.28125, "loss": 4.022316932678223 }, { "id": "identity_272370", "image": "PMC-VQA/images/PMC5415772_Fig1_160019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the right panel image, which part of the splenic artery is impacted by the aneurysm? Is it the distal third, proximal third, middle third, or is it unable to determine from the image?\nChoices:\n(A) distal third \n(B) proximal third \n(C) middle third \n(D) unable to determine from image" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.359375, "loss": 4.020215034484863 }, { "id": "identity_309385", "image": "FigureQA/images/54487.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.65625, "loss": 4.00971794128418 }, { "id": "identity_337026", "image": "Super-CLEVR/images/superCLEVR_new_001838.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the rubber object in front of the object behind the small motorbike?" }, { "from": "gpt", "value": "The answer is jet" } ], "clip_score": 27.65625, "loss": 4.990515232086182 }, { "id": "identity_296804", "image": "FigureQA/images/35306.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Dark Orchid or Green Yellow?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.578125, "loss": 4.528566360473633 }, { "id": "identity_217732", "image": "A-OKVQA/images/295448.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 19.71875, "loss": 4.121787071228027 }, { "id": "identity_28251", "image": "A-OKVQA/images/344040.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this event most likely being held?\nChoices:\n(A) military base\n(B) public park\n(C) college campus\n(D) convention center" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.6875, "loss": 4.573732852935791 }, { "id": "identity_5614", "image": "A-OKVQA/images/267175.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on the desk?\nChoices:\n(A) apple\n(B) computer\n(C) cat\n(D) bird" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.15625, "loss": 4.175379753112793 }, { "id": "identity_217707", "image": "VQA2.0/images/260056.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the donut on the bottom right corner?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 21.171875, "loss": 4.249669075012207 }, { "id": "identity_272608", "image": "FigureQA/images/99306.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Seafoam the curve with the minimum area?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.453125, "loss": 4.1794562339782715 }, { "id": "identity_234022", "image": "TextVQA/images/350cfcd3adaa9891.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the establishment that offers both a bar and dancing?" }, { "from": "gpt", "value": "The answer is Colony House" } ], "clip_score": 13.3515625, "loss": 4.299457550048828 }, { "id": "identity_271869", "image": "AI2D/images/4187.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the primary cause of the initial breakdown of rocks as shown in the diagram?\nChoices:\n(A) magma\n(B) earthquakes\n(C) weathering\n(D) hurricanes" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.203125, "loss": 4.7525458335876465 }, { "id": "identity_259008", "image": "PMC-VQA/images/PMC7881607_Fig1_367488.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key differences between MRI and CT scan imaging techniques, and how can you identify which technique was used in this image?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 32.46875, "loss": 4.066166400909424 }, { "id": "identity_264611", "image": "VQA2.0/images/169878.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the indentation on the flower vase?" }, { "from": "gpt", "value": "The answer is handle" } ], "clip_score": 25.90625, "loss": 4.068388938903809 }, { "id": "identity_23037", "image": "PMC-VQA/images/PMC7876301_F2_365894.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the focus of the study mentioned in the caption? \nChoices:\n(A) Blood circulation \n(B) White matter structures \n(C) Gray matter structures \n(D) Muscle fatigue " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.296875, "loss": 4.73798131942749 }, { "id": "identity_317439", "image": "VQA2.0/images/571554.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity are the individuals preparing for?" }, { "from": "gpt", "value": "The answer is ski" } ], "clip_score": 25.703125, "loss": 5.194828987121582 }, { "id": "identity_266583", "image": "AI2D/images/3993.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the diagram is located at the center? \nChoices:\n(A) spicules\n(B) core\n(C) none of the above\n(D) sunspot" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.171875, "loss": 4.277261257171631 }, { "id": "identity_262532", "image": "A-OKVQA/images/104309.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What will obstruct plumbers from accessing the pipes when they need to work on them?\nChoices:\n(A) sink\n(B) wall\n(C) counter\n(D) microwave" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.265625, "loss": 4.049197196960449 }, { "id": "identity_318455", "image": "ScienceQA/images/16915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which one is the fish?\nChoices:\n(A) cardinalfish\n(B) sea eagle" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.640625, "loss": 4.397891044616699 }, { "id": "identity_123500", "image": "TextVQA/images/ecb9e39f59ec4d90.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What symbol is present inside the eye graphic?" }, { "from": "gpt", "value": "The answer is A person within a signal broadcast symbol" } ], "clip_score": 18.671875, "loss": 4.549667835235596 }, { "id": "identity_745", "image": "Super-CLEVR/images/superCLEVR_new_000814.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the small object that is behind the yellow metallic bus?" }, { "from": "gpt", "value": "The answer is utility bike" } ], "clip_score": 30.640625, "loss": 4.655904293060303 }, { "id": "identity_325127", "image": "TextVQA/images/19742ee7e740ee20.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many of these can you see?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 17.6875, "loss": 4.502554893493652 }, { "id": "identity_109417", "image": "PMC-VQA/images/PMC7641477_FIG1_290961.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of this brain image?" }, { "from": "gpt", "value": "The answer is The orientation is axial with the posterior side labeled" } ], "clip_score": 30.3125, "loss": 4.067375659942627 }, { "id": "identity_228022", "image": "VizWiz/images/VizWiz_train_00014650.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the detailed steps to cook this meal in the microwave, including any necessary preparations and post-cooking instructions?" }, { "from": "gpt", "value": "The answer is Follow preparation, cook for 9 minutes, stir, then stand" } ], "clip_score": 20.9375, "loss": 4.055981636047363 }, { "id": "identity_327773", "image": "A-OKVQA/images/37905.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the red thing on the ground capable of?\nChoices:\n(A) driving around\n(B) flight\n(C) playing music\n(D) catching fish" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.8125, "loss": 4.809645652770996 }, { "id": "identity_132382", "image": "PMC-VQA/images/PMC7814591_Fig8_345052.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is indicated by the white arrow in Panel C?" }, { "from": "gpt", "value": "The answer is Ventricle" } ], "clip_score": 29.1875, "loss": 4.002313137054443 }, { "id": "identity_111221", "image": "TextVQA/images/2163e764c1e571ad.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum time limit mentioned on the yellow sticker?" }, { "from": "gpt", "value": "The answer is Multiple tickets are not valid" } ], "clip_score": 21.28125, "loss": 4.416299343109131 }, { "id": "identity_286504", "image": "ScienceQA/images/8616.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What property does this object possess?\nChoices:\n(A) hard\n(B) stretchy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.171875, "loss": 4.083215236663818 }, { "id": "identity_130226", "image": "VQA2.0/images/299794.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tarp covering the ground beneath the cart?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 16.828125, "loss": 4.3105974197387695 }, { "id": "identity_333985", "image": "A-OKVQA/images/414588.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a specific breed of the animal on the table?\nChoices:\n(A) scottish fold\n(B) american lamancha\n(C) german shepherd\n(D) holland lop" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.296875, "loss": 4.415484428405762 }, { "id": "identity_3641", "image": "PMC-VQA/images/PMC3228743_F2_117897.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the finding on the surgical chest radiograph? \nChoices:\n(A) wider cardiac silhouette \n(B) narrow cardiac silhouette and visualization of main pulmonary artery \n(C) narrow cardiac silhouette without visualization of main pulmonary artery \n(D) no significant finding detected on the cardiac silhouette. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.703125, "loss": 4.531672477722168 }, { "id": "identity_264796", "image": "Super-CLEVR/images/superCLEVR_new_000035.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the small rubber motorbike?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 25.046875, "loss": 4.513916969299316 }, { "id": "identity_292380", "image": "IconQA/images/31333.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on, yellow or white?\nChoices:\n(A) yellow\n(B) white" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.5, "loss": 4.552297115325928 }, { "id": "identity_282421", "image": "ScienceQA/images/768.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the mammal from the options below.\nChoices:\n(A) poison dart frog\n(B) fruit bat\n(C) manta ray\n(D) piranha" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.9453125, "loss": 4.104808807373047 }, { "id": "identity_276255", "image": "VizWiz/images/VizWiz_train_00002590.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the type of coffee being served?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 25.890625, "loss": 4.467233657836914 }, { "id": "identity_17760", "image": "FigureQA/images/32299.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Coral the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.359375, "loss": 4.4471588134765625 }, { "id": "identity_99262", "image": "IconQA/images/84678.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which die has the highest number of dots?" }, { "from": "gpt", "value": "The answer is The fourth die" } ], "clip_score": 33.0, "loss": 4.122172832489014 }, { "id": "identity_288392", "image": "VizWiz/images/VizWiz_train_00014928.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is plate food" } ], "clip_score": 25.15625, "loss": 4.203035831451416 }, { "id": "identity_4201", "image": "ScienceQA/images/16561.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of New Jersey?\nChoices:\n(A) Albany\n(B) Trenton\n(C) Augusta\n(D) Jersey City" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.921875, "loss": 4.164326190948486 }, { "id": "identity_296760", "image": "DVQA/images/bar_train_00195380.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of bars with values less than 7?" }, { "from": "gpt", "value": "The answer is five" } ], "clip_score": 21.65625, "loss": 4.504664897918701 }, { "id": "identity_118821", "image": "VizWiz/images/VizWiz_train_00006758.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the text box containing the quote?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 11.5703125, "loss": 4.308079719543457 }, { "id": "identity_24764", "image": "A-OKVQA/images/449463.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the chairs on the elephant called?\nChoices:\n(A) stool\n(B) howdah\n(C) spinner\n(D) recliner" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.015625, "loss": 4.519499778747559 }, { "id": "identity_206937", "image": "TextVQA/images/9384fea1e2cbb70e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main object depicted in the center of the poster?" }, { "from": "gpt", "value": "The answer is An explosion or burst" } ], "clip_score": 22.625, "loss": 4.680287837982178 }, { "id": "identity_307402", "image": "TextVQA/images/61779df1574525bf.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does this say in the black part?" }, { "from": "gpt", "value": "The answer is bumper body" } ], "clip_score": 12.0, "loss": 4.014373302459717 }, { "id": "identity_1130", "image": "A-OKVQA/images/128110.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What could happen weather wise in this area?\nChoices:\n(A) rain\n(B) floods\n(C) hail\n(D) snow" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.40625, "loss": 4.2434916496276855 }, { "id": "identity_270426", "image": "FigureQA/images/15150.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Medium Orchid or Dark Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.25, "loss": 4.502114772796631 }, { "id": "identity_319810", "image": "PMC-VQA/images/PMC4931091_fig1_47434.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique used to visualize this chest?\nChoices:\n(A) CT scan\n(B) Ultrasound\n(C) X-ray\n(D) MRI" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.515625, "loss": 4.23628044128418 }, { "id": "identity_278180", "image": "A-OKVQA/images/416098.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of weather conditions are present at the beach setting in this scenario?\nChoices:\n(A) snowy\n(B) rain\n(C) sleet\n(D) windy" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.34375, "loss": 4.188684940338135 }, { "id": "identity_266422", "image": "AI2D/images/4718.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which leaf shape resembles a circle or sphere in appearance?\nChoices:\n(A) rounded\n(B) cordate narrow\n(C) ovate\n(D) broad elliptic" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.421875, "loss": 4.030313968658447 }, { "id": "identity_1403", "image": "A-OKVQA/images/113097.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the guitar in the painting hung in the center of the wall on the right?\nChoices:\n(A) purple\n(B) blue\n(C) green\n(D) red" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.453125, "loss": 4.004258632659912 }, { "id": "identity_327023", "image": "PMC-VQA/images/PMC8075766_FIG2_428374.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What technique was used to create this image?\nChoices:\n(A) Computed Tomography (CT)\n(B) Magnetic Resonance Imaging (MRI)\n(C) X-Ray\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.265625, "loss": 4.216058254241943 }, { "id": "identity_322940", "image": "PMC-VQA/images/PMC3627906_F1_198859.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What plane is shown in the first image?\nChoices:\n(A) Sagittal \n(B) Transverse \n(C) Coronal \n(D) None of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.765625, "loss": 4.148557186126709 }, { "id": "identity_27608", "image": "VQA-AS/images/1613.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the dog running?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 25.421875, "loss": 4.198176860809326 }, { "id": "identity_8696", "image": "ScienceQA/images/18041.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) smooth\n(B) sticky" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.59375, "loss": 4.523711204528809 }, { "id": "identity_294581", "image": "TQA/images/question_images/glaciers_6937.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options is situated beneath the Headwall? \n(A) Terminal Moraine\n(B) Zone of Abrasion\n(C) Zone of Plucking\n(D) Glacial Ice" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.09375, "loss": 4.5034871101379395 }, { "id": "identity_10412", "image": "DVQA/images/bar_train_00122464.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many groups of bars are there?" }, { "from": "gpt", "value": "The answer is seven" } ], "clip_score": 21.78125, "loss": 4.282618522644043 }, { "id": "identity_320854", "image": "A-OKVQA/images/340508.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What game is typically played on this court?\nChoices:\n(A) badminton\n(B) tennis\n(C) basketball\n(D) volleyball" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.34375, "loss": 4.155567169189453 }, { "id": "identity_274501", "image": "FigureQA/images/94500.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Red the color with the smallest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.609375, "loss": 4.00470495223999 }, { "id": "identity_107", "image": "TextVQA/images/014fbf8ef4b53fd2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what are they sitting are the desk for?" }, { "from": "gpt", "value": "The answer is bildu" } ], "clip_score": 17.96875, "loss": 4.625234127044678 }, { "id": "identity_270142", "image": "ScienceQA/images/13305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) New Jersey\n(B) Ohio\n(C) Connecticut\n(D) Delaware" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.203125, "loss": 4.552629470825195 }, { "id": "identity_281899", "image": "PMC-VQA/images/PMC3062562_pone-0017879-g005_90786.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the perspective of the image displayed?\nChoices:\n(A) Dorsal \n(B) Anterior \n(C) Ventral \n(D) Medial" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.890625, "loss": 4.639304161071777 }, { "id": "identity_72265", "image": "ScienceQA/images/1066.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many clownfish can be observed in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.625, "loss": 4.528183460235596 }, { "id": "identity_189262", "image": "VQA-AS/images/2049.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many clouds are in the sky?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.25, "loss": 4.201910972595215 }, { "id": "identity_301521", "image": "PMC-VQA/images/PMC6281586_Fig6_408444.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of bone formation is observed in the CT scan?\nChoices:\n(A) Trabecular bone formation\n(B) Cancellous bone formation\n(C) Laminar bone formation\n(D) Cortical bone formation" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.484375, "loss": 4.3538007736206055 }, { "id": "identity_29910", "image": "PMC-VQA/images/PMC4940967_Fig1_50726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the CT scan image show? \nChoices:\n(A) Blood vessels that are swollen \n(B) Lymph nodes that are swollen \n(C) Swollen abdominal muscles \n(D) Swollen Kidneys. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.671875, "loss": 4.587536811828613 }, { "id": "identity_283410", "image": "A-OKVQA/images/544732.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity might passengers disembarking from the buses in the image be able to do in the yellow canopied building? \n(A) catch rides\n(B) complain\n(C) dine\n(D) game" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.9375, "loss": 4.397818088531494 }, { "id": "identity_36628", "image": "IconQA/images/52985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) slide\n(B) flip\n(C) turn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.796875, "loss": 4.409518241882324 }, { "id": "identity_329190", "image": "DocVQA/images/hzxd0228_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first heading in the document?" }, { "from": "gpt", "value": "The answer is Woodard Research Corporation" } ], "clip_score": 21.953125, "loss": 4.416536331176758 }, { "id": "identity_310743", "image": "IconQA/images/74696.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this shape open or not?\nChoices:\n(A) closed\n(B) open" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.28125, "loss": 4.1963300704956055 }, { "id": "identity_1313", "image": "A-OKVQA/images/563730.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the green eating utensil next to the orange?\nChoices:\n(A) knife\n(B) sporf\n(C) fork\n(D) spork" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.0, "loss": 4.303318977355957 }, { "id": "identity_320907", "image": "A-OKVQA/images/37209.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the t-shirt worn by the person on a pedal bike in the background to the right?\nChoices:\n(A) green\n(B) blue\n(C) purple\n(D) yellow" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.53125, "loss": 4.081267356872559 }, { "id": "identity_109799", "image": "VQA-AS/images/13180.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bushes are there in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.703125, "loss": 4.869980335235596 }, { "id": "identity_109698", "image": "VQA2.0/images/388149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are standing behind the table?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 14.6796875, "loss": 4.422214031219482 }, { "id": "identity_17235", "image": "ScienceQA/images/6449.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Vermont?\nChoices:\n(A) Montpelier\n(B) Burlington\n(C) Salem\n(D) Boston" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.84375, "loss": 4.171423435211182 }, { "id": "identity_305327", "image": "IconQA/images/13714.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on?\nChoices:\n(A) pink\n(B) white" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.65625, "loss": 4.349207878112793 }, { "id": "identity_320140", "image": "ScienceQA/images/7821.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What state of matter is a baseball?\nChoices:\n(A) a solid\n(B) a liquid\n(C) a gas" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.25, "loss": 4.140720844268799 }, { "id": "identity_269646", "image": "Super-CLEVR/images/superCLEVR_new_000084.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other objects that are of the same size as the green airplane?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 18.796875, "loss": 5.215779781341553 }, { "id": "identity_71865", "image": "TQA/images/abc_question_images/optics_lense_types_19157.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label corresponds to the biconcave lens?" }, { "from": "gpt", "value": "The answer is W" } ], "clip_score": 27.109375, "loss": 4.096808433532715 }, { "id": "identity_308706", "image": "A-OKVQA/images/482265.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which item has the most protein on this plate?\nChoices:\n(A) chicken\n(B) beets\n(C) rice\n(D) carrots" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.46875, "loss": 4.042725563049316 }, { "id": "identity_68951", "image": "AI2D/images/269.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many secondary consumers are depicted in this food web?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 35.75, "loss": 4.817302703857422 }, { "id": "identity_127221", "image": "VQA2.0/images/509657.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are riding in the truck?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 24.828125, "loss": 4.1254658699035645 }, { "id": "identity_133092", "image": "Super-CLEVR/images/superCLEVR_new_000350.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have two wheels?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.640625, "loss": 4.93044376373291 }, { "id": "identity_8240", "image": "ScienceQA/images/1636.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is potassium feldspar a mineral or a rock?\nChoices:\n(A) mineral\n(B) rock" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.953125, "loss": 4.161451816558838 }, { "id": "identity_310208", "image": "AI2D/images/2363.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Looking at the diagram of the stag beetle's life cycle, which stage is indicated by the letter G?\nChoices:\n(A) Third Instar\n(B) Eggs\n(C) First Instar\n(D) Pupa" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 38.59375, "loss": 4.094005584716797 }, { "id": "identity_23794", "image": "Super-CLEVR/images/superCLEVR_new_001308.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any other thing that is the same size as the purple matte motorbike?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 23.234375, "loss": 4.948422908782959 }, { "id": "identity_221025", "image": "A-OKVQA/images/228910.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible buttons are there on the person's garment?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 15.640625, "loss": 4.1940083503723145 }, { "id": "identity_9679", "image": "TQA/images/question_images/nuclear_energy_8117.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which fuel is used in the furnace?\nChoices:\n(A) turbine\n(B) transmission\n(C) coal\n(D) oil" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.84375, "loss": 4.153107643127441 }, { "id": "identity_109594", "image": "Super-CLEVR/images/superCLEVR_new_000864.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the car that is behind the gray thing on the right side of the big cyan bicycle that is left of the small rubber bicycle?" }, { "from": "gpt", "value": "The answer is Gold" } ], "clip_score": 30.953125, "loss": 4.283507347106934 }, { "id": "identity_248580", "image": "A-OKVQA/images/499915.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key visual elements in the image that indicate the event is a protest, and how do they collectively support this conclusion?" }, { "from": "gpt", "value": "The answer is Protest" } ], "clip_score": 23.1875, "loss": 5.415761470794678 }, { "id": "identity_326512", "image": "IconQA/images/44783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is displayed? Answer using a time word, not a number. It is (_) past eight." }, { "from": "gpt", "value": "The answer is half" } ], "clip_score": 25.3125, "loss": 5.513100624084473 }, { "id": "identity_263890", "image": "A-OKVQA/images/2525.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the woman's position behind the cupcake displays?\nChoices:\n(A) selling them\n(B) for practice\n(C) smelling them\n(D) hoarding them" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.78125, "loss": 4.574755668640137 }, { "id": "identity_318233", "image": "VizWiz/images/VizWiz_train_00004572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of caterpillar is shown here?" }, { "from": "gpt", "value": "The answer is orange black" } ], "clip_score": 26.421875, "loss": 5.331112384796143 }, { "id": "identity_223446", "image": "A-OKVQA/images/119299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the train doors in the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 25.015625, "loss": 4.439888000488281 }, { "id": "identity_328679", "image": "ScienceQA/images/11630.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this table made of?\nChoices:\n(A) asphalt\n(B) metal" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.5, "loss": 4.1879801750183105 }, { "id": "identity_282562", "image": "IconQA/images/18196.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the probability of selecting a black marble without looking?\nChoices:\n(A) unlikely\n(B) impossible\n(C) probable\n(D) certain" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.78125, "loss": 4.546497344970703 }, { "id": "identity_309045", "image": "TQA/images/question_images/cycle_rock_6736.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does magma slowly become when it crystallizes?\nChoices:\n(A) igneous rock\n(B) volcanoes\n(C) sedimentary rock\n(D) metamorphic rock" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.75, "loss": 4.364319801330566 }, { "id": "identity_222718", "image": "Super-CLEVR/images/superCLEVR_new_000195.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the object directly to the right of the purple bicycle?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 27.59375, "loss": 4.366782188415527 }, { "id": "identity_19463", "image": "AI2D/images/475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food web diagram, hawk could be referred as\nChoices:\n(A) predator\n(B) energy source\n(C) prey\n(D) producer" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.453125, "loss": 4.113276481628418 }, { "id": "identity_289672", "image": "ScienceQA/images/13419.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Through which continent does the prime meridian pass?\nChoices:\n(A) Asia\n(B) North America\n(C) Antarctica" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.25, "loss": 4.293734550476074 }, { "id": "identity_136049", "image": "MapQA/images/map_10385.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states fall into the lowest percentage category?" }, { "from": "gpt", "value": "The answer is Six states" } ], "clip_score": 27.171875, "loss": 4.347560405731201 }, { "id": "identity_308402", "image": "FigureQA/images/17407.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the high median Turquoise?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.265625, "loss": 4.477525234222412 }, { "id": "identity_119517", "image": "A-OKVQA/images/320150.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the stuffed animal directly below the camera?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 28.625, "loss": 4.154270172119141 }, { "id": "identity_188154", "image": "PMC-VQA/images/PMC4312452_Fig2_354195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the object depicted in this image?" }, { "from": "gpt", "value": "The answer is Seed" } ], "clip_score": 20.65625, "loss": 4.632576942443848 }, { "id": "identity_318248", "image": "FigureQA/images/82592.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.8984375, "loss": 4.320420265197754 }, { "id": "identity_273178", "image": "Super-CLEVR/images/superCLEVR_new_000216.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any small shiny objects to the right of the small shiny bus?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 23.390625, "loss": 5.481354713439941 }, { "id": "identity_29996", "image": "PMC-VQA/images/PMC7137677_FIG3_145941.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the condition shown in the x-ray? \nChoices:\n(A) Gout \n(B) Osteoarthritis \n(C) Rheumatoid Arthritis \n(D) Osteoporosis " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.953125, "loss": 4.693083763122559 }, { "id": "identity_71931", "image": "AI2D/images/1816.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trophic levels are represented in the diagram?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.890625, "loss": 4.301275730133057 }, { "id": "identity_186056", "image": "A-OKVQA/images/457119.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is visible in the background?" }, { "from": "gpt", "value": "The answer is A yellow bus" } ], "clip_score": 13.9296875, "loss": 4.40484619140625 }, { "id": "identity_213819", "image": "A-OKVQA/images/68956.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows are visible on the front of the building?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 21.5625, "loss": 4.463490009307861 }, { "id": "identity_290700", "image": "A-OKVQA/images/571746.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity are the three individuals beside the train engaged in? Are they:\n(A) sledding\n(B) running\n(C) skiing\n(D) rolling" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.921875, "loss": 4.481518745422363 }, { "id": "identity_26972", "image": "Super-CLEVR/images/superCLEVR_new_001240.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer blue matte trucks behind the blue thing than brown metal road bikes?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 23.359375, "loss": 5.039791584014893 }, { "id": "identity_16825", "image": "A-OKVQA/images/389974.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the collection of photos on the wall called?\nChoices:\n(A) spread\n(B) menagerie\n(C) album\n(D) collage" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.140625, "loss": 4.34234619140625 }, { "id": "identity_315233", "image": "FigureQA/images/20876.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.234375, "loss": 4.390085220336914 }, { "id": "identity_18346", "image": "PMC-VQA/images/PMC7503285_ijms-21-06163-f007_257397.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which side was used for control? \nChoices:\n(A) Right side \n(B) Left side \n(C) Both sides \n(D) Neither side " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.1953125, "loss": 4.192008972167969 }, { "id": "identity_315648", "image": "TQA/images/question_images/human_system_digestive_3937.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is located between the esophagus and the duodenum?\nChoices:\n(A) Cecum\n(B) Stomach\n(C) Ascending colon\n(D) Spleen" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.9375, "loss": 4.155738353729248 }, { "id": "identity_6733", "image": "PMC-VQA/images/PMC7770766_Fig2_331531.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which elements are being mapped in the f image? \nChoices:\n(A) C, N, Fe, and S \n(B) C, O, Ti, and F \n(C) Na, Cl, K, and Ca \n(D) H, He, Li, and Be " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.609375, "loss": 4.199092388153076 }, { "id": "identity_158564", "image": "TQA/images/question_images/glaciers_6929.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the feature that forms where the glacier meets the land?" }, { "from": "gpt", "value": "The answer is Till" } ], "clip_score": 30.203125, "loss": 5.017924785614014 }, { "id": "identity_15593", "image": "FigureQA/images/58162.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Peru the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.28125, "loss": 4.151585578918457 }, { "id": "identity_325692", "image": "TextVQA/images/378e06f3ea05441c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is the watch on the left showing?" }, { "from": "gpt", "value": "The answer is chris brown" } ], "clip_score": 26.5625, "loss": 4.969809055328369 }, { "id": "identity_30532", "image": "A-OKVQA/images/24714.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are they here?\nChoices:\n(A) hang out\n(B) sell items\n(C) beggers\n(D) church" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.484375, "loss": 4.711386680603027 }, { "id": "identity_130695", "image": "TextVQA/images/a8d8c37ad3707d37.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are visible in the background?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 12.9140625, "loss": 4.730471134185791 }, { "id": "identity_67799", "image": "TQA/images/question_images/fossils_8085.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many steps in the image involve a change in the state of the ground material?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.546875, "loss": 4.381748199462891 }, { "id": "identity_37817", "image": "A-OKVQA/images/145447.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the train car in the center of the three cars?\nChoices:\n(A) red\n(B) blue\n(C) gray\n(D) white" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.703125, "loss": 4.100005626678467 }, { "id": "identity_254436", "image": "PMC-VQA/images/PMC5047872_Fig3_80095.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the distinguishing features of the imaging techniques used to obtain images of the operated knee in the three specimen groups, and how do these features help in identifying the specific techniques used?" }, { "from": "gpt", "value": "The answer is MRI and CT scan" } ], "clip_score": 33.875, "loss": 4.052915096282959 }, { "id": "identity_329063", "image": "Super-CLEVR/images/superCLEVR_new_001971.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a green aircraft?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 16.140625, "loss": 5.36084508895874 }, { "id": "identity_213061", "image": "PMC-VQA/images/PMC4033054_F4_291575.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the color blue indicate in this brain image?" }, { "from": "gpt", "value": "The answer is Blue indicates lower intensity signals" } ], "clip_score": 28.609375, "loss": 4.204300403594971 }, { "id": "identity_12154", "image": "AI2D/images/614.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the last stage of a mosquito?\nChoices:\n(A) pupa\n(B) egg\n(C) larva\n(D) adult" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.8125, "loss": 4.285787582397461 }, { "id": "identity_286040", "image": "TextVQA/images/cf2ab13b09dc1f54.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which league is this child associated with?" }, { "from": "gpt", "value": "The answer is wroba youth" } ], "clip_score": 25.390625, "loss": 5.432494163513184 }, { "id": "identity_330581", "image": "PMC-VQA/images/PMC4247313_f2-etm-09-01-0117_340163.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the unusual mass found?\nChoices:\n(A) Lower lobe of the lung \n(B) Middle lobe of the lung \n(C) Upper lobe of the lung \n(D) Diaphragm" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.984375, "loss": 4.571364402770996 }, { "id": "identity_31197", "image": "A-OKVQA/images/197705.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who is famous for doing what these people are doing?\nChoices:\n(A) john cleese\n(B) charles lindbergh\n(C) lance armstrong\n(D) james cameron" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.46875, "loss": 4.127662658691406 }, { "id": "identity_258765", "image": "A-OKVQA/images/174766.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which parts of the rider's body are not wearing protection, and how many such parts are there?" }, { "from": "gpt", "value": "The answer is three" } ], "clip_score": 15.5546875, "loss": 4.675833225250244 }, { "id": "identity_53645", "image": "DocVQA/images/qnwg0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode used for the \"SEND\" activity to remote station \"+41 31 9813188\"?" }, { "from": "gpt", "value": "The answer is EC" } ], "clip_score": 23.34375, "loss": 4.503419876098633 }, { "id": "identity_280959", "image": "TQA/images/question_images/nuclear_energy_8120.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which element follows N in the given choices: Rb-92, Cs-140, U-235, or 200 MeV?\nChoices:\n(A) Rb-92\n(B) Cs-140\n(C) U-235\n(D) 200 MeV" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.59375, "loss": 4.187443733215332 }, { "id": "identity_207052", "image": "PMC-VQA/images/PMC7532002_fig12_265168.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct areas are labeled with 'V3'?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.703125, "loss": 4.349854469299316 }, { "id": "identity_314255", "image": "ScienceQA/images/2664.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these places is indicated on the map?\nChoices:\n(A) Washington, D.C.\n(B) San Antonio\n(C) Denver\n(D) Houston" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.625, "loss": 5.073378562927246 }, { "id": "identity_66909", "image": "TQA/images/abc_question_images/nuclear_energy_18118.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label \"X\" indicate in the sequence?" }, { "from": "gpt", "value": "The answer is Fission products" } ], "clip_score": 26.640625, "loss": 4.535077095031738 }, { "id": "identity_19080", "image": "AI2D/images/3987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The outermost part of the diagram is what?\nChoices:\n(A) plasmalemma\n(B) cell wall\n(C) rough endoplasmic reticulum\n(D) vacuole" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.609375, "loss": 4.82920503616333 }, { "id": "identity_33403", "image": "PMC-VQA/images/PMC4383018_f8-ijo-46-05-1872_374352.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the staining of cytoplasm look like in the image? \nChoices:\n(A) blue \n(B) red \n(C) green \n(D) yellow. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.078125, "loss": 4.160070419311523 }, { "id": "identity_335180", "image": "A-OKVQA/images/254420.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these devices with screens called?\nChoices:\n(A) microwaves\n(B) televisions\n(C) computers\n(D) recorders" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.03125, "loss": 4.264030456542969 }, { "id": "identity_33558", "image": "A-OKVQA/images/566568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: These types of plants are good in what environment?\nChoices:\n(A) snowy\n(B) desert\n(C) temperate\n(D) tropical" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.09375, "loss": 4.195906639099121 }, { "id": "identity_331416", "image": "VQA2.0/images/450621.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the person on the right holding?" }, { "from": "gpt", "value": "The answer is phone" } ], "clip_score": 18.1875, "loss": 4.284888744354248 }, { "id": "identity_265777", "image": "AI2D/images/3076.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the symbol A represent?\nChoices:\n(A) vacuoles\n(B) ribosomes\n(C) chromatin\n(D) nucleolus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.03125, "loss": 4.555410385131836 }, { "id": "identity_322539", "image": "A-OKVQA/images/248229.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do these green things bring to the beach?\nChoices:\n(A) minerals\n(B) tiny fish\n(C) salt\n(D) unwanted trash" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.390625, "loss": 4.007828235626221 }, { "id": "identity_289479", "image": "A-OKVQA/images/116694.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a possible cargo that the yellow vehicle could transport?\nChoices:\n(A) cars\n(B) airplane\n(C) furniture\n(D) mobile home" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 7.78515625, "loss": 4.693282127380371 }, { "id": "identity_262807", "image": "A-OKVQA/images/413874.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the occupation of the individuals dressed in white?\nChoices:\n(A) athletes\n(B) nurses\n(C) doctors\n(D) teachers" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.390625, "loss": 4.698144912719727 }, { "id": "identity_228849", "image": "VQA2.0/images/432119.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the dominant colors in this painting, and do they belong to the warm or cool palette?" }, { "from": "gpt", "value": "The answer is The painting has both warm and cool colors equally" } ], "clip_score": 19.71875, "loss": 4.395849227905273 }, { "id": "identity_288881", "image": "FigureQA/images/93701.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Burlywood the area under the curve with the minimum value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.28125, "loss": 4.329727649688721 }, { "id": "identity_194741", "image": "PMC-VQA/images/PMC3744490_pone-0072265-g004_224985.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the objects in the image?" }, { "from": "gpt", "value": "The answer is Irregular shapes" } ], "clip_score": 17.96875, "loss": 4.177830219268799 }, { "id": "identity_192971", "image": "TextVQA/images/fb7870c0e7cacb7b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the character mentioned in the synopsis on the cover?" }, { "from": "gpt", "value": "The answer is Laella" } ], "clip_score": 18.984375, "loss": 5.720917224884033 }, { "id": "identity_309477", "image": "TQA/images/abc_question_images/cycle_water_11481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is happening at label X?\nChoices:\n(A) TRANSPIRATION\n(B) INFILTRATION\n(C) EVAPORATION\n(D) CONDENSATION" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.625, "loss": 4.164119243621826 }, { "id": "identity_283323", "image": "DVQA/images/bar_train_00093166.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store is represented by the color grey?" }, { "from": "gpt", "value": "The answer is night" } ], "clip_score": 23.0625, "loss": 4.2561845779418945 }, { "id": "identity_273897", "image": "VQA-RAD/images/synpic59536.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging modality is utilized in MRI scans?" }, { "from": "gpt", "value": "The answer is DWI" } ], "clip_score": 29.265625, "loss": 4.365009784698486 }, { "id": "identity_271065", "image": "A-OKVQA/images/551338.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the likely content of the large container located just outside the fence here? Is it:\n(A) frisbees\n(B) golf balls\n(C) water\n(D) oil" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.015625, "loss": 4.456058502197266 }, { "id": "identity_282334", "image": "VizWiz/images/VizWiz_train_00013480.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the type of yogurt?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 30.0625, "loss": 4.378109931945801 }, { "id": "identity_36013", "image": "PMC-VQA/images/PMC9338713_F1_360637.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the vein in the image? \nChoices:\n(A) Internal jugular vein \n(B) External jugular vein \n(C) Femoral vein \n(D) Radial vein " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.296875, "loss": 4.752266883850098 }, { "id": "identity_25737", "image": "PMC-VQA/images/PMC7031565_os12599-fig-0006_121402.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which vertebrae have a transverse process? \nChoices:\n(A) thoracic vertebrae \n(B) lumbar vertebrae \n(C) cervical vertebrae \n(D) sacral vertebrae " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.515625, "loss": 4.282670021057129 }, { "id": "identity_30139", "image": "PMC-VQA/images/PMC4658792_Fig2_447513.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the CSF leak seen in the imaging? \nChoices:\n(A) Left side middle ear area \n(B) Brain stem \n(C) Right side middle ear area \n(D) Cerebellum " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.5, "loss": 4.159303665161133 }, { "id": "identity_336640", "image": "DocVQA/images/xnnp0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written in this field?" }, { "from": "gpt", "value": "The answer is M" } ], "clip_score": 17.03125, "loss": 4.248085021972656 }, { "id": "identity_75998", "image": "AI2D/images/2371.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stages are depicted in the life cycle shown in the diagram?" }, { "from": "gpt", "value": "The answer is Four stages" } ], "clip_score": 28.046875, "loss": 4.103846073150635 }, { "id": "identity_132179", "image": "A-OKVQA/images/469605.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are sitting on the grass?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 18.34375, "loss": 4.517650127410889 }, { "id": "identity_325340", "image": "PMC-VQA/images/PMC8109045_FIG1_436471.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of imaging sequence was used?\nChoices:\n(A) CT scan\n(B) T1 MRI sequence\n(C) T2 MRI sequence\n(D) PET scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.296875, "loss": 4.189072608947754 }, { "id": "identity_11583", "image": "AI2D/images/841.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the letter which represents the larvae?\nChoices:\n(A) B\n(B) F\n(C) G\n(D) C" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.1875, "loss": 4.33843994140625 }, { "id": "identity_270056", "image": "TextVQA/images/a0993d9d8ce6f776.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which platform is located at the highest point?" }, { "from": "gpt", "value": "The answer is facebook" } ], "clip_score": 20.515625, "loss": 4.330158710479736 }, { "id": "identity_45388", "image": "DocVQA/images/rnxd0004_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the subject of the approval note?" }, { "from": "gpt", "value": "The answer is Winston Super Lights 83 CPB.... Export Russia" } ], "clip_score": 21.59375, "loss": 4.616151332855225 }, { "id": "identity_253617", "image": "A-OKVQA/images/537139.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How might the proximity to the train tracks affect the daily lives of the residents and subsequently influence the property values in this location?" }, { "from": "gpt", "value": "The answer is Train noise and vibrations lower property values" } ], "clip_score": 26.34375, "loss": 4.124826908111572 }, { "id": "identity_191202", "image": "TextVQA/images/9aff1f3584844e37.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What natural feature is illustrated on the right side of the coin?" }, { "from": "gpt", "value": "The answer is Beach or coastal scene" } ], "clip_score": 31.21875, "loss": 4.007891654968262 }, { "id": "identity_3990", "image": "A-OKVQA/images/407115.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this store sell?\nChoices:\n(A) tires\n(B) coffee\n(C) cars\n(D) horses" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.953125, "loss": 4.356287002563477 }, { "id": "identity_324162", "image": "A-OKVQA/images/301649.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is wet yeast used to make?\nChoices:\n(A) pizza\n(B) cake\n(C) dough\n(D) bread" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.4765625, "loss": 4.191617488861084 }, { "id": "identity_32636", "image": "A-OKVQA/images/218701.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is offscreen to the bottom right and likely to be casting a shadow onto the snow?\nChoices:\n(A) sign\n(B) tree\n(C) fence\n(D) house" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.5625, "loss": 4.049485206604004 }, { "id": "identity_105077", "image": "Super-CLEVR/images/superCLEVR_new_001957.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are visible in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 21.578125, "loss": 5.135861873626709 }, { "id": "identity_286645", "image": "A-OKVQA/images/40341.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what manner did the man reach the location just before taking this picture? Was it by skiing, walking, flying, or jogging?\nChoices:\n(A) skied\n(B) walked\n(C) flew\n(D) jogged" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.296875, "loss": 4.161403179168701 }, { "id": "identity_314413", "image": "A-OKVQA/images/416337.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these fruits are the least sweet?\nChoices:\n(A) biggest\n(B) green\n(C) most freckled\n(D) smallest" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.578125, "loss": 4.031591892242432 }, { "id": "identity_249588", "image": "IconQA/images/31236.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you select two marbles without looking, how likely is it that both will be black?" }, { "from": "gpt", "value": "The answer is certain" } ], "clip_score": 30.828125, "loss": 4.46782922744751 }, { "id": "identity_37188", "image": "A-OKVQA/images/526892.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the guy on bare feet doing with the table?\nChoices:\n(A) cleaning\n(B) waxing\n(C) painting\n(D) sanding" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.703125, "loss": 4.399682521820068 }, { "id": "identity_299952", "image": "VQA2.0/images/217339.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is around the baby's neck?" }, { "from": "gpt", "value": "The answer is bib" } ], "clip_score": 27.140625, "loss": 4.296579837799072 }, { "id": "identity_108189", "image": "VQA2.0/images/570075.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the surface of the beach?" }, { "from": "gpt", "value": "The answer is Sand" } ], "clip_score": 18.234375, "loss": 4.13303279876709 }, { "id": "identity_219643", "image": "A-OKVQA/images/277625.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flags are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.234375, "loss": 4.225803852081299 }, { "id": "identity_319533", "image": "ScienceQA/images/12655.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Houston\n(B) Pierre\n(C) Boise\n(D) Nampa" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.15625, "loss": 4.442452430725098 }, { "id": "identity_20358", "image": "PMC-VQA/images/PMC7382491_Fig2_220258.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the staining technique used in the images?\nChoices:\n(A) Thionin \n(B) Fluorescein \n(C) Nissl \n(D) Hematoxylin " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.375, "loss": 4.165255069732666 }, { "id": "identity_313126", "image": "PMC-VQA/images/PMC5820407_Fig4_278594.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the texture of the hearts shown in the image?\nChoices:\n(A) Smooth\n(B) Rough\n(C) LIC\n(D) None" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.609375, "loss": 4.850485801696777 }, { "id": "identity_271759", "image": "VQA2.0/images/347037.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day is depicted in the photo?" }, { "from": "gpt", "value": "The answer is day" } ], "clip_score": 18.34375, "loss": 4.918456554412842 }, { "id": "identity_119873", "image": "VQA2.0/images/394784.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many propellers does the plane have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 26.890625, "loss": 4.042928218841553 }, { "id": "identity_147514", "image": "DocVQA/images/rtxm0227_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person discussing poultry meat?" }, { "from": "gpt", "value": "The answer is Mr. Philipe Nassrallah" } ], "clip_score": 23.203125, "loss": 4.417827129364014 }, { "id": "identity_213844", "image": "TextVQA/images/d3c654aab96a0955.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 18.328125, "loss": 4.380648612976074 }, { "id": "identity_219554", "image": "VizWiz/images/VizWiz_train_00001447.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible magnets are attached to the refrigerator?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 26.421875, "loss": 4.7017316818237305 }, { "id": "identity_311300", "image": "TextVQA/images/4f53ff71c6d6d56a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written inside the circle?" }, { "from": "gpt", "value": "The answer is finlande" } ], "clip_score": 11.0625, "loss": 4.789535999298096 }, { "id": "identity_1243", "image": "Super-CLEVR/images/superCLEVR_new_000601.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there anything else of the same color as the small aeroplane?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 17.875, "loss": 4.954891681671143 }, { "id": "identity_331450", "image": "A-OKVQA/images/433428.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of vehicles are present in the frontmost part of the image?\nChoices:\n(A) cars\n(B) trucks\n(C) bicycle\n(D) motorcycle" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.9375, "loss": 4.169863700866699 }, { "id": "identity_318956", "image": "TQA/images/question_images/human_system_digestive_3738.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this called?\nChoices:\n(A) small intestine\n(B) pharynx\n(C) stomach\n(D) mouth" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 36.28125, "loss": 4.2053327560424805 }, { "id": "identity_190227", "image": "A-OKVQA/images/376677.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible support columns are there for the structure under construction?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 19.796875, "loss": 4.52466344833374 }, { "id": "identity_327797", "image": "TextVQA/images/aecf6e390f953951.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does it say between these two lamps at the top?" }, { "from": "gpt", "value": "The answer is royal institution" } ], "clip_score": 22.140625, "loss": 4.402215480804443 }, { "id": "identity_20761", "image": "A-OKVQA/images/522819.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of meal will be served later?\nChoices:\n(A) formal\n(B) buffet\n(C) casual\n(D) potluck" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.09375, "loss": 4.0161027908325195 }, { "id": "identity_314898", "image": "AI2D/images/3052.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is also known as this?\n\nChoices:\n(A) Nucleus\n(B) Organelles\n(C) DNA\n(D) Plasma Membrane" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 34.84375, "loss": 4.214094638824463 }, { "id": "identity_284300", "image": "A-OKVQA/images/511469.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be found behind the plane? \nOptions:\n(A) nothing\n(B) desert\n(C) airport\n(D) tropical island" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.625, "loss": 4.380287170410156 }, { "id": "identity_26454", "image": "A-OKVQA/images/83178.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are people doing here?\nChoices:\n(A) dancing\n(B) jogging\n(C) singing\n(D) buying food" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.921875, "loss": 4.053090572357178 }, { "id": "identity_263392", "image": "ScienceQA/images/16294.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic describes this object? \nOptions:\n(A) hard\n(B) yellow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.109375, "loss": 4.125733852386475 }, { "id": "identity_276388", "image": "A-OKVQA/images/414113.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the main focus of the people on the street side among the following options: snacks, arrest, sirens, or parade?\nChoices:\n(A) snacks\n(B) arrest\n(C) sirens\n(D) parade" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.828125, "loss": 4.510888576507568 }, { "id": "identity_120999", "image": "PMC-VQA/images/PMC4082711_fig06_303633.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the majority of the objects in the image?" }, { "from": "gpt", "value": "The answer is Round" } ], "clip_score": 13.8515625, "loss": 4.286637306213379 }, { "id": "identity_34476", "image": "A-OKVQA/images/35473.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the overhead wire for?\nChoices:\n(A) power streetcars\n(B) guides streetcar\n(C) electric utility\n(D) phone lines" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.21875, "loss": 4.363169193267822 }, { "id": "identity_58665", "image": "MapQA/images/map_10839.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states fall under the value range of 6.4%-6.6%?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.109375, "loss": 4.619316101074219 }, { "id": "identity_277536", "image": "PMC-VQA/images/PMC6108967_Fig6_358725.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which side is pleural effusion visible? \nChoices:\n(A) Right\n(B) Left\n(C) Both sides\n(D) No pleural effusion" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.828125, "loss": 4.629117012023926 }, { "id": "identity_133737", "image": "Super-CLEVR/images/superCLEVR_new_001202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 21.046875, "loss": 5.0675506591796875 }, { "id": "identity_37425", "image": "TabMWP/images/35259.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is the function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.5625, "loss": 4.224732875823975 }, { "id": "identity_274594", "image": "FigureQA/images/38417.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Dark Slate or Dark Olive?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.765625, "loss": 4.5876922607421875 }, { "id": "identity_271040", "image": "FigureQA/images/9064.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Purple or Khaki?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.03125, "loss": 4.519790172576904 }, { "id": "identity_279412", "image": "VQA2.0/images/574971.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of intelligence are books like the yellow and black one behind the bear designed for?" }, { "from": "gpt", "value": "The answer is bear" } ], "clip_score": 25.953125, "loss": 5.352283477783203 }, { "id": "identity_329386", "image": "A-OKVQA/images/338064.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is snow located?\nChoices:\n(A) in apartments\n(B) in trees\n(C) underwater\n(D) mountain peaks" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.125, "loss": 4.085233211517334 }, { "id": "identity_280399", "image": "A-OKVQA/images/67456.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On what type of fuel does this vehicle operate?\nChoices:\n(A) potatoes\n(B) gasoline\n(C) jet fuel\n(D) denatured alcohol" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.3671875, "loss": 4.237071514129639 }, { "id": "identity_132154", "image": "PMC-VQA/images/PMC7301249_fig2_194538.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different colors are used to represent brain activity in the image?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 27.75, "loss": 4.033173561096191 }, { "id": "identity_321301", "image": "IconQA/images/18196.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, how likely is it that you will get a black one?\nChoices:\n(A) unlikely\n(B) impossible\n(C) probable\n(D) certain" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.078125, "loss": 4.343998432159424 }, { "id": "identity_47555", "image": "DocVQA/images/qxbx0227_17.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the author of the report titled \"Recycling Hot Sludge to Steffen Coolers\"?" }, { "from": "gpt", "value": "The answer is Angelos, P.D.L" } ], "clip_score": 25.4375, "loss": 4.232515811920166 }, { "id": "identity_323858", "image": "DocVQA/images/xngv0228_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the preferred location for the meeting of The Nutrition Foundations?" }, { "from": "gpt", "value": "The answer is europe" } ], "clip_score": 20.125, "loss": 5.755095481872559 }, { "id": "identity_34825", "image": "ScienceQA/images/12241.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Washington?\nChoices:\n(A) Spokane\n(B) Seattle\n(C) Olympia\n(D) Denver" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.375, "loss": 4.262441635131836 }, { "id": "identity_291956", "image": "A-OKVQA/images/543551.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which country is this bridge situated?\nChoices:\n(A) China\n(B) Australia\n(C) Britain\n(D) Italy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.71875, "loss": 4.265868186950684 }, { "id": "identity_183812", "image": "VQA2.0/images/382104.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible side-view mirrors are on the motorcycle in the foreground?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 26.71875, "loss": 4.334569931030273 }, { "id": "identity_171677", "image": "IconQA/images/42914.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The first picture is a sub. Which picture is third?" }, { "from": "gpt", "value": "The answer is The first fish" } ], "clip_score": 29.015625, "loss": 4.372035026550293 }, { "id": "identity_21321", "image": "PMC-VQA/images/PMC3431266_F2_152411.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the illustration showing? \nChoices:\n(A) Side view of the T-NT \n(B) Bottom view of the T-NT \n(C) Top view of the T-NT \n(D) Cross-section of the T-NT " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.8828125, "loss": 4.498368263244629 }, { "id": "identity_143914", "image": "MapQA/images/map_10693.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states fall into the lowest value range category?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 26.359375, "loss": 4.077949047088623 }, { "id": "identity_282365", "image": "A-OKVQA/images/159714.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose are the animals kept in the enclosed area?\nChoices:\n(A) to sell\n(B) to judge\n(C) to trim\n(D) to ride" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.09375, "loss": 4.349212646484375 }, { "id": "identity_112549", "image": "Super-CLEVR/images/superCLEVR_new_001710.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is the same shape as the tiny cyan shiny thing but larger in size?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 17.9375, "loss": 4.01655912399292 }, { "id": "identity_237928", "image": "Geometry3K/images/1536.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the value of \\( x \\) and then determine \\( m \\angle C \\)" }, { "from": "gpt", "value": "The answer is 56" } ], "clip_score": 31.078125, "loss": 4.0257062911987305 }, { "id": "identity_302726", "image": "FigureQA/images/82212.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Mint the one with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.25, "loss": 4.251776218414307 }, { "id": "identity_269033", "image": "PMC-VQA/images/PMC4604167_pone.0140476.g005_432976.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of arrows are depicted in the image?\nOptions:\n(A) Green arrows\n(B) Blue arrows\n(C) Red arrows\n(D) Yellow arrows" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.609375, "loss": 4.155221462249756 }, { "id": "identity_114323", "image": "VQA2.0/images/576685.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the bird's plumage?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 24.640625, "loss": 4.371912956237793 }, { "id": "identity_330105", "image": "DVQA/images/bar_train_00127790.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are these bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 19.765625, "loss": 4.246586322784424 }, { "id": "identity_279318", "image": "FigureQA/images/97764.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the color with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.609375, "loss": 4.695518970489502 }, { "id": "identity_304393", "image": "PMC-VQA/images/PMC8019334_FIG3_409135.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible in the image?\nChoices:\n(A) Anterior spinal fusion\n(B) Posterior spinal fusion\n(C) No visible spinal fusion\n(D) Fusion in the cervical spine" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.359375, "loss": 4.4124369621276855 }, { "id": "identity_323609", "image": "IconQA/images/92309.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this symbol?\nChoices:\n(A) flip\n(B) turn\n(C) slide" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.078125, "loss": 4.023066997528076 }, { "id": "identity_33036", "image": "PMC-VQA/images/PMC8122927_jcm-10-01834-f002_441031.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the high magnification image show? \nChoices:\n(A) Dark intracellular material \n(B) Fibrils and tubules \n(C) Bright intracellular material \n(D) Only fibrils and no tubules " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.96875, "loss": 4.821371555328369 }, { "id": "identity_281714", "image": "IconQA/images/43250.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner more likely to land on: white or pink?\nChoices:\n(A) white\n(B) pink" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.84375, "loss": 4.448596954345703 }, { "id": "identity_213854", "image": "A-OKVQA/images/263104.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many benches are visible at the bus stop?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 27.15625, "loss": 4.550335884094238 }, { "id": "identity_14998", "image": "VizWiz/images/VizWiz_train_00002590.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of coffee is this?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 27.109375, "loss": 4.3831892013549805 }, { "id": "identity_274707", "image": "PMC-VQA/images/PMC5844770_FI1700046cr-1_285937.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible in the X-ray after the reduction of a fracture-dislocation?\nChoices:\n(A) A complete collapse of the bone \n(B) A fully healed bone \n(C) A partly comminuted fracture pattern \n(D) A dislocation of the joint" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.828125, "loss": 4.0297956466674805 }, { "id": "identity_297666", "image": "VQA2.0/images/24371.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color is the glass on the left?" }, { "from": "gpt", "value": "The answer is white" } ], "clip_score": 10.1953125, "loss": 4.0418314933776855 }, { "id": "identity_336556", "image": "PMC-VQA/images/PMC6915108_F2_84735.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the color scale represent in this image?\nChoices:\n(A) Brain regions with high glucose metabolism\n(B) Brain regions with low glucose metabolism\n(C) Brain regions with high oxygen consumption\n(D) Brain regions with low oxygen consumption" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.984375, "loss": 4.764769077301025 }, { "id": "identity_10294", "image": "PMC-VQA/images/PMC8079642_F3_429510.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the red circle in the figure representing? \nChoices:\n(A) Stimulated electrodes, \n(B) Response site, \n(C) Z-score, \n(D) Stimulation site. " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.859375, "loss": 4.796084403991699 }, { "id": "identity_300961", "image": "A-OKVQA/images/394874.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How are these animals most likely to cross this river?\nChoices:\n(A) swim\n(B) walk across\n(C) ride boat\n(D) fly" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.484375, "loss": 4.470438480377197 }, { "id": "identity_311274", "image": "AI2D/images/1081.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What surrounds the cell?\nChoices:\n(A) chromatin\n(B) plasma membrane\n(C) cytosol\n(D) centriole" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.984375, "loss": 4.260956764221191 }, { "id": "identity_296791", "image": "A-OKVQA/images/421618.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location do these items grow?\nChoices:\n(A) tree\n(B) ground\n(C) bush\n(D) flower" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.125, "loss": 4.279430389404297 }, { "id": "identity_281041", "image": "FigureQA/images/72187.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Pale Green or Medium Aqua?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.640625, "loss": 4.626917839050293 }, { "id": "identity_280428", "image": "A-OKVQA/images/415569.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the man holding his leg up high behind him?\nChoices:\n(A) running away\n(B) pitched ball\n(C) doing trick\n(D) stretching" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.578125, "loss": 4.934170246124268 }, { "id": "identity_306890", "image": "Super-CLEVR/images/superCLEVR_new_000047.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any vehicles behind the small metal object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 23.25, "loss": 5.35036563873291 }, { "id": "identity_16482", "image": "ScienceQA/images/7364.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) slippery\n(B) sticky" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.359375, "loss": 4.264102935791016 }, { "id": "identity_317250", "image": "PMC-VQA/images/PMC7505623_FIG2_258244.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging method was used to create the Maximum Intensity Projection (MIP) images?\nChoices:\n(A) X-ray imaging\n(B) Magnetic Resonance Imaging\n(C) Computed Tomography\n(D) Ultrasound imaging" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.34375, "loss": 4.222684383392334 }, { "id": "identity_5211", "image": "A-OKVQA/images/218482.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the big item in the far back right?\nChoices:\n(A) baseball stadium\n(B) statue\n(C) elephant\n(D) van" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 12.0, "loss": 4.302481174468994 }, { "id": "identity_295378", "image": "ScienceQA/images/2873.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is being indicated by the highlight?\nOptions:\n(A) Antarctica\n(B) Asia\n(C) Africa\n(D) North America" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.0625, "loss": 4.333346843719482 }, { "id": "identity_315720", "image": "AI2D/images/3259.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is labeled as A?\nChoices:\n(A) Centriole\n(B) Ribosomes\n(C) Mitochondrion\n(D) Lysosome" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.0625, "loss": 4.4846296310424805 }, { "id": "identity_317103", "image": "PMC-VQA/images/PMC8079624_F3_429499.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of imaging was used for localization?\nChoices:\n(A) MRI\n(B) Ultrasound\n(C) CT\n(D) X-Ray" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.59375, "loss": 4.051043510437012 }, { "id": "identity_37068", "image": "PMC-VQA/images/PMC3954266_fig2_273904.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is contained within the incisional hernia seen on the CT scan? \nChoices:\n(A) Bowel \n(B) Ovarian cyst \n(C) Lymph node \n(D) Omentum " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.75, "loss": 4.360146999359131 }, { "id": "identity_74008", "image": "ScienceQA/images/1150.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many layers does the sandwich bag have?" }, { "from": "gpt", "value": "The answer is One layer" } ], "clip_score": 26.84375, "loss": 4.174225330352783 }, { "id": "identity_312735", "image": "VizWiz/images/VizWiz_train_00016175.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is on this, please?" }, { "from": "gpt", "value": "The answer is j36d" } ], "clip_score": 16.109375, "loss": 4.094664096832275 }, { "id": "identity_34526", "image": "VQA2.0/images/390345.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the gender of the person in the photo?" }, { "from": "gpt", "value": "The answer is man" } ], "clip_score": 19.015625, "loss": 5.4530134201049805 }, { "id": "identity_277313", "image": "A-OKVQA/images/235672.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for using covered gazebos in the outdoor area?\nChoices:\n(A) too icy\n(B) stay dry\n(C) too sunny\n(D) too windy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.34375, "loss": 4.460140228271484 }, { "id": "identity_130891", "image": "A-OKVQA/images/46339.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What product is advertised on the sign next to the central mannequin?" }, { "from": "gpt", "value": "The answer is Everyday Living" } ], "clip_score": 30.40625, "loss": 4.877814292907715 }, { "id": "identity_129021", "image": "VQA-AS/images/13851.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many fish can be seen in the pond?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.9375, "loss": 4.161637783050537 }, { "id": "identity_323810", "image": "A-OKVQA/images/528527.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What energy powers this?\nChoices:\n(A) electricity\n(B) solar\n(C) wind\n(D) manual power" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.4765625, "loss": 4.111621856689453 }, { "id": "identity_244101", "image": "TextVQA/images/974315dec07b9f44.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full word visible on the wall of the dugout, and how many players are standing in front of it?" }, { "from": "gpt", "value": "The answer is TECH, six" } ], "clip_score": 20.796875, "loss": 4.487782001495361 }, { "id": "identity_133733", "image": "TextVQA/images/a771bb9cccb9c9d2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly behind the fire truck?" }, { "from": "gpt", "value": "The answer is A yellow car" } ], "clip_score": 24.109375, "loss": 4.225056171417236 }, { "id": "identity_107124", "image": "VQA-RAD/images/synpic23803.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color tone of the image?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 20.171875, "loss": 4.154187202453613 }, { "id": "identity_328754", "image": "PMC-VQA/images/PMC5265201_Fig12_118782.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the opacities in the chest x-ray?\n\nChoices:\n(A) Central and apical\n(B) Peripheral and basilar\n(C) Bilateral and symmetrical\n(D) Unilateral and focal" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.34375, "loss": 4.9732346534729 }, { "id": "identity_307495", "image": "IconQA/images/18021.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, how likely is it that you will get a black one?\nChoices:\n(A) probable\n(B) unlikely\n(C) impossible\n(D) certain" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.0625, "loss": 4.150058269500732 }, { "id": "identity_334583", "image": "A-OKVQA/images/106453.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which era was this image captured?\nChoices:\n(A) frontier days\n(B) prehistoric\n(C) modern\n(D) victorian" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.1875, "loss": 4.65606164932251 }, { "id": "identity_299805", "image": "A-OKVQA/images/523473.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the absence of leaves on some trees in this location?\nChoices:\n(A) they're dead\n(B) spring\n(C) sap suckers\n(D) summer" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.375, "loss": 4.443509101867676 }, { "id": "identity_330216", "image": "PMC-VQA/images/PMC6038976_F2_340196.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was observed along with the solid mass?\nChoices:\n(A) Bleeding \n(B) Fibrosis \n(C) Calcification \n(D) Infection" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.0625, "loss": 4.124244689941406 }, { "id": "identity_209278", "image": "VQA2.0/images/504021.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of establishment is located on the corner of the street?" }, { "from": "gpt", "value": "The answer is A bank" } ], "clip_score": 24.21875, "loss": 4.065774917602539 }, { "id": "identity_318980", "image": "PMC-VQA/images/PMC5920263_fig01_306710.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the radiographs of the ankle show?\nChoices:\n(A) Only a fracture of a medial malleolus.\n(B) A fracture of a lateral malleolus.\n(C) A suspicious avulsion fracture on the anterior part of a lateral malleolus.\n(D) A suspicious avulsion fracture on the posterior part of a lateral malleolus." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.921875, "loss": 4.055634498596191 }, { "id": "identity_122932", "image": "A-OKVQA/images/140291.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shorts worn by the person in the foreground?" }, { "from": "gpt", "value": "The answer is Navy blue" } ], "clip_score": 23.53125, "loss": 4.580312728881836 }, { "id": "identity_278023", "image": "A-OKVQA/images/295565.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is located at the front of the train among the following options: (A) bear, (B) cat, (C) old lady, or (D) child?" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.234375, "loss": 4.134920120239258 }, { "id": "identity_294484", "image": "PMC-VQA/images/PMC5006384_Fig2_68539.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized for assessing the mice? \nOptions:\n(A) CT scan\n(B) PET scan\n(C) MRI\n(D) X-ray" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.0, "loss": 4.171566963195801 }, { "id": "identity_297078", "image": "A-OKVQA/images/218155.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For which type of weather is the cyclist most prepared today?\nChoices:\n(A) tornado\n(B) rain\n(C) earthquake\n(D) tsunami" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.9375, "loss": 4.9210004806518555 }, { "id": "identity_75097", "image": "AI2D/images/1807.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many primary consumers are prey to the Raven according to the food web?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.328125, "loss": 4.280709266662598 }, { "id": "identity_47876", "image": "DVQA/images/bar_train_00014254.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group has the highest value for the category \"male\"?" }, { "from": "gpt", "value": "The answer is age" } ], "clip_score": 22.859375, "loss": 4.119160175323486 }, { "id": "identity_306950", "image": "A-OKVQA/images/46537.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of public servant is the person on the motorcycle escorting the bus?\n\nChoices:\n(A) fireman\n(B) soldier\n(C) policeman\n(D) inspector" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.359375, "loss": 4.212409973144531 }, { "id": "identity_182209", "image": "A-OKVQA/images/223580.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the tent to the far right?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 20.890625, "loss": 4.124293327331543 }, { "id": "identity_337985", "image": "PMC-VQA/images/PMC3773898_fig4_231613.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the body was operated on in Case 2?\nChoices:\n(A) Lumbar spine\n(B) Thoracic spine\n(C) Cervical spine\n(D) Sacral spine" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.453125, "loss": 4.235196590423584 }, { "id": "identity_268979", "image": "PMC-VQA/images/PMC7251740_Fig1_178955.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where does the mass observed in echocardiographic imaging originate from?\nChoices:\n(A) Left ventricle \n(B) Aorta \n(C) Right atrium \n(D) Left atrium" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.546875, "loss": 4.1159162521362305 }, { "id": "identity_148598", "image": "DocVQA/images/mswg0227_10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the diet plan allow for any sugar to be added to the water-packed peaches?" }, { "from": "gpt", "value": "The answer is with saccharin - no sugar" } ], "clip_score": 19.546875, "loss": 4.02099084854126 }, { "id": "identity_327096", "image": "TQA/images/question_images/parts_skeleton_7277.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What connects the skull to the pelvis?\nChoices:\n(A) humerus\n(B) ribs\n(C) vertebral column\n(D) sternum" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.875, "loss": 4.35406494140625 }, { "id": "identity_283610", "image": "PMC-VQA/images/PMC9399794_F1_378886.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging result post-procedure? \nOptions:\n(A) Hypodensity\n(B) Hyperdensity\n(C) Isodensity\n(D) No change in density" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.1875, "loss": 4.274763584136963 }, { "id": "identity_271049", "image": "TQA/images/abc_question_images/optics_reflection_19185.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents the reflective ray in the illustration? \n(A) K\n(B) H\n(C) I\n(D) K" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.5625, "loss": 4.211177349090576 }, { "id": "identity_309027", "image": "ScienceQA/images/8735.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Birmingham\n(B) Baton Rouge\n(C) Huntington\n(D) Charleston" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.296875, "loss": 4.067206382751465 }, { "id": "identity_269507", "image": "ScienceQA/images/1636.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is potassium feldspar classified as a mineral or a rock?\nChoices:\n(A) mineral\n(B) rock" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.796875, "loss": 4.011956214904785 }, { "id": "identity_210436", "image": "VQA2.0/images/491430.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stripes are on the shoe?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 15.609375, "loss": 4.129230499267578 }, { "id": "identity_323036", "image": "VQA2.0/images/30841.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is it sunset?" }, { "from": "gpt", "value": "The answer is not yet" } ], "clip_score": 14.8515625, "loss": 4.623926639556885 }, { "id": "identity_328063", "image": "ScienceQA/images/6103.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the place shown?\nChoices:\n(A) Connecticut\n(B) Florida\n(C) North Carolina\n(D) Virginia" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.453125, "loss": 4.167867183685303 }, { "id": "identity_53473", "image": "MapQA/images/map_5798.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have the lowest range of children number according to the legend?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 31.34375, "loss": 4.046297073364258 }, { "id": "identity_30007", "image": "Super-CLEVR/images/superCLEVR_new_000846.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the small bicycle that is left of the large bicycle behind the road bike that is on the right side of the yellow bicycle made of?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 23.4375, "loss": 4.965441703796387 }, { "id": "identity_281385", "image": "IconQA/images/1468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the probability of selecting a black marble without looking?\nChoices:\n(A) impossible\n(B) unlikely\n(C) probable\n(D) certain" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.8125, "loss": 4.462512016296387 }, { "id": "identity_212383", "image": "A-OKVQA/images/341736.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrian crossing signs are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 26.71875, "loss": 4.456813812255859 }, { "id": "identity_248882", "image": "A-OKVQA/images/367608.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key components of the skate trick the man is doing, and how do they differentiate it from the other types of tricks listed?" }, { "from": "gpt", "value": "The answer is The skateboard rotates around its horizontal axis using feet" } ], "clip_score": 29.9375, "loss": 4.237448692321777 }, { "id": "identity_279986", "image": "ScienceQA/images/15014.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Rhode Island?\nChoices:\n(A) Trenton\n(B) Montpelier\n(C) Providence\n(D) Newport" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.515625, "loss": 4.102015495300293 }, { "id": "identity_281485", "image": "ScienceQA/images/12735.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly describes the transfer of thermal energy during this time: from each salmon to the surroundings or from the surroundings to each salmon?\nChoices:\n(A) each salmon . . . the surroundings\n(B) the surroundings . . . each salmon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.59375, "loss": 4.173031806945801 }, { "id": "identity_15765", "image": "Super-CLEVR/images/superCLEVR_new_000330.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there the same number of matte objects left of the tiny red bicycle and small yellow cruisers?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 31.625, "loss": 4.967098712921143 }, { "id": "identity_279954", "image": "A-OKVQA/images/425269.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity are they currently engaged in?\nChoices:\n(A) eating breakfast\n(B) resting\n(C) enjoying scenery\n(D) arguing" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.0, "loss": 4.107388973236084 }, { "id": "identity_292484", "image": "A-OKVQA/images/570601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the origin of baseball?\nChoices:\n(A) sweden\n(B) england\n(C) america\n(D) france" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.0625, "loss": 4.106752872467041 }, { "id": "identity_224245", "image": "PMC-VQA/images/PMC8005652_F4_405303.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the variety of Okra shown in the image and describe the key morphological features that distinguish it from the other varieties listed" }, { "from": "gpt", "value": "The answer is Clemson variety with ribbed, spineless pods" } ], "clip_score": 17.328125, "loss": 4.1334547996521 }, { "id": "identity_328967", "image": "PMC-VQA/images/PMC5944512_F4_312494.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging method was used to visualize these lymph nodes?\nChoices:\n(A) MRI\n(B) CT\n(C) PET\n(D) X-ray" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.890625, "loss": 4.0067644119262695 }, { "id": "identity_258737", "image": "A-OKVQA/images/247146.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the image, what are three specific indicators that suggest the game is lopsided in terms of skill level?" }, { "from": "gpt", "value": "The answer is Player positions, scoreboard, audience reactions" } ], "clip_score": 19.578125, "loss": 4.08302116394043 }, { "id": "identity_265638", "image": "DocVQA/images/txcx0227_18.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Whose name does this resume belong to?" }, { "from": "gpt", "value": "The answer is Ei Soon Cho" } ], "clip_score": 20.21875, "loss": 4.184586048126221 }, { "id": "identity_297872", "image": "PMC-VQA/images/PMC7175264_advs1618-fig-0003_156034.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which brain region is referred to in the caption?\nOptions:\n(A) hypothalamus\n(B) cerebellum\n(C) medulla oblongata\n(D) optic chiasm." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.484375, "loss": 5.294129371643066 }, { "id": "identity_302868", "image": "VQA2.0/images/262260.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Why do the birds appear as dark shapes?" }, { "from": "gpt", "value": "The answer is direction of sunlight" } ], "clip_score": 27.65625, "loss": 4.007976055145264 }, { "id": "identity_13342", "image": "IconQA/images/92245.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you select a marble without looking, how likely is it that you will pick a black one?\nChoices:\n(A) certain\n(B) unlikely\n(C) impossible\n(D) probable" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.125, "loss": 4.408372402191162 }, { "id": "identity_78699", "image": "TQA/images/question_images/tides_126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What phase of the moon is shown at the top of the image?" }, { "from": "gpt", "value": "The answer is First quarter" } ], "clip_score": 29.09375, "loss": 4.435888767242432 }, { "id": "identity_275546", "image": "ScienceQA/images/20631.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following factors served as an independent variable in this experiment?\nChoices:\n(A) the number of days until a seed germinated\n(B) the temperature of the heating pad" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.03125, "loss": 4.684623718261719 }, { "id": "identity_33234", "image": "A-OKVQA/images/307341.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who owns the dogs shown here?\nChoices:\n(A) no one\n(B) farmer\n(C) skiing lady\n(D) pet store" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.375, "loss": 4.039592266082764 }, { "id": "identity_244061", "image": "VQA2.0/images/163568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many zebras are there in the image, and what is the total number of animals present?" }, { "from": "gpt", "value": "The answer is seven animals" } ], "clip_score": 21.5, "loss": 5.2403035163879395 }, { "id": "identity_305951", "image": "AI2D/images/3642.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which layer is between the left atrium and a coronary artery on the left?\nChoices:\n(A) aorta\n(B) pulmonary artery\n(C) left pulmonary veins\n(D) descending aorta" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.203125, "loss": 4.482019901275635 }, { "id": "identity_291267", "image": "IconQA/images/15667.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the probability of selecting a black marble without looking?\nChoices:\n(A) certain\n(B) probable\n(C) impossible\n(D) unlikely" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.015625, "loss": 4.02893590927124 }, { "id": "identity_324100", "image": "PMC-VQA/images/PMC9217618_fig2_317191.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of tissue is being observed in this pathological observation?\nChoices:\n(A) Kidney tissues\n(B) Lung tissues\n(C) Liver tissues\n(D) Stomach tissues" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.921875, "loss": 4.07767391204834 }, { "id": "identity_43723", "image": "FigureQA/images/45931.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 30.40625, "loss": 4.142569541931152 }, { "id": "identity_311914", "image": "Super-CLEVR/images/superCLEVR_new_000957.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the shape of the small purple thing the same as the big thing that is to the left of the big yellow motorbike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 28.484375, "loss": 5.02701473236084 }, { "id": "identity_295618", "image": "VQA2.0/images/495043.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of work are the men seen doing in the picture?" }, { "from": "gpt", "value": "The answer is road" } ], "clip_score": 21.75, "loss": 5.700640678405762 }, { "id": "identity_75717", "image": "ScienceQA/images/4579.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of mixture is represented in the image?" }, { "from": "gpt", "value": "The answer is Homogeneous mixture" } ], "clip_score": 24.71875, "loss": 4.097358226776123 }, { "id": "identity_330932", "image": "PMC-VQA/images/PMC6392861_F2_442004.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the inflammation found?\nChoices:\n(A) Left cerebral hemisphere\n(B) Spinal cord\n(C) Right cerebral hemisphere\n(D) Cerebellum" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.125, "loss": 4.366931915283203 }, { "id": "identity_322786", "image": "TabMWP/images/6453.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does the table represent a linear or nonlinear function?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.625, "loss": 4.346682548522949 }, { "id": "identity_27855", "image": "IconQA/images/40745.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner less likely to land?\nChoices:\n(A) yellow\n(B) white" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.28125, "loss": 4.7432403564453125 }, { "id": "identity_279242", "image": "PMC-VQA/images/PMC6331538_F9_422269.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the inset of Figure 2G?\nChoices:\n(A) Labeling on cell membranes. \n(B) Higher magnification image. \n(C) Small cytoplasmic molecules. \n(D) None of the above." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.359375, "loss": 4.203310966491699 }, { "id": "identity_59492", "image": "DVQA/images/bar_train_00065602.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the lowest preference for \"supper\"?" }, { "from": "gpt", "value": "The answer is shock" } ], "clip_score": 29.140625, "loss": 4.841106414794922 }, { "id": "identity_336168", "image": "PMC-VQA/images/PMC4450475_Fig5_391774.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this show?\nChoices:\n(A) A post-operative high-resolution temporal bone CT scan.\n(B) A surgical procedure for the temporal bone.\n(C) Bony overhang around the round window.\n(D) Abnormalities in the inner ear." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.71875, "loss": 4.442940711975098 }, { "id": "identity_338168", "image": "A-OKVQA/images/90393.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of trees are shown here in the foreground?\nChoices:\n(A) evergreen\n(B) conifers\n(C) christmas\n(D) deciduous" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 6.03515625, "loss": 4.252477169036865 }, { "id": "identity_34120", "image": "PMC-VQA/images/PMC6748179_fig7_38893.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in images (b-d)? \nChoices:\n(A) Ground truth annotation \n(B) Study objective \n(C) Raw data \n(D) Validation results " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.015625, "loss": 4.163434028625488 }, { "id": "identity_195566", "image": "TextVQA/images/8b7c688892e36f16.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many baseball players are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.1875, "loss": 4.1009392738342285 }, { "id": "identity_270483", "image": "Super-CLEVR/images/superCLEVR_new_001915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the shiny object that shares the same color as the shiny airplane?" }, { "from": "gpt", "value": "The answer is suv" } ], "clip_score": 28.15625, "loss": 4.250599384307861 }, { "id": "identity_294054", "image": "A-OKVQA/images/266200.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who is the individual under the orange umbrella?\nChoices:\n(A) dog walker\n(B) police officer\n(C) queen\n(D) life guard" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.65625, "loss": 4.268961429595947 }, { "id": "identity_194729", "image": "PMC-VQA/images/PMC7972202_Fig1_394605.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the black arrows in the image?" }, { "from": "gpt", "value": "The answer is Possible fluid accumulation or mass" } ], "clip_score": 23.5, "loss": 4.968732833862305 }, { "id": "identity_21961", "image": "A-OKVQA/images/291140.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why do you need to frequently repair beach houses?\nChoices:\n(A) law\n(B) nosy neighbors\n(C) environmental wear\n(D) beauty" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.484375, "loss": 4.073715686798096 }, { "id": "identity_315346", "image": "ScienceQA/images/8268.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the air temperature measured within the outlined area?\nChoices:\n(A) -2°C\n(B) 5°C\n(C) 12°C" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.453125, "loss": 4.177090167999268 }, { "id": "identity_14285", "image": "TextVQA/images/a83b37081e0d79cf.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is written on those banners?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 5.8671875, "loss": 4.250047206878662 }, { "id": "identity_221420", "image": "PMC-VQA/images/PMC4766417_f2_5619.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many white objects are there in the image?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 17.390625, "loss": 4.416746616363525 }, { "id": "identity_20912", "image": "A-OKVQA/images/19349.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Most of these items are probably used on what?\nChoices:\n(A) televisions\n(B) cars\n(C) light fixtures\n(D) air conditioners" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.1875, "loss": 4.132941246032715 }, { "id": "identity_334027", "image": "TQA/images/question_images/earth_eclipses_677.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is located between the moon and the sun?\nChoices:\n(A) pluto\n(B) umbra\n(C) earth\n(D) penumbra" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.75, "loss": 4.288478374481201 }, { "id": "identity_303731", "image": "A-OKVQA/images/42953.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The helicopter helps which kind of sports participants?\nChoices:\n(A) bowlers\n(B) none\n(C) skiers\n(D) sledders" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.671875, "loss": 4.225942134857178 }, { "id": "identity_270757", "image": "ScienceQA/images/9570.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being indicated by the highlighted area?\nChoices:\n(A) Palau\n(B) Fiji\n(C) Tonga\n(D) Samoa" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.71875, "loss": 4.348557472229004 }, { "id": "identity_273480", "image": "ScienceQA/images/14047.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic is shared by both of these objects?\nChoices:\n(A) bouncy\n(B) yellow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.46875, "loss": 4.385273456573486 }, { "id": "identity_300666", "image": "TQA/images/question_images/nuclear_energy_7103.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of a nuclear power plant absorbs neutrons and halts the chain reaction?\n\nChoices:\n(A) Pump\n(B) Control Rods\n(C) Reactor Vessel\n(D) Generator" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.03125, "loss": 4.1239495277404785 }, { "id": "identity_337358", "image": "A-OKVQA/images/214810.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of animals are seen in the image?\nChoices:\n(A) sheep\n(B) goat\n(C) cow\n(D) dog" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.15625, "loss": 4.0385026931762695 }, { "id": "identity_289116", "image": "VQA2.0/images/509601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of individuals currently dressed in orange shirts?" }, { "from": "gpt", "value": "The answer is lot" } ], "clip_score": 22.765625, "loss": 4.158429145812988 }, { "id": "identity_31125", "image": "IconQA/images/31333.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner less likely to land?\nChoices:\n(A) yellow\n(B) white" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.828125, "loss": 4.735090732574463 }, { "id": "identity_76202", "image": "ScienceQA/images/8866.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many secondary consumers are depicted in this food web?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 28.875, "loss": 4.593898296356201 }, { "id": "identity_148668", "image": "DocVQA/images/kjgf0227_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the expected publication period for the article on \"Folic Acid, Vitamin B6, Pantothenic Acid and Vitamin B12 in Human Dietaries\"?" }, { "from": "gpt", "value": "The answer is Sept.-Oct" } ], "clip_score": 17.984375, "loss": 4.519558429718018 }, { "id": "identity_294521", "image": "A-OKVQA/images/63040.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of work do the animals here do?\nChoices:\n(A) service dogs\n(B) majorettes\n(C) dog walkers\n(D) cooks" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.90625, "loss": 4.215032577514648 }, { "id": "identity_18830", "image": "PMC-VQA/images/PMC8034399_F4_412878.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the radiographic appearance of the lesion shown in the CT images? \nChoices:\n(A) Lytic \n(B) Well-circumscribed \n(C) Multilocular \n(D) None of the above " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.515625, "loss": 4.795310020446777 }, { "id": "identity_300402", "image": "TextVQA/images/ea7f0c93cf7d59ba.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the word written in red under the exclamation mark on the left side?" }, { "from": "gpt", "value": "The answer is soap" } ], "clip_score": 18.5, "loss": 4.920873165130615 }, { "id": "identity_255102", "image": "TextVQA/images/57e1ac153cde6348.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the flavor of the Floris beer shown in the image, and what other brands of beer are visible in the background?" }, { "from": "gpt", "value": "The answer is Honey, Passoa, Gordon's, Achouffe, Achiel" } ], "clip_score": 37.5625, "loss": 4.805211544036865 }, { "id": "identity_199950", "image": "PMC-VQA/images/PMC5579201_Fig6_210197.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label \"AL\" indicating in the image?" }, { "from": "gpt", "value": "The answer is Anterior lateral microtubule cell" } ], "clip_score": 27.640625, "loss": 4.065374374389648 }, { "id": "identity_322679", "image": "A-OKVQA/images/133061.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on the animal on the right?\nChoices:\n(A) ribbon\n(B) crown\n(C) baby\n(D) rope" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.4453125, "loss": 4.067963600158691 }, { "id": "identity_145714", "image": "DVQA/images/bar_train_00163268.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many categories are represented in the graph?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.578125, "loss": 4.470633506774902 }, { "id": "identity_27516", "image": "VQA2.0/images/61488.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is on top of the blue rectangular stand?" }, { "from": "gpt", "value": "The answer is towel" } ], "clip_score": 12.09375, "loss": 4.094542503356934 }, { "id": "identity_225647", "image": "A-OKVQA/images/404655.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the possible reasons for the persons walking to be dressed in light clothing, and how does this relate to the temperature they are experiencing?" }, { "from": "gpt", "value": "The answer is Hot weather" } ], "clip_score": 28.28125, "loss": 4.567211627960205 }, { "id": "identity_4762", "image": "VQA2.0/images/92315.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the bear holding?" }, { "from": "gpt", "value": "The answer is book" } ], "clip_score": 20.359375, "loss": 4.05569314956665 }, { "id": "identity_296067", "image": "ScienceQA/images/14859.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic best describes this object?\nChoices:\n(A) stretchy\n(B) shiny" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.375, "loss": 4.436305046081543 }, { "id": "identity_279781", "image": "AI2D/images/2235.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label that corresponds to the grasshopper from the given choices: A, I, F, C.\nChoices:\n(A) A\n(B) I\n(C) F\n(D) C" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.609375, "loss": 4.1043596267700195 }, { "id": "identity_325987", "image": "A-OKVQA/images/212241.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the wires above this for?\nChoices:\n(A) climbing\n(B) decoration\n(C) protection\n(D) power" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 7.20703125, "loss": 4.124627113342285 }, { "id": "identity_29697", "image": "A-OKVQA/images/284902.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is most of the fruit?\nChoices:\n(A) red\n(B) yellow\n(C) green\n(D) orange" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.7109375, "loss": 4.005824089050293 }, { "id": "identity_199385", "image": "PMC-VQA/images/PMC3212535_pone-0027190-g002_115008.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many instances of the letter 'T' can be found in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 22.75, "loss": 4.071646213531494 }, { "id": "identity_300780", "image": "A-OKVQA/images/518127.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What else might you expect to see in this large venue with big balls?\nChoices:\n(A) soccer match\n(B) golf match\n(C) swimming competition\n(D) theater play" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.265625, "loss": 4.227809906005859 }, { "id": "identity_294010", "image": "A-OKVQA/images/6074.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which activity is the truck currently participating?\nChoices:\n(A) parade\n(B) boycott\n(C) protest\n(D) strike" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.03125, "loss": 4.37155818939209 }, { "id": "identity_310375", "image": "TQA/images/question_images/human_system_eye_6131.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this located?\nChoices:\n(A) Surrounding the macula\n(B) Surrounding the iris\n(C) Surrounding the vitreous gel\n(D) Surrounding the pupil" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.6875, "loss": 4.044195652008057 }, { "id": "identity_295319", "image": "TQA/images/question_images/earth_eclipses_677.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options comes between the moon and the sun: Pluto, Umbra, Earth, or Penumbra?\nChoices:\n(A) pluto\n(B) umbra\n(C) earth\n(D) penumbra" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.640625, "loss": 4.207338809967041 }, { "id": "identity_31384", "image": "Super-CLEVR/images/superCLEVR_new_001359.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a blue chopper; are there any purple objects left of it?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 25.25, "loss": 5.171682357788086 }, { "id": "identity_190169", "image": "TextVQA/images/bda84bf5acace54b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of the product with the yellow price tag of 9.99?" }, { "from": "gpt", "value": "The answer is FERTA" } ], "clip_score": 22.28125, "loss": 5.035608768463135 }, { "id": "identity_313614", "image": "PMC-VQA/images/PMC8129905_Ch1.F1_444244.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is suggested by the observed hypoattenuating material?\nChoices:\n(A) Joint inflammation\n(B) Joint deformity\n(C) Joint alignment\n(D) Joint fusion" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.671875, "loss": 4.467084884643555 }, { "id": "identity_307758", "image": "PMC-VQA/images/PMC4604108_Fig1_432942.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the S-shaped red line represent?\nChoices:\n(A) Development of a muscle\n(B) Position of a body part\n(C) Position of another body part\n(D) Neither of the above." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.515625, "loss": 4.554022789001465 }, { "id": "identity_215582", "image": "Super-CLEVR/images/superCLEVR_new_001992.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is in front of the purple bus and to the right of the green jet?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 33.90625, "loss": 4.239694118499756 }, { "id": "identity_121984", "image": "VQA2.0/images/354045.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the container to the left inside the store?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 17.734375, "loss": 4.139535427093506 }, { "id": "identity_269476", "image": "PMC-VQA/images/PMC5034412_Fig6_76656.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the back of the specimen located in relation to the head?\nChoices:\n(A) Ventro-anterior part of the head \n(B) Dorso-anterior part of the head \n(C) Ventro posterior terminal part of the specimen \n(D) None of the above." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.640625, "loss": 4.530816555023193 }, { "id": "identity_293970", "image": "PMC-VQA/images/PMC3505571_Fig6_168350.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which artery is primarily impacted by the thickening of the wall among the following options: left pulmonary artery, right pulmonary artery, pulmonary vein, or aorta?\nChoices:\n(A) left pulmonary artery \n(B) right pulmonary artery \n(C) pulmonary vein \n(D) aorta" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.265625, "loss": 4.232770919799805 }, { "id": "identity_3329", "image": "PMC-VQA/images/PMC8068274_jpm-11-00285-f007_425404.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the image show? \nChoices:\n(A) MRI of the abdomen \n(B) CT scan of the chest \n(C) X-ray of the leg \n(D) PET scan of the brain. " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.578125, "loss": 4.221930027008057 }, { "id": "identity_288485", "image": "A-OKVQA/images/168405.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for bananas hanging?\nChoices:\n(A) easier access\n(B) provide shade\n(C) looks nicer\n(D) slow ripening" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.078125, "loss": 4.033808708190918 }, { "id": "identity_124913", "image": "A-OKVQA/images/173161.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What architectural feature can be seen through the trees?" }, { "from": "gpt", "value": "The answer is Church windows" } ], "clip_score": 15.4921875, "loss": 4.045266628265381 }, { "id": "identity_46612", "image": "DVQA/images/bar_train_00114954.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object category has the highest number of people preferring 'store'?" }, { "from": "gpt", "value": "The answer is feel" } ], "clip_score": 28.65625, "loss": 4.523379325866699 }, { "id": "identity_223428", "image": "VizWiz/images/VizWiz_train_00012852.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of surface is the water bottle placed on?" }, { "from": "gpt", "value": "The answer is A gray fabric surface" } ], "clip_score": 31.046875, "loss": 4.629476547241211 }, { "id": "identity_231853", "image": "PMC-VQA/images/PMC1839090_F1_10209.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the anatomical landmarks visible in the X-ray, and how do they help in identifying the location of the osteochondroma in the scapula?" }, { "from": "gpt", "value": "The answer is Inferior aspect of the anterior scapula" } ], "clip_score": 30.796875, "loss": 4.021263599395752 }, { "id": "identity_10433", "image": "PMC-VQA/images/PMC7435725_materials-13-03400-f006_237018.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the left column? \nChoices:\n(A) Higher magnifications \n(B) Lower magnifications \n(C) Blends \n(D) None of the above. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.671875, "loss": 4.608933448791504 }, { "id": "identity_264754", "image": "ScienceQA/images/18491.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organism in the food web contains matter that ultimately transfers to the earthworm? Is it the Arctic fox, mushroom, grizzly bear, or lichen?\nChoices:\n(A) Arctic fox\n(B) mushroom\n(C) grizzly bear\n(D) lichen" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.5, "loss": 4.343331813812256 }, { "id": "identity_207393", "image": "TextVQA/images/36714b339aaf9c61.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 17.796875, "loss": 4.522249698638916 }, { "id": "identity_296476", "image": "PMC-VQA/images/PMC6269335_Fig20_404805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the MRI image?\nChoices:\n(A) Spinal cord injury.\n(B) Brain tumor.\n(C) Hearing loss.\n(D) Broken bone." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.03125, "loss": 4.182865142822266 }, { "id": "identity_19290", "image": "DVQA/images/bar_train_00125033.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second group of bars from the bottom?" }, { "from": "gpt", "value": "The answer is river" } ], "clip_score": 15.828125, "loss": 4.69234561920166 }, { "id": "identity_336374", "image": "A-OKVQA/images/437967.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the meat in this dish?\nChoices:\n(A) chicken\n(B) beef\n(C) pork chop\n(D) salmon" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.46875, "loss": 4.232290744781494 }, { "id": "identity_212648", "image": "A-OKVQA/images/67116.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the couch?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 21.625, "loss": 4.018221855163574 }, { "id": "identity_272837", "image": "Super-CLEVR/images/superCLEVR_new_001375.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the matte dirtbike the same size as the gray object located to the left of the blue metal bicycle?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 26.1875, "loss": 5.012181758880615 }, { "id": "identity_286323", "image": "PMC-VQA/images/PMC4858846_Fig15_28378.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in image c?\nOptions:\n(A) 3D segmentation with proposed refinement\n(B) 2D segmentation\n(C) CT image sequence\n(D) No segmentation is shown" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.53125, "loss": 4.028636932373047 }, { "id": "identity_212203", "image": "A-OKVQA/images/308760.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of building is depicted in the picture on the box behind the large toy?" }, { "from": "gpt", "value": "The answer is A traditional Chinese building" } ], "clip_score": 22.375, "loss": 4.139049530029297 }, { "id": "identity_189175", "image": "PMC-VQA/images/PMC8003397_cancers-13-01396-f007_404170.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can we identify the presence of any foreign bodies in the pelvic area?" }, { "from": "gpt", "value": "The answer is No foreign bodies identified" } ], "clip_score": 27.96875, "loss": 4.254830360412598 }, { "id": "identity_286071", "image": "TextVQA/images/3de5c3861d55a253.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which letters are the first 2 on the license plate?" }, { "from": "gpt", "value": "The answer is gmail" } ], "clip_score": 16.1875, "loss": 5.872110843658447 }, { "id": "identity_278612", "image": "PMC-VQA/images/PMC4270221_fig02_345499.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which procedure was utilized to acquire this image? \nOptions:\n(A) CT scan\n(B) X-ray\n(C) MRI\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.921875, "loss": 4.089284420013428 }, { "id": "identity_108509", "image": "TextVQA/images/ba1ceba6dd13cfd1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of surface is the microwave placed on?" }, { "from": "gpt", "value": "The answer is A white platform" } ], "clip_score": 25.765625, "loss": 4.128807544708252 }, { "id": "identity_324772", "image": "AI2D/images/3792.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter labels the petiole?\nChoices:\n(A) M\n(B) L\n(C) I\n(D) E" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.96875, "loss": 4.669224739074707 }, { "id": "identity_273135", "image": "FigureQA/images/98628.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater in shade, Firebrick or Periwinkle?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.359375, "loss": 4.065169334411621 }, { "id": "identity_324179", "image": "TQA/images/question_images/earth_poles_168.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which does not describe this?\nChoices:\n(A) sun never sets\n(B) sun never rises\n(C) long days\n(D) short days" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.921875, "loss": 4.11602258682251 }, { "id": "identity_336993", "image": "PMC-VQA/images/PMC4109464_F2_308442.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What aspect of the body parts is depicted in the SEM images?\nChoices:\n(A) Anterior\n(B) Posterior\n(C) Lateral\n(D) Dorsal" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.03125, "loss": 4.2588653564453125 }, { "id": "identity_27165", "image": "PMC-VQA/images/PMC3328163_fig4_134167.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be seen in the initial axial unenhanced CT?\nChoices:\n(A) Mass in the left temporal lobe. \n(B) Low attenuation in the left middle cerebral artery territory. \n(C) High attenuation in the right middle cerebral artery territory. \n(D) An enlarged ventricle. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.453125, "loss": 4.563351154327393 }, { "id": "identity_303161", "image": "AI2D/images/4276.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the layer labeled F?\nChoices:\n(A) seal\n(B) sandstone\n(C) gas-rich shale\n(D) coalbed methane" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.0, "loss": 4.2930426597595215 }, { "id": "identity_292675", "image": "AI2D/images/4398.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following objects is needle-like in shape? \n(A) G\n(B) A\n(C) D\n(D) C" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.9375, "loss": 4.064394950866699 }, { "id": "identity_325194", "image": "IconQA/images/6339.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the shape shown here?\nChoices:\n(A) sphere\n(B) cone\n(C) cylinder\n(D) cube" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.203125, "loss": 4.6369428634643555 }, { "id": "identity_57184", "image": "DVQA/images/bar_train_00048336.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first group of bars from the top?" }, { "from": "gpt", "value": "The answer is set" } ], "clip_score": 16.25, "loss": 4.004543304443359 }, { "id": "identity_270338", "image": "A-OKVQA/images/63043.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose did the man attach ropes to the cattle?\nChoices:\n(A) to lead\n(B) to ride\n(C) to punish\n(D) to kill" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.859375, "loss": 4.239894390106201 }, { "id": "identity_315300", "image": "AI2D/images/2166.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by label C?\nChoices:\n(A) Whitehead\n(B) Fungus overwintering on roots as mycelium\n(C) Runner hyphae on roots\n(D) Severe infection of plant" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.4375, "loss": 4.378125190734863 }, { "id": "identity_19532", "image": "FigureQA/images/47015.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Royal Blue the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.21875, "loss": 4.229457855224609 }, { "id": "identity_18798", "image": "IconQA/images/55094.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this shape have a circle as a face?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.75, "loss": 4.39256477355957 }, { "id": "identity_4611", "image": "A-OKVQA/images/198545.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of all these objects?\nChoices:\n(A) good luck\n(B) decorative\n(C) hiding\n(D) for sale" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.390625, "loss": 4.813609600067139 }, { "id": "identity_22730", "image": "IconQA/images/81862.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What shape is this?\nChoices:\n(A) sphere\n(B) cylinder\n(C) cone\n(D) cube" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.109375, "loss": 4.335030555725098 }, { "id": "identity_142523", "image": "DocVQA/images/khcn0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who has forwarded the meeting request to additional recipients?" }, { "from": "gpt", "value": "The answer is Kime, Robin" } ], "clip_score": 24.03125, "loss": 5.092155933380127 }, { "id": "identity_30946", "image": "PMC-VQA/images/PMC6411966_f0030_448640.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique was used to generate the spectra? \nChoices:\n(A) CT scan \n(B) Mammography \n(C) MRI \n(D) X-ray " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.640625, "loss": 4.176976203918457 }, { "id": "identity_26630", "image": "A-OKVQA/images/367608.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of skate trick is the man doing?\nChoices:\n(A) flip\n(B) grind\n(C) grab\n(D) manual" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.859375, "loss": 4.3298659324646 }, { "id": "identity_61099", "image": "DocVQA/images/tqjp0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person representing the book shop?" }, { "from": "gpt", "value": "The answer is Leonita Baum" } ], "clip_score": 17.359375, "loss": 5.422740936279297 }, { "id": "identity_24429", "image": "PMC-VQA/images/PMC7397898_vms3273-fig-0002_224754.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the asterisk in the image? \nChoices:\n(A) The left retroperitoneum \n(B) The right circumvaval ureter \n(C) The caudal vena cava \n(D) The mid abdomen. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.25, "loss": 4.231632709503174 }, { "id": "identity_293589", "image": "TQA/images/question_images/periodic_table_8160.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What element is represented by the letter N?\nChoices:\n(A) hydrogen\n(B) nitrogen\n(C) magnesium\n(D) sodium" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.40625, "loss": 4.341929912567139 }, { "id": "identity_285237", "image": "PMC-VQA/images/PMC8126415_DEV194597F1_443014.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the meaning of the red signals shown in the images?\nChoices:\n(A) Nuclei in S phase \n(B) Nuclei in G1/G0 phase \n(C) Nuclei in G2/M phase \n(D) Mitotic cells" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.34375, "loss": 4.27305793762207 }, { "id": "identity_300351", "image": "DVQA/images/bar_train_00006716.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are these bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.40625, "loss": 4.198252201080322 }, { "id": "identity_33934", "image": "PMC-VQA/images/PMC7393207_F2_222998.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the type of imaging used in the study? \nChoices:\n(A) Computed Tomography \n(B) Magnetic Resonance Imaging \n(C) X-Ray \n(D) Ultrasound " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.75, "loss": 4.32053279876709 }, { "id": "identity_276353", "image": "PMC-VQA/images/PMC7500020_Fig1_256604.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the type of radiograph displayed in the image:\n(A) AP radiograph\n(B) Lateral radiograph\n(C) Submentovertex radiograph\n(D) Axial radiograph" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.078125, "loss": 4.037201881408691 }, { "id": "identity_327191", "image": "PMC-VQA/images/PMC3177912_F8_109376.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the arrowhead in this image?\nChoices:\n(A) Alveolar wall.\n(B) Type I endothelial cell.\n(C) Type II epithelial cell.\n(D) Capillary." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.625, "loss": 5.069167613983154 }, { "id": "identity_288250", "image": "PMC-VQA/images/PMC7939179_F4_385961.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the meaning of the green arrow shown in the image?\nChoices:\n(A) A fracture in the skull \n(B) A low-density shadow at frontal lobe \n(C) A high-density shadow at basal ganglia \n(D) A high-density shadow at cerebral cortex" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.75, "loss": 4.310858726501465 }, { "id": "identity_132532", "image": "A-OKVQA/images/201925.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any specific feature of the oven based on the image?" }, { "from": "gpt", "value": "The answer is Glass door and rounded handle" } ], "clip_score": 25.359375, "loss": 4.435249328613281 }, { "id": "identity_5880", "image": "IconQA/images/4508.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of rectangles even or odd?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.375, "loss": 4.182217121124268 }, { "id": "identity_10074", "image": "ScienceQA/images/13092.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these organisms contains matter that was once part of the kelp?\nChoices:\n(A) zooplankton\n(B) plainfin midshipman\n(C) sea urchin" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.0625, "loss": 4.165854454040527 }, { "id": "identity_28058", "image": "A-OKVQA/images/187822.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What will the person wearing red shorts do?\nChoices:\n(A) quit\n(B) go down\n(C) go home\n(D) skate up" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.71875, "loss": 4.5554609298706055 }, { "id": "identity_311025", "image": "ScienceQA/images/21188.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared characteristic of these items?\nChoices:\n(A) stretchy\n(B) yellow" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.953125, "loss": 5.074179649353027 }, { "id": "identity_300658", "image": "Super-CLEVR/images/superCLEVR_new_001482.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the aircraft to the left of the small metal utility bike the same size as the bus to the right of the metal utility bike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 29.71875, "loss": 4.9762468338012695 }, { "id": "identity_8500", "image": "ScienceQA/images/2554.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these three objects have in common?\nChoices:\n(A) slippery\n(B) blue\n(C) stretchy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.921875, "loss": 4.083061218261719 }, { "id": "identity_328459", "image": "A-OKVQA/images/289883.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What style design connects the two parts of the bus here?\nChoices:\n(A) invisible\n(B) bolts\n(C) accordion\n(D) rope" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.421875, "loss": 4.366686820983887 }, { "id": "identity_32834", "image": "TextVQA/images/9885ee997e817f37.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the number on the diabetic testing machine?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 19.5, "loss": 4.004523277282715 }, { "id": "identity_131429", "image": "VQA-AS/images/1593.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cats are present in the room?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 28.78125, "loss": 4.3333024978637695 }, { "id": "identity_35627", "image": "PMC-VQA/images/PMC8833544_animals-12-00349-f001_189689.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible at the bottom right corner of the image? \nChoices:\n(A) gas content \n(B) fluid content \n(C) fecal matter \n(D) tumor " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.625, "loss": 4.8300299644470215 }, { "id": "identity_33301", "image": "PMC-VQA/images/PMC5547522_Fig2_200908.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the black arrow in the given image? \nChoices:\n(A) mass \n(B) duct expansion \n(C) cyst \n(D) necrosis " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.9375, "loss": 5.334682941436768 }, { "id": "identity_262760", "image": "TQA/images/question_images/cycle_rock_6733.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: After erosion and weathering, what do igneous rocks change into?\nChoices:\n(A) Metamorphic rocks\n(B) Sediments\n(C) Magma\n(D) Volcanic eruptions" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.78125, "loss": 4.089954853057861 }, { "id": "identity_35243", "image": "DocVQA/images/llcv0228_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the schedule at the time of 9.00 - 9.15 as mentioned in the given agenda ?" }, { "from": "gpt", "value": "The answer is Introduction" } ], "clip_score": 23.640625, "loss": 4.455845832824707 }, { "id": "identity_15806", "image": "FigureQA/images/66744.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Salmon have the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.765625, "loss": 4.122961521148682 }, { "id": "identity_205756", "image": "A-OKVQA/images/101636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are mounted on the car roof?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 25.015625, "loss": 4.0602593421936035 }, { "id": "identity_267307", "image": "IconQA/images/88462.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the fraction of the shape that is colored purple? \n(A) 2/8 \n(B) 1/10 \n(C) 1/8 \n(D) 1/6" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.28125, "loss": 4.01560115814209 }, { "id": "identity_10679", "image": "PMC-VQA/images/PMC6970096_FIG2_102919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which view shows the knee effusion more clearly? \nChoices:\n(A) Sagittal view \n(B) Transverse view \n(C) Both views show it equally clearly \n(D) No view shows it clearly " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.34375, "loss": 4.403072357177734 }, { "id": "identity_23840", "image": "TextVQA/images/20babd71404bfa95.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what does the knob on the left control?" }, { "from": "gpt", "value": "The answer is feedback and rotate" } ], "clip_score": 19.15625, "loss": 4.11558723449707 }, { "id": "identity_21376", "image": "PMC-VQA/images/PMC9002522_polymers-14-01462-f002_251254.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the resin tags look like under high magnification? \nChoices:\n(A) Short, thin, and sparse \n(B) Long, dense, and thick \n(C) Large and irregular shaped \n(D) Colorful and fluorescent " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.9375, "loss": 4.203314304351807 }, { "id": "identity_196587", "image": "A-OKVQA/images/95841.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible on the field?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 17.9375, "loss": 4.5168867111206055 }, { "id": "identity_283317", "image": "TQA/images/question_images/parts_skeleton_7254.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which bony structure typically forms the head of the skeleton in most vertebrates?\nChoices:\n(A) Ulna\n(B) Skull\n(C) Sternum\n(D) Pelvis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.578125, "loss": 4.006657123565674 }, { "id": "identity_279363", "image": "AI2D/images/1184.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the core of the Nucleus?\nChoices:\n(A) Centrioles\n(B) Nucleolus\n(C) cytoplasm\n(D) ribosomes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.140625, "loss": 4.188136100769043 }, { "id": "identity_326557", "image": "A-OKVQA/images/412094.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the train close to?\nChoices:\n(A) cow\n(B) building\n(C) wheelbarrow\n(D) freeway" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.03125, "loss": 4.715948104858398 }, { "id": "identity_298523", "image": "TextVQA/images/640dae20e17a25c6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the content or subject of this flyer?" }, { "from": "gpt", "value": "The answer is play" } ], "clip_score": 21.171875, "loss": 5.2552618980407715 }, { "id": "identity_28657", "image": "A-OKVQA/images/329258.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What word is the person in the air most familiar with?\nChoices:\n(A) enzuiguiri\n(B) kickflip\n(C) quark\n(D) ad hoc" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.75, "loss": 4.213019371032715 }, { "id": "identity_23586", "image": "A-OKVQA/images/201069.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is most likely inside of the building next to the cars?\nChoices:\n(A) washing machine\n(B) firemen\n(C) clowns\n(D) atm" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.3125, "loss": 4.761399269104004 }, { "id": "identity_328614", "image": "PMC-VQA/images/PMC3784059_Fig3_233800.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the images?\nChoices:\n(A) MRI scan\n(B) CT scan\n(C) X-ray\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.390625, "loss": 5.241555213928223 }, { "id": "identity_50154", "image": "PlotQA/images/34037.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year was the duration of primary education less than 6 years?" }, { "from": "gpt", "value": "The answer is No year" } ], "clip_score": 26.890625, "loss": 4.1962714195251465 }, { "id": "identity_334010", "image": "PMC-VQA/images/PMC6723115_F1_33045.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the white dotted line in the image indicate?\nChoices:\n(A) The basal surface \n(B) The lateral surface \n(C) The apical surface \n(D) The luminal surface." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.125, "loss": 4.55258846282959 }, { "id": "identity_124218", "image": "VQA-AS/images/6526.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Four people" } ], "clip_score": 22.421875, "loss": 4.199140548706055 }, { "id": "identity_28861", "image": "A-OKVQA/images/137496.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the attendant to pump the gas?\nChoices:\n(A) there's none\n(B) inside plane\n(C) already left\n(D) inside booth" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.96875, "loss": 4.3380022048950195 }, { "id": "identity_213102", "image": "VQA-AS/images/19051.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many doors are visible in the room?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.515625, "loss": 4.099316596984863 }, { "id": "identity_334481", "image": "IconQA/images/60708.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this dotted line a line of symmetry?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.796875, "loss": 4.006666660308838 }, { "id": "identity_39380", "image": "DocVQA/images/gqlp0000_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the distributor mentioned in the document?" }, { "from": "gpt", "value": "The answer is Sosnick - Fresno" } ], "clip_score": 27.421875, "loss": 4.578822135925293 }, { "id": "identity_202785", "image": "VQA2.0/images/319682.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the main body of the horses?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 26.578125, "loss": 4.063388347625732 }, { "id": "identity_71402", "image": "ScienceQA/images/11766.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the hole in the clothespin?" }, { "from": "gpt", "value": "The answer is To secure the spring and reduce material" } ], "clip_score": 29.6875, "loss": 4.64353084564209 }, { "id": "identity_305150", "image": "Super-CLEVR/images/superCLEVR_new_000241.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the shape of the large shiny object the same as the brown shiny thing?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 19.5625, "loss": 5.355840682983398 }, { "id": "identity_10575", "image": "A-OKVQA/images/100726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the people here enjoy?\nChoices:\n(A) gaming\n(B) exreme sports\n(C) sales\n(D) dancing" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.3828125, "loss": 4.2873735427856445 }, { "id": "identity_265888", "image": "TQA/images/question_images/human_system_circulatory_3622.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the body connects the lungs to the heart?\nChoices:\n(A) aorta\n(B) Systemic veins\n(C) right atrium\n(D) right ventricle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.484375, "loss": 4.313592910766602 }, { "id": "identity_12757", "image": "FigureQA/images/67969.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Purple have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.578125, "loss": 4.401728630065918 }, { "id": "identity_125846", "image": "TextVQA/images/61408ab56f2fb616.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the circled icon on the smartphone screen?" }, { "from": "gpt", "value": "The answer is To capture a screenshot or note" } ], "clip_score": 17.4375, "loss": 4.808985710144043 }, { "id": "identity_304069", "image": "A-OKVQA/images/71608.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are the people in uniforms standing here?\nChoices:\n(A) street workers\n(B) entertainment\n(C) doctors\n(D) security" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.5625, "loss": 4.216811180114746 }, { "id": "identity_17297", "image": "Super-CLEVR/images/superCLEVR_new_001852.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the shiny object that is behind the brown object and in front of the tiny yellow metal thing?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 18.09375, "loss": 4.24744176864624 }, { "id": "identity_295755", "image": "A-OKVQA/images/284160.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which group of individuals is most commonly seen riding this mode of transportation? \n(A) cable workers\n(B) prisoners\n(C) tourists\n(D) commuters" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.78125, "loss": 4.365687370300293 }, { "id": "identity_71676", "image": "AI2D/images/2674.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many seasons are depicted in the illustration?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 24.984375, "loss": 4.140225410461426 }, { "id": "identity_326143", "image": "A-OKVQA/images/422877.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is causing the cars to appear blurred?\nChoices:\n(A) old photograph\n(B) bad camera\n(C) high speed\n(D) are melting" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.1875, "loss": 4.5837554931640625 }, { "id": "identity_316582", "image": "ScienceQA/images/20088.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What property is associated with this item?\nChoices:\n(A) stretchy\n(B) transparent" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.859375, "loss": 4.610289573669434 }, { "id": "identity_321550", "image": "ScienceQA/images/11541.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which place is highlighted?\nChoices:\n(A) Saint Vincent and the Grenadines\n(B) Barbados\n(C) Saint Lucia\n(D) Cuba" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.390625, "loss": 4.3497843742370605 }, { "id": "identity_36813", "image": "PMC-VQA/images/PMC8413751_brb32180-fig-0001_50741.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of brain tumor is seen in the image?\nChoices:\n(A) Meningioma \n(B) Glioblastoma \n(C) Schwannoma \n(D) Pituitary adenoma" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.796875, "loss": 4.6668596267700195 }, { "id": "identity_291010", "image": "A-OKVQA/images/132760.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the man in the suit and hat engaged in?\nChoices:\n(A) comic relief\n(B) dancing\n(C) judging horses\n(D) lookalike contest" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.453125, "loss": 4.3908209800720215 }, { "id": "identity_316794", "image": "ScienceQA/images/19841.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Montgomery\n(B) Columbus\n(C) Cincinnati\n(D) Cleveland" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.21875, "loss": 4.221587181091309 }, { "id": "identity_11817", "image": "AI2D/images/2602.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram above, what letter represents the High tide?\nChoices:\n(A) E\n(B) F\n(C) A\n(D) G" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.03125, "loss": 4.138631343841553 }, { "id": "identity_323538", "image": "ScienceQA/images/4426.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What common characteristic do these three items share?\nChoices:\n(A) sour\n(B) translucent\n(C) yellow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.84375, "loss": 4.339243412017822 }, { "id": "identity_37998", "image": "PMC-VQA/images/PMC4516413_F1_409690.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which gyrus is affected by the cortical atrophy? \nChoices:\n(A) Inferior frontal gyrus \n(B) Precentral gyrus \n(C) Superior temporal gyrus \n(D) Fusiform gyrus " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.875, "loss": 4.168357849121094 }, { "id": "identity_316311", "image": "PMC-VQA/images/PMC4422723_pone.0123975.g003_383982.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique was used for these axial slices?\nChoices:\n(A) Ultrasound \n(B) PET scan \n(C) MRI scan \n(D) CT scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.375, "loss": 4.726975917816162 }, { "id": "identity_36434", "image": "TQA/images/abc_question_images/photosynthesis_11263.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: With which letter are chloroplasts represented in this diagram?\nChoices:\n(A) X\n(B) P\n(C) E\n(D) M" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.390625, "loss": 4.023921012878418 }, { "id": "identity_327970", "image": "PMC-VQA/images/PMC6940314_Fig3_93321.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the image depict in patient 2?\nChoices:\n(A) CT scan\n(B) DWI and 68Ga-PSMA-PET scan fusion\n(C) 68Ga-PSMA-PET scan only\n(D) DWI scan only" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.734375, "loss": 4.288582801818848 }, { "id": "identity_294671", "image": "PMC-VQA/images/PMC5086477_Fig2_89351.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the PET-CT scan? \nChoices:\n(A) Blood circulation\n(B) Abnormal uptake of glucose by the mass\n(C) Normal uptake of glucose by the mass\n(D) Abnormal movement of the mass." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.328125, "loss": 4.626885414123535 }, { "id": "identity_32164", "image": "Super-CLEVR/images/superCLEVR_new_001238.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the metal object in front of the big brown object that is in front of the cyan object behind the large rubber jet?" }, { "from": "gpt", "value": "The answer is suv" } ], "clip_score": 22.140625, "loss": 4.300599575042725 }, { "id": "identity_129515", "image": "VQA2.0/images/423023.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the tablecloth on the table in the foreground?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 19.3125, "loss": 4.029624938964844 }, { "id": "identity_109814", "image": "A-OKVQA/images/555910.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the container with a spout, located to the left of the yellow bottle on the countertop?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 21.5625, "loss": 4.38712215423584 }, { "id": "identity_337647", "image": "A-OKVQA/images/169116.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How is the person on the right feeling in this moment?\nChoices:\n(A) amused\n(B) disturbed\n(C) sad\n(D) angry" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.7109375, "loss": 4.428832530975342 }, { "id": "identity_14412", "image": "A-OKVQA/images/4554.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What skill level do most snowboarders here have?\nChoices:\n(A) beginners\n(B) professional\n(C) olympic\n(D) competitive" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.171875, "loss": 4.360544681549072 }, { "id": "identity_330633", "image": "ScienceQA/images/14700.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of this colony?\nChoices:\n(A) Pennsylvania\n(B) Connecticut\n(C) New York\n(D) Massachusetts" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.1875, "loss": 4.703670024871826 }, { "id": "identity_321911", "image": "IconQA/images/46125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image shows a moon. What is the eighth image?\nChoices:\n(A) star\n(B) rocket\n(C) moon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.5625, "loss": 4.110654830932617 }, { "id": "identity_223409", "image": "TextVQA/images/7a7e61e43f975656.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shorts worn by the person carrying a mattress?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 16.25, "loss": 4.498110771179199 }, { "id": "identity_62127", "image": "DocVQA/images/qpvw0217_19.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the article mentioned in the tracking report?" }, { "from": "gpt", "value": "The answer is Progestins in HRT" } ], "clip_score": 22.609375, "loss": 4.570577144622803 }, { "id": "identity_309659", "image": "PMC-VQA/images/PMC6065269_F1_347427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of radiograph is shown in this image?\nChoices:\n(A) Anteroposterior radiograph\n(B) Lateral radiograph\n(C) Oblique radiograph\n(D) CT scan" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.375, "loss": 4.381447792053223 }, { "id": "identity_324618", "image": "TQA/images/abc_question_images/earth_moon_phases_11247.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these shows the waxing gibbous?\nChoices:\n(A) W\n(B) Y\n(C) A\n(D) X" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.15625, "loss": 4.480917930603027 }, { "id": "identity_313769", "image": "ScienceQA/images/1426.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Cedar Rapids\n(B) Frankfort\n(C) Des Moines\n(D) Davenport" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.46875, "loss": 4.34044075012207 }, { "id": "identity_324869", "image": "PMC-VQA/images/PMC4979667_jmor20370-fig-0003_61456.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this image show?\nChoices:\n(A) Internal structure of something,\n(B) External surface of something,\n(C) Cross-section of something,\n(D) SEM image of a tile." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.15625, "loss": 4.682686805725098 }, { "id": "identity_321412", "image": "A-OKVQA/images/261426.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is in the sandwich that is highest up?\nChoices:\n(A) black olives\n(B) toothpick\n(C) giant eggs\n(D) carrots" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.59375, "loss": 4.195334434509277 }, { "id": "identity_276310", "image": "A-OKVQA/images/442367.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of area is this in town? Is it a shopping area, industrial area, residential area, or office buildings?\nChoices:\n(A) shopping area\n(B) industrial\n(C) residential area\n(D) office buildings" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.578125, "loss": 4.358701705932617 }, { "id": "identity_298627", "image": "TextVQA/images/1b9e29f852b3fec2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of products are available for sale at this location?" }, { "from": "gpt", "value": "The answer is comic pen" } ], "clip_score": 27.3125, "loss": 5.498067378997803 }, { "id": "identity_318554", "image": "PMC-VQA/images/PMC7229234_fig0010_171770.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the urethrography x-ray show in this area? \nChoices:\n(A) Enlarged prostate gland (arrow) \n(B) Normal urethra with no evidence of stricture (arrow) \n(C) Multiple preputial stones (arrow) \n(D) None of the above." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.78125, "loss": 4.480557441711426 }, { "id": "identity_524", "image": "ScienceQA/images/3740.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these three objects have in common?\nChoices:\n(A) salty\n(B) yellow\n(C) hard" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.9453125, "loss": 4.293421745300293 }, { "id": "identity_325948", "image": "A-OKVQA/images/88854.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of event is this?\nChoices:\n(A) lodge party\n(B) race\n(C) ski party\n(D) bunny hop" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.046875, "loss": 4.208296298980713 }, { "id": "identity_199096", "image": "VQA2.0/images/560173.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lifebuoys can be seen on the yellow boat?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.46875, "loss": 4.121042251586914 }, { "id": "identity_2194", "image": "VizWiz/images/VizWiz_train_00006664.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What frequency is my stereo radio tuned to?" }, { "from": "gpt", "value": "The answer is rca frequency" } ], "clip_score": 35.125, "loss": 4.166104316711426 }, { "id": "identity_1377", "image": "A-OKVQA/images/571950.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is under this man?\nChoices:\n(A) water\n(B) shadow\n(C) grass\n(D) sand" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.0234375, "loss": 4.549965858459473 }, { "id": "identity_326010", "image": "A-OKVQA/images/45222.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the company that makes this beverage located?\nChoices:\n(A) philadelphia\n(B) des moines\n(C) newport\n(D) miami" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.9140625, "loss": 4.147690773010254 }, { "id": "identity_290927", "image": "A-OKVQA/images/142667.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity are they currently engaged in?\nChoices:\n(A) eating breakfast\n(B) standing line\n(C) cleaning up\n(D) waiting buss" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.734375, "loss": 4.301214694976807 }, { "id": "identity_300495", "image": "FigureQA/images/67925.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.78125, "loss": 4.119021415710449 }, { "id": "identity_152650", "image": "AI2D/images/4716.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many leaf attachments have a visible petiole in the diagram?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 35.21875, "loss": 4.629722595214844 }, { "id": "identity_103725", "image": "A-OKVQA/images/196510.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles in the image have visible price tags attached?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 28.546875, "loss": 4.410428047180176 }, { "id": "identity_241838", "image": "VQA-AS/images/19384.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many birds is the boy looking at, and what other objects are near the birds?" }, { "from": "gpt", "value": "The answer is Two birds and pieces of bread" } ], "clip_score": 23.1875, "loss": 4.143657684326172 }, { "id": "identity_282094", "image": "ScienceQA/images/17320.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Minnesota?\n(A) Saint Paul\n(B) Minneapolis\n(C) Montgomery\n(D) New York City" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.53125, "loss": 4.178610801696777 }, { "id": "identity_293018", "image": "A-OKVQA/images/80078.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which section of the grocery store is this? \nOptions:\n(A) frozen\n(B) vegetables\n(C) dairy\n(D) fruits" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.6875, "loss": 4.01680326461792 }, { "id": "identity_330765", "image": "PMC-VQA/images/PMC5815277_F7_276513.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the fluorescence imaging?\nChoices:\n(A) Cell division \n(B) Quantum dots \n(C) 3D structure \n(D) MSCs" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.765625, "loss": 4.901935577392578 }, { "id": "identity_190054", "image": "VQA2.0/images/576158.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the backpack of the person walking?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 15.5625, "loss": 4.480950355529785 }, { "id": "identity_212343", "image": "VQA2.0/images/319521.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are walking on the path in the background?" }, { "from": "gpt", "value": "The answer is Eleven" } ], "clip_score": 15.640625, "loss": 4.196867942810059 }, { "id": "identity_260764", "image": "ScienceQA/images/9305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which property do these three objects have in common, and how does this property affect their use in everyday life?" }, { "from": "gpt", "value": "The answer is Versatile and useful for holding, cooking, and entertainment" } ], "clip_score": 18.65625, "loss": 4.114482879638672 }, { "id": "identity_216792", "image": "A-OKVQA/images/389021.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many traffic lights are visible in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 14.78125, "loss": 4.956850051879883 }, { "id": "identity_183487", "image": "PMC-VQA/images/PMC7367244_Fig1_215713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any blood vessels or nerves in close proximity to the mass?" }, { "from": "gpt", "value": "The answer is Blood vessels visible" } ], "clip_score": 21.125, "loss": 4.7683868408203125 }, { "id": "identity_331193", "image": "PMC-VQA/images/PMC8464884_jcm-10-04057-f002_66122.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being measured in these scans?\nChoices:\n(A) Tooth alignment \n(B) Retromolar space \n(C) Gum thickness \n(D) Cavity depth" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.796875, "loss": 4.251669406890869 }, { "id": "identity_61861", "image": "DocVQA/images/nrnj0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the video to be shown at 9:50?" }, { "from": "gpt", "value": "The answer is DRYDEN FILE II" } ], "clip_score": 22.140625, "loss": 4.080229759216309 }, { "id": "identity_28349", "image": "A-OKVQA/images/423341.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of vehicle is the person in the middle lane using?\nChoices:\n(A) bus\n(B) motorcycle\n(C) car\n(D) bicycle" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.75, "loss": 4.387507438659668 }, { "id": "identity_297357", "image": "PMC-VQA/images/PMC6909583_Fig1_83150.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the finding in the upper right lung on the chest CT scan report? \nChoices:\n(A) mass with effusion \n(B) collapse \n(C) cavity with effusion \n(D) atelectasis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.078125, "loss": 4.160486221313477 }, { "id": "identity_111552", "image": "A-OKVQA/images/361572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many types of citrus fruits can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.703125, "loss": 4.446257591247559 }, { "id": "identity_293456", "image": "ScienceQA/images/17925.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being pointed out in the highlighted section?\nChoices:\n(A) Tuvalu\n(B) the Marshall Islands\n(C) Vanuatu\n(D) Nauru" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.765625, "loss": 4.077610969543457 }, { "id": "identity_30851", "image": "TQA/images/abc_question_images/human_system_nervous_16172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What letter label refers to the cerebellum?\nChoices:\n(A) V\n(B) D\n(C) X\n(D) W" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.796875, "loss": 4.269402980804443 }, { "id": "identity_312874", "image": "TQA/images/question_images/blastocyst_9031.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is inside this structure?\nChoices:\n(A) sac\n(B) embryo\n(C) placenta\n(D) inner cell mass" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.015625, "loss": 4.004478454589844 }, { "id": "identity_123502", "image": "TextVQA/images/ecb9e39f59ec4d90.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many words are in the main text?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 10.34375, "loss": 5.138223171234131 }, { "id": "identity_128317", "image": "Super-CLEVR/images/superCLEVR_new_001298.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest car in the image?" }, { "from": "gpt", "value": "The answer is Silver" } ], "clip_score": 21.40625, "loss": 4.188754081726074 }, { "id": "identity_140488", "image": "DocVQA/images/flwk0023_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the philosophy endorsed by the U.S. Surgeon General and the public health community mentioned in the text?" }, { "from": "gpt", "value": "The answer is Less ought to be better" } ], "clip_score": 23.375, "loss": 4.938222885131836 }, { "id": "identity_284658", "image": "A-OKVQA/images/155167.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For whom are the street signs intended?\nChoices:\n(A) drivers\n(B) directions\n(C) downtown\n(D) pedestrians" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.875, "loss": 4.276678562164307 }, { "id": "identity_301307", "image": "A-OKVQA/images/563730.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the green eating utensil next to the fruit?\nChoices:\n(A) knife\n(B) sporf\n(C) fork\n(D) spork" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.46875, "loss": 4.30454158782959 }, { "id": "identity_288602", "image": "A-OKVQA/images/490629.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the woman wearing a triangular hat?\nChoices:\n(A) dress code\n(B) visibility\n(C) protection\n(D) camouflage" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.296875, "loss": 4.605720520019531 }, { "id": "identity_294308", "image": "A-OKVQA/images/463665.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which bird is the least likely to be the first to catch a bug from the ground among the following options? \n(A) rightmost bird\n(B) flying one\n(C) front most\n(D) far left" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.703125, "loss": 4.132999420166016 }, { "id": "identity_30632", "image": "DVQA/images/bar_train_00093829.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the third bar from the bottom?" }, { "from": "gpt", "value": "The answer is phone" } ], "clip_score": 18.21875, "loss": 4.562546730041504 }, { "id": "identity_25813", "image": "PMC-VQA/images/PMC5367806_pone.0174352.g003_145870.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is seen in the CPR image of the LAD? \nChoices:\n(A) Calcified plaque \n(B) Non-calcified plaque \n(C) Blockage \n(D) No abnormalities seen " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.71875, "loss": 4.6210737228393555 }, { "id": "identity_152716", "image": "TQA/images/question_images/waves_interactions_interference_8230.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What happens to the amplitude of waves A and B during interference?" }, { "from": "gpt", "value": "The answer is The amplitude becomes zero" } ], "clip_score": 28.90625, "loss": 4.20017147064209 }, { "id": "identity_92651", "image": "IconQA/images/101040.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the object on the frame?" }, { "from": "gpt", "value": "The answer is Pentagon" } ], "clip_score": 22.71875, "loss": 4.330645561218262 }, { "id": "identity_268713", "image": "IconQA/images/7814.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) rotate\n(B) shift\n(C) invert" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.53125, "loss": 4.608273983001709 }, { "id": "identity_5388", "image": "IconQA/images/78944.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the dotted line a line of symmetry?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.65625, "loss": 4.311245918273926 }, { "id": "identity_296538", "image": "VQA2.0/images/81296.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the pants that the woman is wearing?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 12.625, "loss": 4.010068893432617 }, { "id": "identity_19913", "image": "DVQA/images/bar_train_00074330.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 20.921875, "loss": 4.29895544052124 }, { "id": "identity_281926", "image": "PMC-VQA/images/PMC7040219_F1_123962.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the CT scan reveal?\nChoices:\n(A) Pulmonary embolism \n(B) Pleural effusion \n(C) Pneumothorax \n(D) Lung nodules" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.8125, "loss": 4.307720184326172 }, { "id": "identity_302826", "image": "A-OKVQA/images/270198.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the circular patterns visible on the surface?\nChoices:\n(A) abstract drawing\n(B) paint\n(C) tire tracks\n(D) cracks" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.2421875, "loss": 4.650672912597656 }, { "id": "identity_33679", "image": "PMC-VQA/images/PMC7343300_FIG2_207080.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the bladder mass located?\nChoices:\n(A) Posterior \n(B) Lateral \n(C) Anterior \n(D) Medial" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.84375, "loss": 4.323343753814697 }, { "id": "identity_224592", "image": "VizWiz/images/VizWiz_train_00011688.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary function of this metal tool, and describe how it is typically used?" }, { "from": "gpt", "value": "The answer is To open metal cans by cutting the lid" } ], "clip_score": 22.71875, "loss": 4.242658615112305 }, { "id": "identity_256725", "image": "A-OKVQA/images/559417.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the most common type of animal imagery found on traditional carousels, and how does this relate to the amusement ride shown in the image?" }, { "from": "gpt", "value": "The answer is horses" } ], "clip_score": 13.859375, "loss": 4.264039993286133 }, { "id": "identity_323569", "image": "A-OKVQA/images/92339.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which entity do these horsemen work for?\nChoices:\n(A) german government\n(B) spanish government\n(C) british government\n(D) belgian government" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.609375, "loss": 4.028136253356934 }, { "id": "identity_106778", "image": "VQA-AS/images/16377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flowers are visible near the tree?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 16.640625, "loss": 4.263888359069824 }, { "id": "identity_191363", "image": "PMC-VQA/images/PMC9107200_Fig1_282135.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label 'D' indicate in the image?" }, { "from": "gpt", "value": "The answer is Duct or vessel" } ], "clip_score": 20.90625, "loss": 4.169986248016357 }, { "id": "identity_285045", "image": "Super-CLEVR/images/superCLEVR_new_000061.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can any shiny brown articulated buses be seen?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 24.625, "loss": 5.12135124206543 }, { "id": "identity_132421", "image": "A-OKVQA/images/432912.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many orange cones can be seen on the field?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 14.46875, "loss": 4.931659698486328 }, { "id": "identity_49313", "image": "FigureQA/images/4021.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 35.78125, "loss": 4.258712291717529 }, { "id": "identity_274275", "image": "DVQA/images/bar_train_00015868.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the bars run horizontally?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 23.890625, "loss": 4.324483394622803 }, { "id": "identity_305590", "image": "DVQA/images/bar_train_00018697.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of this bar?" }, { "from": "gpt", "value": "The answer is frame" } ], "clip_score": 13.9765625, "loss": 4.1119890213012695 }, { "id": "identity_287487", "image": "PMC-VQA/images/PMC7796587_Fig7_339434.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the images, what does the red fluorescence signify?\nChoices:\n(A) DNA expression\n(B) Nitrotyrosine expression\n(C) Mitochondrial expression\n(D) No expression." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.875, "loss": 4.505076885223389 }, { "id": "identity_320186", "image": "AI2D/images/3971.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the elongated things in its mouth?\nChoices:\n(A) none of the above\n(B) fangs\n(C) both above\n(D) horns" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.828125, "loss": 4.310213565826416 }, { "id": "identity_298164", "image": "A-OKVQA/images/134430.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the small blue and white object located on the right side of the sink?\nChoices:\n(A) lamp\n(B) pen\n(C) soap\n(D) toothbrush" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.125, "loss": 4.1454339027404785 }, { "id": "identity_22154", "image": "A-OKVQA/images/544732.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Passengers disembarking from the busses seen here might do what in the yellow canopied building?\nChoices:\n(A) catch rides\n(B) complain\n(C) dine\n(D) game" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.078125, "loss": 4.403393745422363 }, { "id": "identity_327894", "image": "A-OKVQA/images/406244.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What wheeled object is the person using to perform the stunt?\nChoices:\n(A) motorcycle\n(B) skateboard\n(C) rollerblades\n(D) bike" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.8125, "loss": 4.068940162658691 }, { "id": "identity_152625", "image": "ScienceQA/images/5688.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the distinctive feature of the bird's legs visible in the image?" }, { "from": "gpt", "value": "The answer is Two-toed long legs adapted for running" } ], "clip_score": 31.9375, "loss": 4.243239879608154 }, { "id": "identity_304581", "image": "Super-CLEVR/images/superCLEVR_new_000696.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the gray vehicle and the cyan motorcycle made of the same material?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 26.75, "loss": 5.694680213928223 }, { "id": "identity_253767", "image": "A-OKVQA/images/3220.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What event is likely taking place in the stadium, considering the presence of the mascot and the crowd?" }, { "from": "gpt", "value": "The answer is A baseball game" } ], "clip_score": 26.390625, "loss": 4.138469219207764 }, { "id": "identity_22528", "image": "A-OKVQA/images/358523.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this image?\nChoices:\n(A) puzzle\n(B) photo\n(C) photoshopped picture\n(D) drawing" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.734375, "loss": 4.678938388824463 }, { "id": "identity_15992", "image": "ScienceQA/images/17801.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Indiana?\nChoices:\n(A) Springfield\n(B) Saint Paul\n(C) Indianapolis\n(D) Fort Wayne" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.25, "loss": 4.211484432220459 }, { "id": "identity_301143", "image": "VizWiz/images/VizWiz_train_00002756.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this scent?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 24.25, "loss": 4.01129674911499 }, { "id": "identity_16335", "image": "ScienceQA/images/2730.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Missouri?\nChoices:\n(A) Louisville\n(B) Jefferson City\n(C) Columbus\n(D) Des Moines" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.484375, "loss": 4.131096839904785 }, { "id": "identity_187626", "image": "PMC-VQA/images/PMC6539121_ijms-20-02332-f002_486355.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any pathological features in this liver tissue section?" }, { "from": "gpt", "value": "The answer is Pathological features identified, if any" } ], "clip_score": 29.140625, "loss": 4.1187286376953125 }, { "id": "identity_13707", "image": "AI2D/images/3474.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the above diagram?\nChoices:\n(A) Tail\n(B) Anatomy of the Salamander\n(C) Squish\n(D) Boop" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.0625, "loss": 4.073664665222168 }, { "id": "identity_131550", "image": "A-OKVQA/images/52661.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 18.234375, "loss": 4.401092529296875 }, { "id": "identity_308955", "image": "AI2D/images/3355.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this red part of its body called?\nChoices:\n(A) leg\n(B) eye\n(C) elytra\n(D) none of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.90625, "loss": 4.536921501159668 }, { "id": "identity_293407", "image": "IconQA/images/32428.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture is the fourth one in the sequence, given that the first picture is a house?\nChoices:\n(A) paw\n(B) dog\n(C) house" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.734375, "loss": 4.094071388244629 }, { "id": "identity_272124", "image": "VQA2.0/images/280059.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In what way does the red and brown box stand out from the rest?" }, { "from": "gpt", "value": "The answer is rest are brown" } ], "clip_score": 15.390625, "loss": 4.730278968811035 }, { "id": "identity_9258", "image": "TextVQA/images/7853c5792f2cbadd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what word is closest to the top of the page?" }, { "from": "gpt", "value": "The answer is ground" } ], "clip_score": 13.328125, "loss": 4.130619525909424 }, { "id": "identity_17106", "image": "Super-CLEVR/images/superCLEVR_new_001097.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The small matte object that is to the left of the tiny car behind the cyan bus is what shape?" }, { "from": "gpt", "value": "The answer is chopper" } ], "clip_score": 29.140625, "loss": 5.00963020324707 }, { "id": "identity_306975", "image": "VQA2.0/images/119108.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the ground wet?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 16.75, "loss": 4.493509769439697 }, { "id": "identity_64210", "image": "ScienceQA/images/16786.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the bottle's opening?" }, { "from": "gpt", "value": "The answer is Round" } ], "clip_score": 23.109375, "loss": 4.079883575439453 }, { "id": "identity_109485", "image": "A-OKVQA/images/58684.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible inside the Apple store?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 27.3125, "loss": 4.61702299118042 }, { "id": "identity_297967", "image": "A-OKVQA/images/161871.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose is the elephant putting its legs on the wheel?\nChoices:\n(A) to scratch\n(B) to flip\n(C) to kick\n(D) to mount" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.703125, "loss": 4.039071083068848 }, { "id": "identity_24645", "image": "A-OKVQA/images/571654.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is near the laptops?\nChoices:\n(A) orange\n(B) dog\n(C) cat\n(D) banana" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.6015625, "loss": 4.109126091003418 }, { "id": "identity_42492", "image": "DocVQA/images/pllw0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the marketing person responsible for the project?" }, { "from": "gpt", "value": "The answer is Cressida Lozano" } ], "clip_score": 18.5625, "loss": 4.0077643394470215 }, { "id": "identity_77819", "image": "AI2D/images/73.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of the deer in this ecosystem as shown in the diagram?" }, { "from": "gpt", "value": "The answer is Consumer" } ], "clip_score": 36.625, "loss": 4.254974842071533 }, { "id": "identity_186424", "image": "VQA2.0/images/472027.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible on the field?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 15.140625, "loss": 4.264507293701172 }, { "id": "identity_328237", "image": "DocVQA/images/rxhm0227_14.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the executive secretary?" }, { "from": "gpt", "value": "The answer is channing H. Lushbough" } ], "clip_score": 16.5, "loss": 4.774707794189453 }, { "id": "identity_319080", "image": "A-OKVQA/images/153052.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What position is the person in red kneepads on the field playing?\nChoices:\n(A) catcher\n(B) umpire\n(C) outfielder\n(D) first base" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.453125, "loss": 4.487905025482178 }, { "id": "identity_293992", "image": "A-OKVQA/images/274105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What accessory is the dog currently wearing?\nChoices:\n(A) glasses\n(B) leash\n(C) scarf\n(D) boots" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.828125, "loss": 4.244906425476074 }, { "id": "identity_301686", "image": "FigureQA/images/73848.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.9453125, "loss": 4.115711688995361 }, { "id": "identity_249486", "image": "ScienceQA/images/10044.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which animal, among the green silver-line and black widow spider, is adapted to be camouflaged among green leaves, and what specific features of this animal aid in its camouflage?" }, { "from": "gpt", "value": "The answer is Green wings and leaf-like patterns" } ], "clip_score": 25.28125, "loss": 4.14214563369751 }, { "id": "identity_329802", "image": "TextVQA/images/f8e57988876e154c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the handwriting say?" }, { "from": "gpt", "value": "The answer is the cancer card" } ], "clip_score": 15.8984375, "loss": 4.983654022216797 }, { "id": "identity_217121", "image": "TextVQA/images/6d3864294e00638e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are on the court wearing black shorts?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 19.71875, "loss": 4.716588497161865 }, { "id": "identity_292278", "image": "DocVQA/images/lxjw0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which project name is specified in the request form?" }, { "from": "gpt", "value": "The answer is Referral Smokers - Charlotte Tests" } ], "clip_score": 21.328125, "loss": 5.54161262512207 }, { "id": "identity_282731", "image": "IconQA/images/7897.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture comes after the image of the moon in the sequence? \nChoices:\n(A) rocket\n(B) star\n(C) moon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.90625, "loss": 4.1466474533081055 }, { "id": "identity_130188", "image": "Super-CLEVR/images/superCLEVR_new_000813.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is positioned closest to the camera?" }, { "from": "gpt", "value": "The answer is Motorcycle" } ], "clip_score": 18.53125, "loss": 4.078885555267334 }, { "id": "identity_320007", "image": "DocVQA/images/qyxm0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the schedule from 7:00 to 10:00 p.m. on the first day?" }, { "from": "gpt", "value": "The answer is Reception" } ], "clip_score": 19.109375, "loss": 4.386564254760742 }, { "id": "identity_2313", "image": "TextVQA/images/7058614605803dc8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what service does this offer?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 13.3203125, "loss": 4.012690544128418 }, { "id": "identity_127309", "image": "TextVQA/images/b2efb31f5c3c3404.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first word written on the envelope?" }, { "from": "gpt", "value": "The answer is Merkwurdig" } ], "clip_score": 25.640625, "loss": 4.088108539581299 }, { "id": "identity_146037", "image": "DocVQA/images/tsnp0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of Robert Thomas Dell'Orco's wife?" }, { "from": "gpt", "value": "The answer is Judith Marie" } ], "clip_score": 19.03125, "loss": 4.078887939453125 }, { "id": "identity_29824", "image": "PMC-VQA/images/PMC6538237_FIG2_486069.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the CT scan show under the craniotomy bone flap? \nChoices:\n(A) Air collection \n(B) Frontal fluid collection \n(C) Brain abscess \n(D) Skull fracture " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.984375, "loss": 4.881020545959473 }, { "id": "identity_303160", "image": "FigureQA/images/11755.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.984375, "loss": 4.4164276123046875 }, { "id": "identity_294651", "image": "PMC-VQA/images/PMC3446384_F6_156024.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location was the imaging conducted? \nOptions:\n(A) Lower side of the filter \n(B) Inside the cell \n(C) Upper side of the filter \n(D) Outside of the cell" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.96875, "loss": 4.7187089920043945 }, { "id": "identity_1587", "image": "DVQA/images/bar_train_00169980.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the eighth stack of bars from the left?" }, { "from": "gpt", "value": "The answer is game" } ], "clip_score": 14.0, "loss": 4.675200939178467 }, { "id": "identity_278548", "image": "Super-CLEVR/images/superCLEVR_new_001270.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the large yellow shiny object on the right side of the small brown bicycle the same shape as the small brown rubber object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 21.125, "loss": 5.321714878082275 }, { "id": "identity_264286", "image": "AI2D/images/3185.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is the outermost part of the cell?\n(A) none of the above\n(B) chloroplast\n(C) cell wall\n(D) ribosome" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.796875, "loss": 4.158318996429443 }, { "id": "identity_322312", "image": "PMC-VQA/images/PMC6410175_nanomaterials-09-00263-f003_448201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the green and purple dots represent in the images? \nChoices:\n(A) Single NCs \n(B) Zn and Mn \n(C) ARM \n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.71875, "loss": 5.041131019592285 }, { "id": "identity_30967", "image": "A-OKVQA/images/147879.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on the ceiling?\nChoices:\n(A) balloons\n(B) lights\n(C) bats\n(D) squid" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.78125, "loss": 4.073427677154541 }, { "id": "identity_36337", "image": "TQA/images/question_images/simple_machines_8197.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: When the man turns the wheel in a clockwise manner, what is the direction of the motion?\nChoices:\n(A) upward\n(B) counter-clockwise\n(C) downward\n(D) sideways" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.828125, "loss": 4.374796390533447 }, { "id": "identity_317035", "image": "PMC-VQA/images/PMC3386976_pone-0040352-g002_143406.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the retinal imaging centred on this area show?\nChoices:\n(A) Anterior chamber of the eye\n(B) Macula\n(C) Cornea\n(D) Retinal blood vessels and nerve fibre layer" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 34.21875, "loss": 4.333425521850586 }, { "id": "identity_275583", "image": "A-OKVQA/images/300499.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what manner is the woman holding her bag while it is raining?\nChoices:\n(A) back\n(B) basket\n(C) shoulder\n(D) seat" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.90625, "loss": 4.590901851654053 }, { "id": "identity_301688", "image": "FigureQA/images/93655.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid's value the highest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.21875, "loss": 4.073819160461426 }, { "id": "identity_103981", "image": "VQA2.0/images/304584.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many prams/strollers can be seen in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 25.5625, "loss": 4.516167163848877 }, { "id": "identity_184376", "image": "A-OKVQA/images/535770.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible containers with a blue lid are there in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 10.671875, "loss": 4.605850696563721 }, { "id": "identity_304096", "image": "FigureQA/images/56592.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.03125, "loss": 4.1875834465026855 }, { "id": "identity_132485", "image": "VQA2.0/images/136185.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the books' spines on the second shelf from the top?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 25.8125, "loss": 4.011883735656738 }, { "id": "identity_122536", "image": "VQA2.0/images/150769.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flags are visible in the image?" }, { "from": "gpt", "value": "The answer is Two flags" } ], "clip_score": 10.8828125, "loss": 4.4078049659729 }, { "id": "identity_29221", "image": "PMC-VQA/images/PMC5606599_vetsci-04-00020-f006_217799.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What anomalous feature is observed in the aortic root image?\nChoices:\n(A) A normal coronary artery. \n(B) A CAA traversing over the aortic valve annulus. \n(C) A CAA traversing over the pulmonary valve annulus. \n(D) An aortic root dilation." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.578125, "loss": 4.376733779907227 }, { "id": "identity_107053", "image": "VQA2.0/images/115275.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bath mat?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 16.71875, "loss": 4.4149394035339355 }, { "id": "identity_263081", "image": "Super-CLEVR/images/superCLEVR_new_001445.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the metal object located to the left of the small purple shiny bicycle on the right side of the dirtbike?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 28.171875, "loss": 4.254969120025635 }, { "id": "identity_215620", "image": "VizWiz/images/VizWiz_train_00003851.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many buttons are visible on the electronic device?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 20.09375, "loss": 4.505172252655029 }, { "id": "identity_266062", "image": "IconQA/images/66890.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the shape from the following options: cone, cube, or sphere.\nChoices:\n(A) cone\n(B) cube\n(C) sphere" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.5, "loss": 4.094611167907715 }, { "id": "identity_223627", "image": "Super-CLEVR/images/superCLEVR_new_000302.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the car in the image?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 19.34375, "loss": 4.222084999084473 }, { "id": "identity_2007", "image": "VizWiz/images/VizWiz_train_00001946.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me what the buttons at the top and the bottom of the inner circle are labeled? Thank you. " }, { "from": "gpt", "value": "The answer is cant see inner circle" } ], "clip_score": 24.984375, "loss": 4.955240249633789 }, { "id": "identity_313560", "image": "IconQA/images/28872.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this symbol?\nChoices:\n(A) flip\n(B) slide\n(C) turn" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.859375, "loss": 4.11422061920166 }, { "id": "identity_326348", "image": "ScienceQA/images/3341.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of this colony?\nChoices:\n(A) Massachusetts\n(B) South Carolina\n(C) Connecticut\n(D) Maine" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.5625, "loss": 4.876967906951904 }, { "id": "identity_289077", "image": "PlotQA/images/146235.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group of bars from the left is labeled as the 3rd group?" }, { "from": "gpt", "value": "The answer is Services" } ], "clip_score": 19.125, "loss": 4.99816370010376 }, { "id": "identity_333745", "image": "PMC-VQA/images/PMC5519497_Fig5_193168.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the CT image obtained 1 month after the procedure show?\n\nChoices:\n(A) Liver ischemia\n(B) Hypertrophy of future liver remnant\n(C) Enlargement of portal vein branches\n(D) Embolization of portal vein branches" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.03125, "loss": 4.212615013122559 }, { "id": "identity_326797", "image": "PMC-VQA/images/PMC6085685_Fig2_352459.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What abnormality is present in these canals? \nChoices:\n(A) they are narrowed \n(B) they are absent \n(C) they are widened \n(D) they are twisted" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.5625, "loss": 5.472019195556641 }, { "id": "identity_280732", "image": "A-OKVQA/images/287415.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location are the men positioned while using their phones? \nOptions:\n(A) walkway\n(B) street\n(C) curbside\n(D) park" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.984375, "loss": 4.523829460144043 }, { "id": "identity_248431", "image": "TextVQA/images/6d4b69a8ad721814.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times does the \"Travellers\" logo appear on the ice, and what is the total number of advertisement signs visible in the image?" }, { "from": "gpt", "value": "The answer is eleven" } ], "clip_score": 22.9375, "loss": 5.120431423187256 }, { "id": "identity_259879", "image": "PMC-VQA/images/PMC6276199_Fig2_406699.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which areas in the brain show new white matter growth, and what are the potential implications of this growth in terms of cognitive functions?" }, { "from": "gpt", "value": "The answer is Improved visual processing and recognition functions" } ], "clip_score": 26.96875, "loss": 4.423612594604492 }, { "id": "identity_283161", "image": "A-OKVQA/images/567877.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the device the man is holding in his hand?\nChoices:\n(A) light candle\n(B) eat cake\n(C) cut cake\n(D) serve cake" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.4140625, "loss": 4.362519264221191 }, { "id": "identity_276131", "image": "PMC-VQA/images/PMC5570225_fig2_206568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the orange box in the inset image represent?\nChoices:\n(A) convoluted PM \n(B) anterior midgut region \n(C) posterior midgut region \n(D) complex membrane folding." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.671875, "loss": 4.626666069030762 }, { "id": "identity_301187", "image": "A-OKVQA/images/169576.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What status is the individual the crowd is waiting for?\nChoices:\n(A) non existent\n(B) medium\n(C) high\n(D) low" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.984375, "loss": 4.192596435546875 }, { "id": "identity_61396", "image": "MapQA/images/map_6649.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have the lowest percentage range of writers?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 32.78125, "loss": 4.084534168243408 }, { "id": "identity_318077", "image": "TextVQA/images/b9bedb287ce4bc89.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anniversary is being celebrated on this?" }, { "from": "gpt", "value": "The answer is travie" } ], "clip_score": 15.1796875, "loss": 4.306219100952148 }, { "id": "identity_333082", "image": "VizWiz/images/VizWiz_train_00018462.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of mushroom is this? Is it safe to eat?" }, { "from": "gpt", "value": "The answer is wild" } ], "clip_score": 32.53125, "loss": 5.57560396194458 }, { "id": "identity_248667", "image": "ScienceQA/images/2612.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which property do these four objects have in common, and which of the following objects also share this property: orange, chocolate, or bread?" }, { "from": "gpt", "value": "The answer is orange" } ], "clip_score": 22.734375, "loss": 5.121260643005371 }, { "id": "identity_332745", "image": "Super-CLEVR/images/superCLEVR_new_000003.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there anything else made of the same material as this?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 16.609375, "loss": 4.518420696258545 }, { "id": "identity_281584", "image": "VQA2.0/images/549617.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Whose horse's bridle is visible?" }, { "from": "gpt", "value": "The answer is foremost" } ], "clip_score": 25.8125, "loss": 4.141237258911133 }, { "id": "identity_123491", "image": "VQA2.0/images/199487.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players can be seen on the fielding team?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 20.453125, "loss": 4.88134241104126 }, { "id": "identity_276537", "image": "AI2D/images/238.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the herbivores in the given food chain are eliminated, which population will be significantly impacted?\nChoices:\n(A) Carnivore\n(B) Plant\n(C) Phytoplankton\n(D) Primary producers" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.78125, "loss": 4.447718143463135 }, { "id": "identity_197968", "image": "VQA-AS/images/12810.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many slices of pizza are left on the plate?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.40625, "loss": 4.243943214416504 }, { "id": "identity_16403", "image": "IconQA/images/45915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What shape is this?\nChoices:\n(A) cone\n(B) sphere\n(C) cube\n(D) cylinder" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.015625, "loss": 4.012746810913086 }, { "id": "identity_323755", "image": "A-OKVQA/images/368148.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is actually the tallest thing in the picture?\nChoices:\n(A) boy\n(B) buildings\n(C) bus\n(D) clock tower" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.109375, "loss": 4.562076568603516 }, { "id": "identity_327937", "image": "PMC-VQA/images/PMC9005871_F2_252247.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the findings in these images?\nChoices:\n(A) Arterial bleeding\n(B) Agenesis of corpus callosum, delayed white matter myelination and enlargement of lateral ventricles\n(C) Glioblastoma\n(D) Sinusitis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.0, "loss": 4.093390464782715 }, { "id": "identity_144440", "image": "DocVQA/images/hlmf0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the topic of discussion under the first bullet point?" }, { "from": "gpt", "value": "The answer is Monitors" } ], "clip_score": 19.125, "loss": 4.493797302246094 }, { "id": "identity_132449", "image": "TextVQA/images/d27841756b83df5c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any other object attached to the figure apart from the bottles?" }, { "from": "gpt", "value": "The answer is Yes, an Intel badge" } ], "clip_score": 24.3125, "loss": 4.580882549285889 }, { "id": "identity_328105", "image": "PMC-VQA/images/PMC5617943_diagnostics-07-00043-f001_221034.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the pattern of pulmonary uptake in this?\nChoices:\n(A) Focal \n(B) Diffuse \n(C) Peripheral \n(D) Central" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.03125, "loss": 4.40817403793335 }, { "id": "identity_268548", "image": "PMC-VQA/images/PMC7819965_f3_347108.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was displayed in the zoomed panel A?\nChoices:\n(A) Inclusion of C. muridarum inside the OE \n(B) Inclusion of C. muridarum around the OE \n(C) No immunolabelling for C. muridarum \n(D) Inclusion of C. muridarum inside the control mouse" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.203125, "loss": 4.3905930519104 }, { "id": "identity_31406", "image": "PMC-VQA/images/PMC2562460_F6_28479.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of muscles are present in the anterior part? \nChoices:\n(A) transverse muscles \n(B) diagonal muscles \n(C) circular muscles \n(D) longitudinal muscles " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.3046875, "loss": 4.374789237976074 }, { "id": "identity_52127", "image": "FigureQA/images/800.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 33.71875, "loss": 4.262236595153809 }, { "id": "identity_337146", "image": "IconQA/images/102729.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a house. Which image is fourth?\nChoices:\n(A) paw\n(B) dog\n(C) house" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.984375, "loss": 4.467486381530762 }, { "id": "identity_274405", "image": "TextVQA/images/8cfae1a2edd1779d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which jersey number does the red player wear?" }, { "from": "gpt", "value": "The answer is 05 " } ], "clip_score": 20.28125, "loss": 4.1697235107421875 }, { "id": "identity_118043", "image": "A-OKVQA/images/293895.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many umbrellas are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.8125, "loss": 4.232900142669678 }, { "id": "identity_284744", "image": "AI2D/images/74.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term used to describe the process in which plants absorb light energy?\nChoices:\n(A) photosynthesis\n(B) precipitation\n(C) dead organism\n(D) fossil fuels" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.46875, "loss": 4.0642781257629395 }, { "id": "identity_288098", "image": "PMC-VQA/images/PMC5339251_F1_134879.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which source is the adapted image displayed in (A) taken? \nChoices:\n(A) Science Daily\n(B) The New York Times\n(C) Macmillan Publishers Ltd\n(D) National Geographic" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.375, "loss": 4.0365309715271 }, { "id": "identity_272954", "image": "TextVQA/images/5f2ae2bcddb83a67.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which university is mentioned?" }, { "from": "gpt", "value": "The answer is michigan " } ], "clip_score": 18.640625, "loss": 4.9847798347473145 }, { "id": "identity_15091", "image": "Super-CLEVR/images/superCLEVR_new_001912.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the shiny wagon in front of the large metal wagon have the same color as the metal wagon behind the tiny wagon?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 19.765625, "loss": 5.299747943878174 }, { "id": "identity_30810", "image": "PMC-VQA/images/PMC8515925_fig4_84101.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the black arrows indicate in the image? \nChoices:\n(A) Macrophages \n(B) Granulomas \n(C) Lymphocytes \n(D) Blood vessels . " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.546875, "loss": 4.283524513244629 }, { "id": "identity_279399", "image": "DocVQA/images/snpf0023_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To whom is the fax note addressed?" }, { "from": "gpt", "value": "The answer is Jeff Rumberger" } ], "clip_score": 24.296875, "loss": 4.778522491455078 }, { "id": "identity_312533", "image": "FigureQA/images/5699.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.109375, "loss": 4.546631336212158 }, { "id": "identity_305111", "image": "ScienceQA/images/6165.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these organisms is the omnivore in this food web?\nChoices:\n(A) black rockfish\n(B) zooplankton\n(C) kelp bass\n(D) orca" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.84375, "loss": 4.123570442199707 }, { "id": "identity_182396", "image": "A-OKVQA/images/108111.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cups can be seen on the table?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 14.1953125, "loss": 4.519721508026123 }, { "id": "identity_300569", "image": "A-OKVQA/images/171479.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of event are they participating in?\nChoices:\n(A) training\n(B) commuting\n(C) competition\n(D) leisure activity" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.203125, "loss": 4.008973121643066 }, { "id": "identity_334124", "image": "TextVQA/images/21a9959e9dcb4f97.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What number is the person in red?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 12.109375, "loss": 4.313124179840088 }, { "id": "identity_26045", "image": "ScienceQA/images/12301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the direction of this push?\nChoices:\n(A) away from the bottom magnet\n(B) toward the bottom magnet" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.46875, "loss": 4.301175117492676 }, { "id": "identity_124927", "image": "VQA2.0/images/429446.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many umbrellas can be seen in the background?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 13.546875, "loss": 4.387816429138184 }, { "id": "identity_313236", "image": "A-OKVQA/images/88470.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is this cage filled with birds of the same type?\nChoices:\n(A) to train\n(B) to eat\n(C) to sell\n(D) to cook" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.796875, "loss": 4.226001262664795 }, { "id": "identity_287650", "image": "A-OKVQA/images/110108.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is the giant fish in the air: a blimp, a balloon, a kite, or a sculpture?\nChoices:\n(A) blimp\n(B) balloon\n(C) kite\n(D) sculpture" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.921875, "loss": 4.052863121032715 }, { "id": "identity_314452", "image": "PMC-VQA/images/PMC4161720_Fig1_319755.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of imaging was used to diagnose the condition?\nChoices:\n(A) X-Ray\n(B) MRI\n(C) PET-Scan\n(D) CT-Scan" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.9375, "loss": 4.690980911254883 }, { "id": "identity_12529", "image": "FigureQA/images/53769.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Pale Green the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.765625, "loss": 4.0949273109436035 }, { "id": "identity_25243", "image": "A-OKVQA/images/83766.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this scene taking place?\nChoices:\n(A) airport\n(B) mall\n(C) dmv\n(D) school" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.921875, "loss": 4.67043399810791 }, { "id": "identity_295004", "image": "PMC-VQA/images/PMC4167003_F5_320731.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which animal was utilized in the experiment? \nOptions:\n(A) Rabbit\n(B) Rat\n(C) Cat\n(D) Mouse" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.203125, "loss": 5.262614727020264 }, { "id": "identity_117010", "image": "VQA2.0/images/228434.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of fruit is in the container on the desk?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 23.328125, "loss": 4.099705219268799 }, { "id": "identity_299482", "image": "DocVQA/images/fnny0225_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you explain the concept of Five-Year Internal Rate of Return (IRR)?" }, { "from": "gpt", "value": "The answer is 84 percent" } ], "clip_score": 25.234375, "loss": 4.011443138122559 }, { "id": "identity_298193", "image": "A-OKVQA/images/429793.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the laptop positioned on the desk beside the potted flowers?\nOptions:\n(A) gray\n(B) blue\n(C) black\n(D) red" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.25, "loss": 4.660707473754883 }, { "id": "identity_308158", "image": "AI2D/images/1761.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If all grass dies in the food web diagram, what will happen to the population of the mouse?\nChoices:\n(A) decrease\n(B) increase\n(C) remains the same\n(D) NA" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.0625, "loss": 4.00344181060791 }, { "id": "identity_20462", "image": "PMC-VQA/images/PMC5963881_f8_317288.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the focus of the SEM image? \nChoices:\n(A) Pyritic component \n(B) Reconstructed vesicle \n(C) Analysis grid \n(D) Wall region of a vesicle " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.25, "loss": 4.587685585021973 }, { "id": "identity_294685", "image": "Super-CLEVR/images/superCLEVR_new_001992.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the remaining part of the big green jet and the object behind the small gray tandem bike made of the same material?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 32.0625, "loss": 5.1706037521362305 }, { "id": "identity_126943", "image": "TextVQA/images/19703a54731b9656.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the microwave?" }, { "from": "gpt", "value": "The answer is Carousel" } ], "clip_score": 21.671875, "loss": 4.321904182434082 }, { "id": "identity_613", "image": "PMC-VQA/images/PMC8601058_fig-3_115220.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color of the glandular trichomes? \nChoices:\n(A) Orange \n(B) Green \n(C) Yellow \n(D) Blue " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.484375, "loss": 4.961550235748291 }, { "id": "identity_31637", "image": "A-OKVQA/images/341838.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What group of people mostly live in this area?\nChoices:\n(A) korean\n(B) chinese\n(C) japanese\n(D) indian" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.609375, "loss": 4.281322479248047 }, { "id": "identity_596", "image": "A-OKVQA/images/355786.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is flying through the air?\nChoices:\n(A) stuffed animal\n(B) livestock\n(C) bear\n(D) chicken" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.59375, "loss": 4.000203609466553 }, { "id": "identity_115890", "image": "VizWiz/images/VizWiz_train_00023704.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cables can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.625, "loss": 4.324732780456543 }, { "id": "identity_265885", "image": "ScienceQA/images/1140.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Rhode Island?\n(A) Providence\n(B) Albany\n(C) Newport\n(D) Augusta" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.40625, "loss": 4.120110034942627 }, { "id": "identity_325626", "image": "A-OKVQA/images/375806.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this building used for?\nChoices:\n(A) library\n(B) church\n(C) lifeguard\n(D) hospital" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.4296875, "loss": 4.495865821838379 }, { "id": "identity_107539", "image": "A-OKVQA/images/309322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many framed images are visible in the background?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.84375, "loss": 4.389716148376465 }, { "id": "identity_278562", "image": "VizWiz/images/VizWiz_train_00018502.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Could you please confirm if there is any dosage information displayed on this bottle? Thank you." }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 18.234375, "loss": 4.394562244415283 }, { "id": "identity_275648", "image": "ScienceQA/images/6520.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following correctly identifies the direction of thermal energy transfer during this time: from each battery to the surroundings or from the surroundings to each battery?\nChoices:\n(A) each battery . . . the surroundings\n(B) the surroundings . . . each battery" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.21875, "loss": 4.673494338989258 }, { "id": "identity_298629", "image": "PMC-VQA/images/PMC6889731_Fig1_77628.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the meaning of the white arrow shown in the image?\nChoices:\n(A) Eye inflammation\n(B) Cataracts\n(C) Contralateral contrast\n(D) Blocked tear duct." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.40625, "loss": 4.089427947998047 }, { "id": "identity_264132", "image": "IconQA/images/89933.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What operation has been performed on this letter? \nChoices:\n(A) flip\n(B) slide\n(C) turn" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.09375, "loss": 4.098620414733887 }, { "id": "identity_262204", "image": "Super-CLEVR/images/superCLEVR_new_001214.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the small rubber airplanes to the left of the gray matte airplane outnumber the big brown matte buses?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 25.078125, "loss": 4.971519470214844 }, { "id": "identity_281436", "image": "VQA2.0/images/514457.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is on the bear's face?" }, { "from": "gpt", "value": "The answer is furr" } ], "clip_score": 15.0703125, "loss": 4.861036777496338 }, { "id": "identity_279757", "image": "TQA/images/abc_question_images/human_system_digestive_13713.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is the liver located?\nChoices:\n(A) M\n(B) C\n(C) J\n(D) Y" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.15625, "loss": 4.4228291511535645 }, { "id": "identity_307047", "image": "PMC-VQA/images/PMC7377830_f10_218656.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the histogram plot used to analyze?\nChoices:\n(A) muscular structure analysis \n(B) trabecular structure analysis \n(C) bone mass analysis \n(D) bone volume analysis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.5546875, "loss": 4.301033020019531 }, { "id": "identity_289701", "image": "PMC-VQA/images/PMC6000706_Figure5_328815.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the histological pattern of the tumor cells among the following choices: Glandular, Tubular, Squamous, or Columnar?\nChoices:\n(A) Glandular \n(B) Tubular \n(C) Squamous \n(D) Columnar" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.546875, "loss": 4.0173821449279785 }, { "id": "identity_22814", "image": "IconQA/images/47623.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) slide\n(B) turn\n(C) flip" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.0625, "loss": 4.578895568847656 }, { "id": "identity_15279", "image": "FigureQA/images/13134.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Gold the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.203125, "loss": 4.341136455535889 }, { "id": "identity_310684", "image": "PMC-VQA/images/PMC2941604_F0002_74011.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which body part is affected by the mass shown in the image?\nChoices:\n(A) Spinal cord \n(B) Cervical lymph nodes \n(C) Sacrum \n(D) Shoulder joint" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.390625, "loss": 4.860098838806152 }, { "id": "identity_326801", "image": "PMC-VQA/images/PMC6262857_BIO038661F3_402705.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated in green?\nChoices:\n(A) Gsh2 \n(B) Pax6 \n(C) DAPI \n(D) None of the above." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.375, "loss": 4.997471809387207 }, { "id": "identity_242273", "image": "DVQA/images/bar_train_00094073.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the smallest average accuracy across all the datasets?" }, { "from": "gpt", "value": "The answer is drug" } ], "clip_score": 29.921875, "loss": 4.322895526885986 }, { "id": "identity_31009", "image": "IconQA/images/73369.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a plane. Which picture is third?\nChoices:\n(A) bear\n(B) train\n(C) plane" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.53125, "loss": 4.174116611480713 }, { "id": "identity_298980", "image": "A-OKVQA/images/292543.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the man engaged in with the beans among the following choices: selling, cooking, eating, or counting them?\nChoices:\n(A) selling them\n(B) cooking\n(C) eating them\n(D) counting" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.28125, "loss": 4.612583637237549 }, { "id": "identity_4279", "image": "AI2D/images/284.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the food web, what organisms are carnivores?\nChoices:\n(A) Grizzly bear and marten\n(B) Deer mouse and red squirrel\n(C) Marten\n(D) Grasses and elderberry" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.1875, "loss": 4.486231803894043 }, { "id": "identity_9040", "image": "IconQA/images/99112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of squares even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.234375, "loss": 4.168015003204346 }, { "id": "identity_276119", "image": "FigureQA/images/15731.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Khaki or Saddle Brown?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.0, "loss": 4.421950340270996 }, { "id": "identity_25437", "image": "IconQA/images/58378.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) past two." }, { "from": "gpt", "value": "The answer is quarter" } ], "clip_score": 26.359375, "loss": 4.79355001449585 }, { "id": "identity_305702", "image": "ScienceQA/images/1344.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared characteristic of these four items?\nChoices:\n(A) sweet\n(B) fragile\n(C) salty" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.09375, "loss": 4.501976490020752 }, { "id": "identity_10569", "image": "Super-CLEVR/images/superCLEVR_new_000604.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the large thing that is in front of the green dirtbike and on the left side of the gray aeroplane?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 29.5625, "loss": 4.515239715576172 }, { "id": "identity_8000", "image": "FigureQA/images/41709.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Lawn Green the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.71875, "loss": 4.284090995788574 }, { "id": "identity_37441", "image": "A-OKVQA/images/462953.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What business is this display promoting?\nChoices:\n(A) nature trips\n(B) restaurant\n(C) travel agency\n(D) sporting goods" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.171875, "loss": 4.087531089782715 }, { "id": "identity_24384", "image": "PMC-VQA/images/PMC5899831_Fig4_301522.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the radiographs show?\nChoices:\n(A) Fracture in one cortex \n(B) Fracture in both cortices \n(C) No fracture \n(D) Fracture in the epiphysis " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.765625, "loss": 4.099658966064453 }, { "id": "identity_274602", "image": "VizWiz/images/VizWiz_train_00016669.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the title and disc number of this CD?" }, { "from": "gpt", "value": "The answer is scarecrow army thieves disc 1 10" } ], "clip_score": 26.109375, "loss": 4.327646255493164 }, { "id": "identity_329485", "image": "Super-CLEVR/images/superCLEVR_new_000323.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the large yellow item behind the big yellow matte bicycle have the same shape as the small green item?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 26.421875, "loss": 5.243134021759033 }, { "id": "identity_185509", "image": "Super-CLEVR/images/superCLEVR_new_000546.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.484375, "loss": 4.333278179168701 }, { "id": "identity_287291", "image": "A-OKVQA/images/314572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the animal attached to the cart currently engaged in?\nChoices:\n(A) racing\n(B) eating\n(C) waiting\n(D) drinking" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.875, "loss": 4.3166422843933105 }, { "id": "identity_64137", "image": "ScienceQA/images/19729.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color used to highlight the country on the map?" }, { "from": "gpt", "value": "The answer is Beige" } ], "clip_score": 25.90625, "loss": 4.4219441413879395 }, { "id": "identity_689", "image": "FigureQA/images/68342.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Purple have the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.59375, "loss": 4.098591327667236 }, { "id": "identity_335637", "image": "DocVQA/images/klbm0227_15.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the chairman of this committee?" }, { "from": "gpt", "value": "The answer is The Warden" } ], "clip_score": 18.4375, "loss": 4.149685382843018 }, { "id": "identity_17595", "image": "PMC-VQA/images/PMC6131333_F1_365618.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is absent in the CT scan images of skull base? \nChoices:\n(A) Right internal carotid artery (ICA) \n(B) Left internal carotid artery (ICA) \n(C) External carotid artery (ECA) \n(D) Vertebral artery (VA) " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.40625, "loss": 4.3615007400512695 }, { "id": "identity_20769", "image": "PMC-VQA/images/PMC4121303_F1_311281.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated with black arrows in (C)? \nChoices:\n(A) Muscles. \n(B) Lungs. \n(C) Liver. \n(D) Heart. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.640625, "loss": 4.013131618499756 }, { "id": "identity_27359", "image": "IconQA/images/15987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a bucket. Which picture is fifth?\nChoices:\n(A) bucket\n(B) boat\n(C) crab" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.734375, "loss": 4.554766654968262 }, { "id": "identity_37082", "image": "A-OKVQA/images/205486.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What item do multiple elderly persons here grasp?\nChoices:\n(A) scepters\n(B) canes\n(C) wheelchairs\n(D) tiaras" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 8.1640625, "loss": 4.447312355041504 }, { "id": "identity_320882", "image": "ScienceQA/images/1561.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the place shown?\nChoices:\n(A) Florida\n(B) Indiana\n(C) Connecticut\n(D) New Jersey" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.46875, "loss": 4.7061686515808105 }, { "id": "identity_277003", "image": "PMC-VQA/images/PMC8936940_f2_227285.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which direction in the image represents the standard fluorescence visualization?\nChoices:\n(A) Left\n(B) Right\n(C) Top\n(D) Bottom" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.578125, "loss": 4.182104587554932 }, { "id": "identity_21041", "image": "A-OKVQA/images/541596.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people near the bus doing?\nChoices:\n(A) sitting\n(B) standing\n(C) running\n(D) eating" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.390625, "loss": 4.35858154296875 }, { "id": "identity_290192", "image": "TQA/images/question_images/parts_microscope_7198.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the microscope would you observe an object?\nChoices:\n(A) Nose piece\n(B) Slide\n(C) Ocular lenses\n(D) Stage" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.015625, "loss": 4.457729339599609 }, { "id": "identity_270019", "image": "A-OKVQA/images/61175.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which person in the group is wearing a green shirt?\nChoices:\n(A) audience\n(B) referee\n(C) tennis player\n(D) staff" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.171875, "loss": 4.102705001831055 }, { "id": "identity_10166", "image": "Super-CLEVR/images/superCLEVR_new_000963.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a tiny green mountain bike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 22.390625, "loss": 5.429685115814209 }, { "id": "identity_149074", "image": "PlotQA/images/72407.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the Y-axis?" }, { "from": "gpt", "value": "The answer is Year" } ], "clip_score": 18.078125, "loss": 4.382453441619873 }, { "id": "identity_209019", "image": "A-OKVQA/images/558618.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the child's pants?" }, { "from": "gpt", "value": "The answer is Teal" } ], "clip_score": 25.515625, "loss": 4.106664657592773 }, { "id": "identity_3352", "image": "AI2D/images/1884.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the correct direction of energy transfer using the diagram below:\nChoices:\n(A) From frog to rabbit\n(B) From hawk to snake\n(C) From grass to rabbit\n(D) From grasshopper to mouse" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.78125, "loss": 4.168923854827881 }, { "id": "identity_292794", "image": "PMC-VQA/images/PMC4439484_fig2_388453.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the status of the lymph nodes surrounding the tumor? Is it:\n(A) Normal\n(B) Swollen\n(C) Not visible\n(D) Faintly visible" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.203125, "loss": 4.582091331481934 }, { "id": "identity_36799", "image": "PMC-VQA/images/PMC5618513_ijms-18-01864-f001_221292.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the high uptake observed in this PET/CT scan? \nChoices:\n(A) Right supraclavicular lymph nodes \n(B) Abdominal aorta \n(C) Left supraclavicular lymph nodes \n(D) Liver " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.21875, "loss": 4.581860542297363 }, { "id": "identity_293670", "image": "PMC-VQA/images/PMC6627094_ijms-20-03047-f007_7646.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the subject of the study's imaging? \nChoices:\n(A) HER2 expression\n(B) pancreatic cancer cells\n(C) bone marrow\n(D) lungs" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.84375, "loss": 4.227138519287109 }, { "id": "identity_35785", "image": "ScienceQA/images/3677.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which material is this window made of?\nChoices:\n(A) cotton\n(B) wood" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.625, "loss": 4.172942161560059 }, { "id": "identity_221514", "image": "VQA2.0/images/318261.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrian crossing lines are visible on the road?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.125, "loss": 4.052685260772705 }, { "id": "identity_9221", "image": "PMC-VQA/images/PMC6102916_Fig1_357108.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the main finding in the image? \nChoices:\n(A) Necrosis; \n(B) Tubular injury; \n(C) Glomerulitis; \n(D) Interstitial fibrosis. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.609375, "loss": 5.0248799324035645 }, { "id": "identity_286359", "image": "ScienceQA/images/9698.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic is shared by both of these objects?\nChoices:\n(A) sticky\n(B) rough" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.34375, "loss": 4.25159215927124 }, { "id": "identity_35362", "image": "A-OKVQA/images/349059.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What will they squeeze the substance in the tube onto?\nChoices:\n(A) onto washcloth\n(B) toothbrush\n(C) into sink\n(D) onto soap" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.71875, "loss": 4.170855522155762 }, { "id": "identity_77864", "image": "AI2D/images/1823.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many levels of consumers are depicted in this food web?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 33.0, "loss": 4.368947505950928 }, { "id": "identity_27559", "image": "A-OKVQA/images/325532.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How are the objects in the sky powered?\nChoices:\n(A) gas\n(B) wind\n(C) sun\n(D) electricity" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.1171875, "loss": 4.281625270843506 }, { "id": "identity_299724", "image": "A-OKVQA/images/483817.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these surfers hoping for in terms of a natural event?\nChoices:\n(A) tranquility\n(B) doldrums\n(C) eclipse\n(D) large tides" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.796875, "loss": 4.342606067657471 }, { "id": "identity_22092", "image": "PMC-VQA/images/PMC6990568_Fig3_108117.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the variable α represent? \nChoices:\n(A) the angle between the middle line and the left facet line connecting the margins of the inferior articular process. \n(B) the angle between the middle line and the left facet line connecting the margins of the superior articular process. \n(C) the angle between the middle line and the right facet line connecting the margins of the superior articular process. \n(D) the angle between the middle line and the right facet line connecting the margins of the inferior articular process. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.40625, "loss": 4.309737205505371 }, { "id": "identity_6576", "image": "PMC-VQA/images/PMC5746883_f3_258365.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What radiological finding corresponds with the hypoechoic intraductal nodule?\nChoices:\n(A) CT scan \n(B) Mammography \n(C) Ultrasound \n(D) X-ray " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.40625, "loss": 4.591925621032715 }, { "id": "identity_289317", "image": "TextVQA/images/0116dcf25359ea13.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which letter is displayed on the chest of the green candy?" }, { "from": "gpt", "value": "The answer is letter m" } ], "clip_score": 20.671875, "loss": 4.376122951507568 }, { "id": "identity_6323", "image": "DVQA/images/bar_train_00020518.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the fourth group of bars from the bottom?" }, { "from": "gpt", "value": "The answer is farmer" } ], "clip_score": 17.0625, "loss": 4.4234843254089355 }, { "id": "identity_323267", "image": "A-OKVQA/images/431854.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is facing downwards?\nChoices:\n(A) tennis racquet\n(B) sword\n(C) axe\n(D) laser beam" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.859375, "loss": 4.050969123840332 }, { "id": "identity_269921", "image": "ScienceQA/images/832.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) Connecticut\n(B) New Jersey\n(C) Kentucky\n(D) Virginia" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.15625, "loss": 4.754258155822754 }, { "id": "identity_111689", "image": "Super-CLEVR/images/superCLEVR_new_001533.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object located to the immediate right of the golden car?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 19.328125, "loss": 4.747661113739014 }, { "id": "identity_291523", "image": "A-OKVQA/images/329139.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose are the bananas meant?\nChoices:\n(A) planting\n(B) selling\n(C) eating\n(D) disposal" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.7578125, "loss": 4.385401248931885 }, { "id": "identity_319706", "image": "PMC-VQA/images/PMC9247462_F5_328791.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of MRI signal did these lesions show?\nChoices:\n(A) T1-hyperintense\n(B) T2-hyperintense\n(C) T1-hypointense\n(D) T2-hypointense" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.625, "loss": 4.088490009307861 }, { "id": "identity_299005", "image": "VQA2.0/images/476852.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What drink is in the mug?" }, { "from": "gpt", "value": "The answer is nothing" } ], "clip_score": 21.875, "loss": 4.080317974090576 }, { "id": "identity_267502", "image": "FigureQA/images/69083.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Orchid the color with the largest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.0625, "loss": 4.019532203674316 }, { "id": "identity_312778", "image": "Super-CLEVR/images/superCLEVR_new_001884.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the shape of the red matte object the same as the small blue metal object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 12.625, "loss": 5.180581092834473 }, { "id": "identity_268808", "image": "Super-CLEVR/images/superCLEVR_new_001463.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there a greater number of bicycles behind the large gray double bus compared to the small brown metallic trucks?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 25.46875, "loss": 5.07099723815918 }, { "id": "identity_10613", "image": "TQA/images/question_images/earth_day_night_2589.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the equator in relation to the poles?\nChoices:\n(A) In the middle\n(B) South\n(C) North\n(D) East" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.203125, "loss": 4.110444068908691 }, { "id": "identity_17685", "image": "TQA/images/abc_question_images/layers_of_atmosphere_18101.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter is vertically the highest?\nChoices:\n(A) P\n(B) M\n(C) J\n(D) W" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.5, "loss": 4.259765148162842 }, { "id": "identity_22438", "image": "TextVQA/images/c07655095dbd052c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what's the sign behind th ecounter person?" }, { "from": "gpt", "value": "The answer is septonic" } ], "clip_score": 12.953125, "loss": 5.542456150054932 }, { "id": "identity_31037", "image": "A-OKVQA/images/21213.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What purpose does the weight in the back serve?\nChoices:\n(A) paper weight\n(B) weigh money\n(C) balance table\n(D) measure price" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.8046875, "loss": 4.410930633544922 }, { "id": "identity_337254", "image": "TextVQA/images/00fbe21f9e1b79de.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is this vehicle headed?" }, { "from": "gpt", "value": "The answer is rodeo" } ], "clip_score": 24.75, "loss": 4.121367931365967 }, { "id": "identity_316407", "image": "TQA/images/abc_question_images/parts_microscope_17198.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter indicates the lenses that adjust something?\nChoices:\n(A) J\n(B) M\n(C) A\n(D) K" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.265625, "loss": 4.094318389892578 }, { "id": "identity_121119", "image": "A-OKVQA/images/231655.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats are visible in the background?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 24.5, "loss": 4.423978328704834 }, { "id": "identity_181978", "image": "VQA2.0/images/73951.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of headwear is the person in the foreground wearing?" }, { "from": "gpt", "value": "The answer is A cap" } ], "clip_score": 21.578125, "loss": 4.065024375915527 }, { "id": "identity_6110", "image": "A-OKVQA/images/445019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is near the ball?\nChoices:\n(A) toddler\n(B) baby\n(C) batter\n(D) dog" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.0546875, "loss": 4.233142852783203 }, { "id": "identity_37088", "image": "PMC-VQA/images/PMC4058497_fig1_298382.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the lesion location observed on the MRI?\nChoices:\n(A) Subarachnoid space \n(B) Cerebral cortex \n(C) Meckel's cavity \n(D) Brainstem" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.890625, "loss": 4.353549957275391 }, { "id": "identity_281642", "image": "AI2D/images/2197.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which locations on the diagrams can the eggs be found?\nChoices:\n(A) T and C\n(B) J and L\n(C) E and B\n(D) G and P" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.46875, "loss": 4.0515594482421875 }, { "id": "identity_276091", "image": "TQA/images/abc_question_images/skin_cross_section_17580.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the letter \"P\" in the given options?\nChoices:\n(A) ECCRINE SWEAT GLAND\n(B) PORES\n(C) SKIN SURFACE\n(D) DERMIS" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.0, "loss": 4.491576671600342 }, { "id": "identity_310422", "image": "A-OKVQA/images/402715.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do these creatures have?\nChoices:\n(A) wings\n(B) long necks\n(C) wool\n(D) quills" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.15625, "loss": 4.441897392272949 }, { "id": "identity_33449", "image": "PMC-VQA/images/PMC6618072_hed25675-fig-0006_6431.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the green arrow in the image? \nChoices:\n(A) The reconstructed trachea. \n(B) The FPTAPF. \n(C) The esophagus. \n(D) The heart. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.78125, "loss": 4.7593793869018555 }, { "id": "identity_34427", "image": "PMC-VQA/images/PMC6232946_rcr2385-fig-0001_395611.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of imaging was used to capture this image? \nChoices:\n(A) X-ray \n(B) MRI \n(C) CT scan \n(D) Ultrasound " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.5625, "loss": 4.530594348907471 }, { "id": "identity_310796", "image": "AI2D/images/1449.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents a Northern Summer/Southern Winter?\nChoices:\n(A) G\n(B) E\n(C) N\n(D) B" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.109375, "loss": 4.646541118621826 }, { "id": "identity_104706", "image": "A-OKVQA/images/420634.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly behind the orange truck?" }, { "from": "gpt", "value": "The answer is A green van" } ], "clip_score": 17.921875, "loss": 4.887661933898926 }, { "id": "identity_237611", "image": "TextVQA/images/8d6ae29de134eeef.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the model name of the typewriter and identify two other items visible in the image?" }, { "from": "gpt", "value": "The answer is Baby, ink bottle, pen" } ], "clip_score": 35.15625, "loss": 4.495007038116455 }, { "id": "identity_123050", "image": "VQA-AS/images/1344.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 26.03125, "loss": 4.422170162200928 }, { "id": "identity_306804", "image": "AI2D/images/3140.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the upper epidermis of the leaf?\nChoices:\n(A) N\n(B) P\n(C) B\n(D) E" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.171875, "loss": 4.295724868774414 }, { "id": "identity_265974", "image": "VizWiz/images/VizWiz_train_00015553.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me the color of this shirt?" }, { "from": "gpt", "value": "The answer is grey" } ], "clip_score": 25.5, "loss": 4.12770414352417 }, { "id": "identity_219629", "image": "Super-CLEVR/images/superCLEVR_new_000398.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any vehicles in the image that have a striped pattern?" }, { "from": "gpt", "value": "The answer is Yes, one vehicle" } ], "clip_score": 27.609375, "loss": 4.203000545501709 }, { "id": "identity_303667", "image": "PMC-VQA/images/PMC6686529_Fig2_21311.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which section of the plantar fascia was examined in the study?\nChoices:\n(A) Sagittal section\n(B) Coronal section\n(C) Transverse section\n(D) Oblique section" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.40625, "loss": 4.128088474273682 }, { "id": "identity_293219", "image": "TextVQA/images/24bec4d1080a9171.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which letter is the first one on the boy's shirt?" }, { "from": "gpt", "value": "The answer is t" } ], "clip_score": 15.2109375, "loss": 4.260517597198486 }, { "id": "identity_281542", "image": "A-OKVQA/images/411109.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the man being on the ground?\nChoices:\n(A) unhappy\n(B) fell down\n(C) resting\n(D) touching base" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.703125, "loss": 4.580508232116699 }, { "id": "identity_300204", "image": "A-OKVQA/images/59361.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What animals are visible?\nChoices:\n(A) cows\n(B) antelopes\n(C) giraffes\n(D) horses" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.34375, "loss": 4.048707008361816 }, { "id": "identity_338444", "image": "A-OKVQA/images/342408.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the animal mostly dry?\nChoices:\n(A) indoors\n(B) umbrella\n(C) hot\n(D) tent" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.5859375, "loss": 4.492615699768066 }, { "id": "identity_304349", "image": "DVQA/images/bar_train_00011446.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store is represented by the slateblue color?" }, { "from": "gpt", "value": "The answer is task" } ], "clip_score": 26.390625, "loss": 5.2750935554504395 }, { "id": "identity_316603", "image": "VQA2.0/images/491580.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is this place?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 15.0625, "loss": 4.132782936096191 }, { "id": "identity_333110", "image": "TextVQA/images/e4c065f9d9fc116e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the beer on the end?" }, { "from": "gpt", "value": "The answer is stallhager" } ], "clip_score": 22.75, "loss": 5.146186351776123 }, { "id": "identity_103499", "image": "A-OKVQA/images/252929.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant shape used in the decorations?" }, { "from": "gpt", "value": "The answer is Star" } ], "clip_score": 14.90625, "loss": 4.583287239074707 }, { "id": "identity_269787", "image": "PMC-VQA/images/PMC8870907_diagnostics-12-00384-f006_203200.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which side is the anterior widening of the sacroiliac joint visible?\nChoices:\n(A) right side\n(B) left side\n(C) both sides\n(D) none of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.859375, "loss": 4.344242095947266 }, { "id": "identity_261533", "image": "FigureQA/images/33744.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange Red the highest level?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.84375, "loss": 4.313038349151611 }, { "id": "identity_313580", "image": "VQA2.0/images/150769.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How is the weather?" }, { "from": "gpt", "value": "The answer is motorcycle" } ], "clip_score": 21.859375, "loss": 4.972638130187988 }, { "id": "identity_267212", "image": "Super-CLEVR/images/superCLEVR_new_001946.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more small cyan airliners to the left of the tiny matte bus than large matte cars to the right of the yellow truck?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 37.1875, "loss": 4.85843563079834 }, { "id": "identity_115963", "image": "TextVQA/images/00ee02cc29bf07e2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the sign with a tree and flame symbol indicate?" }, { "from": "gpt", "value": "The answer is Flammable solid" } ], "clip_score": 15.7109375, "loss": 4.360270977020264 }, { "id": "identity_146601", "image": "DocVQA/images/skbf0001_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who will meet the group with room keys at the Waldorf-Astoria hotel?" }, { "from": "gpt", "value": "The answer is John Powell" } ], "clip_score": 22.328125, "loss": 4.021999835968018 }, { "id": "identity_204920", "image": "TextVQA/images/caeef917566e0387.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many light towers are illuminated in the stadium?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.34375, "loss": 5.039685249328613 }, { "id": "identity_186229", "image": "VQA-AS/images/2522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of door is shown in the image?" }, { "from": "gpt", "value": "The answer is A double panel glass door" } ], "clip_score": 27.875, "loss": 4.44503116607666 }, { "id": "identity_321953", "image": "A-OKVQA/images/2498.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the cause of the puddle of water in the foreground of the players?\nChoices:\n(A) snow\n(B) sleet\n(C) rain\n(D) low tide" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.734375, "loss": 4.418410301208496 }, { "id": "identity_288289", "image": "PMC-VQA/images/PMC6242942_Fig2_397843.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible in the image displayed inside the gallery?\nChoices:\n(A) Coffee seed components \n(B) Different developmental stages of the insect \n(C) Posterior of the insect \n(D) Transparent berry components." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.078125, "loss": 4.558348655700684 }, { "id": "identity_144261", "image": "DocVQA/images/zldg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last name of the Chairman of Group 2?" }, { "from": "gpt", "value": "The answer is Stare" } ], "clip_score": 15.1796875, "loss": 5.551753997802734 }, { "id": "identity_303741", "image": "TQA/images/abc_question_images/human_system_digestive_13720.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the organ that stores and concentrates bile before releasing it into the small intestine?\nChoices:\n(A) F\n(B) V\n(C) U\n(D) K" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.21875, "loss": 4.038226127624512 }, { "id": "identity_306937", "image": "PMC-VQA/images/PMC3852808_fig3_249325.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the condition shown here?\nChoices:\n(A) Pre-stenotic Dilation \n(B) Pre-hypertensive Constriction \n(C) Post-stenotic Dilation \n(D) Post-hypertensive Constriction" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.5625, "loss": 4.593024253845215 }, { "id": "identity_938", "image": "Super-CLEVR/images/superCLEVR_new_001214.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer small rubber airliners to the left of the gray matte aeroplane than big brown matte buss?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 24.328125, "loss": 5.141994953155518 }, { "id": "identity_317521", "image": "PMC-VQA/images/PMC3016500_F0003_83178.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is observed in the post embolization angiogram?\nChoices:\n(A) Active contrast extravasation\n(B) Steel coils\n(C) Filling defects in the veins\n(D) Lack of blood flow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.90625, "loss": 4.808831214904785 }, { "id": "identity_244294", "image": "AI2D/images/4431.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the fin labeled C and describe the differences between it and the fin labeled B" }, { "from": "gpt", "value": "The answer is Single fin is continuous; trigger fin is pointed and defensive" } ], "clip_score": 28.78125, "loss": 4.6189775466918945 }, { "id": "identity_336891", "image": "A-OKVQA/images/247146.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the skill level of the teams playing this game?\nChoices:\n(A) both novices\n(B) even\n(C) lopsided\n(D) senior masters" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.515625, "loss": 4.734805107116699 }, { "id": "identity_223059", "image": "A-OKVQA/images/109231.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background to the left of the red aircraft?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.5625, "loss": 4.731363296508789 }, { "id": "identity_25180", "image": "A-OKVQA/images/495051.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why has the man covered his head?\nChoices:\n(A) style\n(B) keep cool\n(C) protection\n(D) uniform" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 9.640625, "loss": 4.401346206665039 }, { "id": "identity_25212", "image": "PMC-VQA/images/PMC2679713_F1_38145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the imaging findings of Multiple Sclerosis on FLAIR images? \nChoices:\n(A) Decreased signal intensity in the periventricular and subcortical white matter \n(B) Bright signal in the periventricular and subcortical white matter \n(C) Presence of fluid-filled spaces in the brain \n(D) Thickening of the meninges " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.0, "loss": 4.425124645233154 }, { "id": "identity_28646", "image": "A-OKVQA/images/340998.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What occupation does the person with the loaded bike beside them?\nChoices:\n(A) patisserie\n(B) fortune teller\n(C) florist\n(D) food seller" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.234375, "loss": 4.324408531188965 }, { "id": "identity_338225", "image": "A-OKVQA/images/11805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of event is taking place?\nChoices:\n(A) wedding\n(B) birthday party\n(C) farmers market\n(D) reception" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.171875, "loss": 4.199942588806152 }, { "id": "identity_34706", "image": "PMC-VQA/images/PMC5264455_Fig2_118412.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the arrow pointing towards in the image? \nChoices:\n(A) The apex of the chest \n(B) The thoracoscopic suction catheter \n(C) The visible defect in the conduit \n(D) The esophageal stricture " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.28125, "loss": 4.320279598236084 }, { "id": "identity_290092", "image": "A-OKVQA/images/293233.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind cricketers wearing white clothing during matches?\nChoices:\n(A) reduces heat\n(B) peace\n(C) unique\n(D) dress code" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.1484375, "loss": 4.266840934753418 }, { "id": "identity_278616", "image": "FigureQA/images/51413.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Seafoam or Blue?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.6875, "loss": 4.693507671356201 }, { "id": "identity_324417", "image": "PMC-VQA/images/PMC7684875_Fig2_303874.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is found in both lower lobes?\nChoices:\n(A) pneumonia \n(B) pleural effusion \n(C) lung fibrosis \n(D) tuberculosis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.890625, "loss": 4.218049049377441 }, { "id": "identity_277798", "image": "PMC-VQA/images/PMC3407775_F1_147337.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which body part is impacted by the soft tissue mass among the following options: left knee, right shoulder, neck, or left elbow?\nChoices:\n(A) left knee \n(B) right shoulder \n(C) neck \n(D) left elbow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.265625, "loss": 4.006467342376709 }, { "id": "identity_15628", "image": "VQA-AS/images/553.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bike?" }, { "from": "gpt", "value": "The answer is orange" } ], "clip_score": 17.78125, "loss": 4.210288047790527 }, { "id": "identity_23416", "image": "ScienceQA/images/7819.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Maryland?\nChoices:\n(A) Annapolis\n(B) Trenton\n(C) Harrisburg\n(D) Baltimore" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.03125, "loss": 4.16910457611084 }, { "id": "identity_21539", "image": "A-OKVQA/images/519784.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on the floor next to the van?\nChoices:\n(A) arrow\n(B) footprints\n(C) coyote\n(D) snow" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 6.0859375, "loss": 4.1998772621154785 }, { "id": "identity_1509", "image": "A-OKVQA/images/395198.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the woman riding?\nChoices:\n(A) bike\n(B) skateboard\n(C) scooter\n(D) motorcycle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.515625, "loss": 4.424586772918701 }, { "id": "identity_14802", "image": "AI2D/images/4311.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label is the sun?\nChoices:\n(A) Label I\n(B) Label C\n(C) Label J\n(D) Label B" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.5, "loss": 4.842043876647949 }, { "id": "identity_293146", "image": "PMC-VQA/images/PMC5550971_RSIF20170203F1_201629.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the color coding in the image?\nChoices:\n(A) Age of the patient \n(B) Extent of LV damage \n(C) Type of heart disease \n(D) Race of the patient" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.03125, "loss": 4.596118927001953 }, { "id": "identity_11140", "image": "FigureQA/images/14606.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Crimson the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.3125, "loss": 4.604137420654297 }, { "id": "identity_118541", "image": "VizWiz/images/VizWiz_train_00017568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of device is shown in the image?" }, { "from": "gpt", "value": "The answer is Conference phone" } ], "clip_score": 25.59375, "loss": 5.419666767120361 }, { "id": "identity_23623", "image": "A-OKVQA/images/2687.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are the men's coats yellow in color?\nChoices:\n(A) camouflage\n(B) dress code\n(C) visibility\n(D) fashion" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.3046875, "loss": 4.132081031799316 }, { "id": "identity_16126", "image": "A-OKVQA/images/14966.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Tropical climate is suits for which tree?\nChoices:\n(A) coco bean\n(B) coconut\n(C) maple\n(D) palm" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.59375, "loss": 4.0351057052612305 }, { "id": "identity_12775", "image": "A-OKVQA/images/483027.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are they here?\nChoices:\n(A) to rest\n(B) clean up\n(C) to eat\n(D) selling food" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.09375, "loss": 4.108820915222168 }, { "id": "identity_291101", "image": "PMC-VQA/images/PMC3669051_F4_208212.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the primary subject of focus in the image?\nChoices:\n(A) Positive cells \n(B) Negative cells \n(C) Staining \n(D) Arrows" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.046875, "loss": 4.720876693725586 }, { "id": "identity_20496", "image": "A-OKVQA/images/78466.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What game brand is sponsoring this facility?\nChoices:\n(A) wii\n(B) x-box\n(C) playstation 3\n(D) nintendo" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.2265625, "loss": 4.062941551208496 }, { "id": "identity_330296", "image": "A-OKVQA/images/212501.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this large green sculpture made up of?\nChoices:\n(A) paint\n(B) phones\n(C) flash lights\n(D) traffic lights" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 36.3125, "loss": 4.0013556480407715 }, { "id": "identity_301101", "image": "AI2D/images/4179.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What process causes magma to become igneous rock?\nChoices:\n(A) melting\n(B) cooling\n(C) pressure\n(D) weathering" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.15625, "loss": 4.526944160461426 }, { "id": "identity_335134", "image": "ScienceQA/images/13332.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What term corresponds to this image?\nChoices:\n(A) vascular plant\n(B) nonvascular plant" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.5859375, "loss": 4.1600799560546875 }, { "id": "identity_316350", "image": "ScienceQA/images/12495.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which place is highlighted?\nChoices:\n(A) Fiji\n(B) Kiribati\n(C) Tuvalu\n(D) Tonga" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.140625, "loss": 4.109738349914551 }, { "id": "identity_21897", "image": "TextVQA/images/e6b1ae1ea0e0c099.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the catcher's number?" }, { "from": "gpt", "value": "The answer is no catch" } ], "clip_score": 23.671875, "loss": 4.932275295257568 }, { "id": "identity_301539", "image": "A-OKVQA/images/413874.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the profession of the people in white?\nChoices:\n(A) athletes\n(B) nurses\n(C) doctors\n(D) teachers" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.9140625, "loss": 4.939089298248291 }, { "id": "identity_295759", "image": "PMC-VQA/images/PMC4168241_fig1_321153.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the object being identified in the image?\nChoices:\n(A) Ovarian cyst\n(B) Uterine fibroid\n(C) Endometrial carcinoma\n(D) Cervical polyp" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.15625, "loss": 4.570301532745361 }, { "id": "identity_216119", "image": "A-OKVQA/images/492471.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the boom of the sailboat?" }, { "from": "gpt", "value": "The answer is Silver" } ], "clip_score": 25.671875, "loss": 4.429219722747803 }, { "id": "identity_304720", "image": "PMC-VQA/images/PMC5311912_f6_128224.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of microscopy was used to capture this image?\nChoices:\n(A) Transmission electron microscopy (TEM)\n(B) Scanning tunneling microscopy (STM)\n(C) Confocal laser-scanning microscopy\n(D) Atomic force microscopy (AFM)" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.953125, "loss": 4.290293216705322 }, { "id": "identity_150014", "image": "DocVQA/images/ffgk0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the location of the meeting mentioned in the letter?" }, { "from": "gpt", "value": "The answer is Houston" } ], "clip_score": 22.484375, "loss": 5.654728889465332 }, { "id": "identity_14044", "image": "ScienceQA/images/15797.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this passage describe the weather or the climate?\nChoices:\n(A) climate\n(B) weather" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.6875, "loss": 4.119260311126709 }, { "id": "identity_63402", "image": "AI2D/images/2913.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many paired fins are present in the fish diagram?" }, { "from": "gpt", "value": "The answer is Two pairs" } ], "clip_score": 31.625, "loss": 4.935797214508057 }, { "id": "identity_19250", "image": "IconQA/images/18214.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a rocket. Which picture is fourth?\nChoices:\n(A) moon\n(B) star\n(C) rocket" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.34375, "loss": 4.736054420471191 }, { "id": "identity_35435", "image": "DVQA/images/bar_train_00132043.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group has the smallest summed value?" }, { "from": "gpt", "value": "The answer is name" } ], "clip_score": 19.125, "loss": 4.070629596710205 }, { "id": "identity_18028", "image": "PMC-VQA/images/PMC4101714_F2_306698.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What diagnostic tool was used for the biopsy?\nChoices:\n(A) MRI \n(B) CT scan \n(C) Ultrasound \n(D) X-ray" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.734375, "loss": 4.073087215423584 }, { "id": "identity_294656", "image": "PMC-VQA/images/PMC4383018_f8-ijo-46-05-1872_374352.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How is the staining of cytoplasm depicted in the image? \nOptions:\n(A) blue\n(B) red\n(C) green\n(D) yellow." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.8125, "loss": 4.128968715667725 }, { "id": "identity_182765", "image": "PMC-VQA/images/PMC7429820_Fig1_234904.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different colors are used to label structures in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.453125, "loss": 4.038443565368652 }, { "id": "identity_182556", "image": "TextVQA/images/040834237a7aab4a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the minute hand pointing at?" }, { "from": "gpt", "value": "The answer is XII" } ], "clip_score": 21.1875, "loss": 4.040553092956543 }, { "id": "identity_271237", "image": "TQA/images/abc_question_images/human_system_reproductory_17044.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which labeled organ is responsible for producing sperm and secreting testosterone? \n(A) E\n(B) C\n(C) R\n(D) T" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.34375, "loss": 4.069421291351318 }, { "id": "identity_19796", "image": "VQA2.0/images/501842.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What letters are on the truck?" }, { "from": "gpt", "value": "The answer is qcs" } ], "clip_score": 21.9375, "loss": 4.1424031257629395 }, { "id": "identity_315504", "image": "ScienceQA/images/14250.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these places is indicated on the map?\nChoices:\n(A) Los Angeles\n(B) Denver\n(C) Las Vegas\n(D) San Jose" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.03125, "loss": 4.557657241821289 }, { "id": "identity_197014", "image": "VQA2.0/images/136886.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What sport is being played in this image?" }, { "from": "gpt", "value": "The answer is Rugby" } ], "clip_score": 29.53125, "loss": 5.052675247192383 }, { "id": "identity_332428", "image": "A-OKVQA/images/160740.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this photo taken?\nChoices:\n(A) china\n(B) japan\n(C) north korea\n(D) south korea" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.90625, "loss": 4.1622419357299805 }, { "id": "identity_315760", "image": "TextVQA/images/317b3596da5dcca3.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on this white sign?" }, { "from": "gpt", "value": "The answer is upp" } ], "clip_score": 27.625, "loss": 4.264261245727539 }, { "id": "identity_20420", "image": "ScienceQA/images/7286.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the time line, which event happens after James Marshall discovers gold and before gold becomes harder to find?\nChoices:\n(A) Many people move to California.\n(B) Silver is discovered in Nevada." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.90625, "loss": 4.077796936035156 }, { "id": "identity_330206", "image": "ScienceQA/images/16558.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared characteristic of these items?\nChoices:\n(A) sticky\n(B) bendable" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.84375, "loss": 4.4714460372924805 }, { "id": "identity_300487", "image": "TextVQA/images/0e5fee6f27d73694.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of tag is in the title of the comic?" }, { "from": "gpt", "value": "The answer is denkste dir so" } ], "clip_score": 20.875, "loss": 4.691963195800781 }, { "id": "identity_296204", "image": "DVQA/images/bar_train_00112888.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the least accuracy?" }, { "from": "gpt", "value": "The answer is means" } ], "clip_score": 32.875, "loss": 4.734355449676514 }, { "id": "identity_12849", "image": "A-OKVQA/images/518685.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the green umbrella being used to block?\nChoices:\n(A) sun\n(B) wind\n(C) bugs\n(D) rain" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.9296875, "loss": 4.2910356521606445 }, { "id": "identity_222109", "image": "VQA2.0/images/43664.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pizza slices are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.390625, "loss": 4.3195695877075195 }, { "id": "identity_280776", "image": "TextVQA/images/5371cd2bc59dab7b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is visible on camera 1?" }, { "from": "gpt", "value": "The answer is layers below" } ], "clip_score": 21.265625, "loss": 6.578436374664307 }, { "id": "identity_300251", "image": "TabMWP/images/31811.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city had the fewest home sales in 2005 according to the real estate agent's tracking?\nChoices:\n(A) Pennytown\n(B) Briggs Corner\n(C) Liberty City\n(D) Parville" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.6875, "loss": 4.272744655609131 }, { "id": "identity_37202", "image": "A-OKVQA/images/97097.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the danger of partaking in this activity with no jacket?\nChoices:\n(A) starvation\n(B) hypothermia\n(C) bear attack\n(D) dehydration" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.671875, "loss": 4.411063194274902 }, { "id": "identity_312337", "image": "FigureQA/images/62287.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Firebrick?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.203125, "loss": 4.297450065612793 }, { "id": "identity_140239", "image": "DocVQA/images/hkkw0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the contact person at the agency?" }, { "from": "gpt", "value": "The answer is Jack Haymes" } ], "clip_score": 20.609375, "loss": 5.460291862487793 }, { "id": "identity_308059", "image": "ScienceQA/images/5879.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which place is highlighted?\nChoices:\n(A) Papua New Guinea\n(B) Australia\n(C) Vanuatu\n(D) Nauru" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.421875, "loss": 4.3315839767456055 }, { "id": "identity_325286", "image": "A-OKVQA/images/430155.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these overhead wires for?\nChoices:\n(A) powering homes\n(B) telephone lines\n(C) streetcars\n(D) powering businesses" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.59375, "loss": 4.511723518371582 }, { "id": "identity_328182", "image": "Super-CLEVR/images/superCLEVR_new_000859.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the shape of the tiny purple thing the same as the small brown metal object on the left side of the small purple matte vehicle?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 27.734375, "loss": 5.096235752105713 }, { "id": "identity_38257", "image": "PMC-VQA/images/PMC6384036_FIG2_438696.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of image is displayed in Panel A? \nChoices:\n(A) MRI \n(B) CT scan \n(C) X-ray \n(D) Ultrasound " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.75, "loss": 4.066113471984863 }, { "id": "identity_276395", "image": "TextVQA/images/9a6693f2ed5e340e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the name of a sponsor?" }, { "from": "gpt", "value": "The answer is ilkka" } ], "clip_score": 19.53125, "loss": 5.204311847686768 }, { "id": "identity_296670", "image": "ScienceQA/images/7045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image shown? \nChoices:\n(A) Rhode Island\n(B) Pennsylvania\n(C) South Carolina\n(D) Massachusetts" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.5, "loss": 4.640083312988281 }, { "id": "identity_23706", "image": "TextVQA/images/353cfc5f107e8805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what greeting is written on the stores window?" }, { "from": "gpt", "value": "The answer is hi!!" } ], "clip_score": 20.390625, "loss": 4.176681995391846 }, { "id": "identity_263119", "image": "A-OKVQA/images/461378.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is used to construct the lighting item shown in the image? Is it:\n(A) wax\n(B) glass\n(C) bulbs\n(D) wood" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.875, "loss": 4.358850479125977 }, { "id": "identity_184217", "image": "PMC-VQA/images/PMC7339797_Fig4_206309.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the yellow box highlight in the image?" }, { "from": "gpt", "value": "The answer is Ground-glass opacity" } ], "clip_score": 21.640625, "loss": 4.189499378204346 }, { "id": "identity_107246", "image": "VQA2.0/images/518754.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bridges are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.703125, "loss": 4.4459686279296875 }, { "id": "identity_318839", "image": "DVQA/images/bar_train_00183508.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second group of bars?" }, { "from": "gpt", "value": "The answer is dog" } ], "clip_score": 20.875, "loss": 4.288590431213379 }, { "id": "identity_274413", "image": "TQA/images/question_images/parts_worm_7346.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the outer covering of a worm?\nChoices:\n(A) Cuticle\n(B) Median bulb\n(C) Nerve ring\n(D) Stylet" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.203125, "loss": 4.044934272766113 }, { "id": "identity_4154", "image": "VizWiz/images/VizWiz_train_00012319.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 17.359375, "loss": 4.2273945808410645 }, { "id": "identity_291503", "image": "PMC-VQA/images/PMC6038976_F2_340196.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What other observation was made in addition to the solid mass? \nChoices:\n(A) Bleeding \n(B) Fibrosis \n(C) Calcification \n(D) Infection" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.53125, "loss": 4.123039245605469 }, { "id": "identity_145035", "image": "DocVQA/images/jhjh0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the position of Con Marshall at Scottsbluff Star-Herald?" }, { "from": "gpt", "value": "The answer is Farm Writer" } ], "clip_score": 19.90625, "loss": 4.437930107116699 }, { "id": "identity_308763", "image": "FigureQA/images/79058.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Slate's value the highest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.421875, "loss": 4.430498123168945 }, { "id": "identity_250287", "image": "PMC-VQA/images/PMC2992919_F0001_79809.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the structure that the arrow head is pointing to and describe its significance in the context of an angiogram" }, { "from": "gpt", "value": "The answer is Tumor bush indicates abnormal blood vessels supplying a tumor" } ], "clip_score": 29.0, "loss": 4.294301509857178 }, { "id": "identity_320668", "image": "PMC-VQA/images/PMC4542182_Figure4_415736.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What structure is depicted in the image?\nChoices:\n(A) Vascular channel wall\n(B) Smooth muscle cells\n(C) Cancer cells\n(D) Blood clots" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.140625, "loss": 4.407917499542236 }, { "id": "identity_65779", "image": "ScienceQA/images/6932.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What can the presence of breccia, as shown in the image, indicate about the rock's geological history?" }, { "from": "gpt", "value": "The answer is Violent geological events" } ], "clip_score": 26.8125, "loss": 4.638640403747559 }, { "id": "identity_274675", "image": "IconQA/images/5339.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do all the dogs have a food bowl each?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.046875, "loss": 4.128636837005615 }, { "id": "identity_303401", "image": "Super-CLEVR/images/superCLEVR_new_000987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What size is the object that is in front of the vehicle and to the left of the small purple shiny object?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 21.90625, "loss": 4.625300407409668 }, { "id": "identity_328308", "image": "PMC-VQA/images/PMC4779085_Fig5_8741.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the abnormality seen in this image?\nChoices:\n(A) right coronary artery aneurysm\n(B) left coronary artery aneurysm\n(C) pulmonary arterial hypertension\n(D) aortic stenosis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.390625, "loss": 4.580628395080566 }, { "id": "identity_281069", "image": "PMC-VQA/images/PMC2734940_fig2_44592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the central dot in the MRI image of the cyst symbolize? Is it:\n(A) Calcifications\n(B) Scolex\n(C) Fluid\n(D) Tumor" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.09375, "loss": 5.2410197257995605 }, { "id": "identity_112593", "image": "A-OKVQA/images/56109.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible on the sidewalk in this image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 24.453125, "loss": 4.41776180267334 }, { "id": "identity_17044", "image": "FigureQA/images/20684.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Purple the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.96875, "loss": 4.777781009674072 }, { "id": "identity_54157", "image": "DVQA/images/bar_train_00009138.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the least preferred category under the 'master' label?" }, { "from": "gpt", "value": "The answer is categories" } ], "clip_score": 27.296875, "loss": 5.176079750061035 }, { "id": "identity_31334", "image": "VizWiz/images/VizWiz_train_00017646.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label say on this bottle? Thank you." }, { "from": "gpt", "value": "The answer is peroxyl colgate" } ], "clip_score": 36.4375, "loss": 4.023387908935547 }, { "id": "identity_299259", "image": "PMC-VQA/images/PMC4499221_Fig4_405305.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is contained in image d?\nChoices:\n(A) Details of areas A.\n(B) Dental subimage with two connected neighboring regions.\n(C) Common boundary detection.\n(D) None of the above." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.921875, "loss": 4.311300754547119 }, { "id": "identity_276283", "image": "A-OKVQA/images/136770.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which brand of footwear is the individual in the white shirt wearing? Is it adidas, nike, skechers, or new balance?\nChoices:\n(A) adidas\n(B) nike\n(C) skechers\n(D) new balance" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.3125, "loss": 4.021312236785889 }, { "id": "identity_329548", "image": "ScienceQA/images/3214.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) New Haven\n(B) Hartford\n(C) Philadelphia\n(D) Montpelier" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.015625, "loss": 4.018792152404785 }, { "id": "identity_292658", "image": "A-OKVQA/images/318701.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of being does the person resemble in terms of appearance?\nChoices:\n(A) ghost\n(B) werewolf\n(C) vampire\n(D) wendigo" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.96875, "loss": 4.604117393493652 }, { "id": "identity_19998", "image": "PMC-VQA/images/PMC6156119_FIG2_371951.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the report, what area of the brain was most likely affected?\nChoices:\n(A) Left-sided temporal area \n(B) Right-sided parietal area \n(C) Frontal lobe \n(D) Occipital lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.453125, "loss": 4.167519569396973 }, { "id": "identity_271327", "image": "AI2D/images/3690.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which body part is represented by the letter C in the given choices? \n(A) none of the above\n(B) stomach\n(C) appendix\n(D) salivary glands" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.125, "loss": 4.730123519897461 }, { "id": "identity_269551", "image": "AI2D/images/3538.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the central most part of the diagram labeled as B.\nChoices:\n(A) Centriole\n(B) Lysosome\n(C) Karyon\n(D) Endosome" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.734375, "loss": 4.270360469818115 }, { "id": "identity_282227", "image": "PMC-VQA/images/PMC3874542_F5_254590.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which column indicates the alterations in connectivity of the L dlPFC?\nOptions:\n(A) Left\n(B) Right\n(C) Middle\n(D) Upper" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.578125, "loss": 4.650204658508301 }, { "id": "identity_23562", "image": "ScienceQA/images/4426.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these three objects have in common?\nChoices:\n(A) sour\n(B) translucent\n(C) yellow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.125, "loss": 4.362395286560059 }, { "id": "identity_110318", "image": "VQA2.0/images/197959.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the refrigerator in this kitchen?" }, { "from": "gpt", "value": "The answer is Not visible" } ], "clip_score": 24.40625, "loss": 4.136357307434082 }, { "id": "identity_301622", "image": "PMC-VQA/images/PMC3292556_pone-0032429-g007_128046.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this show?\nChoices:\n(A) Cross sections of a frog\n(B) Frontal sections of a human brain\n(C) Sagittal sections of a mouse\n(D) Transverse sections of a chicken egg" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.0625, "loss": 4.337464332580566 }, { "id": "identity_310473", "image": "TabMWP/images/4338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Tommy's campground is near two hiking trails with scenic endpoints. One trail leads to an underground lake at the bottom of a small cave. The other trail leads to a mountain peak. The elevation of each endpoint is shown on the campground map. Which trail's endpoint is at a higher elevation?\nChoices:\n(A) underground lake\n(B) mountain peak" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.3125, "loss": 4.006950855255127 }, { "id": "identity_282018", "image": "ScienceQA/images/15554.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following correctly describes the transfer of thermal energy during this time: from each refrigerator to the surroundings or from the surroundings to each refrigerator?\nChoices:\n(A) each refrigerator . . . the surroundings\n(B) the surroundings . . . each refrigerator" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.1875, "loss": 4.120580196380615 }, { "id": "identity_66014", "image": "ScienceQA/images/6233.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color used to represent the other states in this map?" }, { "from": "gpt", "value": "The answer is Light green" } ], "clip_score": 31.125, "loss": 4.234292984008789 }, { "id": "identity_293526", "image": "A-OKVQA/images/524557.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which role is the kid in yellow performing among the following choices: umpire, water boy, goalie, or referee?\nChoices:\n(A) umpire\n(B) water boy\n(C) goalie\n(D) referee" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.765625, "loss": 4.255892276763916 }, { "id": "identity_73857", "image": "AI2D/images/139.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many phases of the moon are depicted before it reaches the Full Moon starting from the New Moon?" }, { "from": "gpt", "value": "The answer is Three phases" } ], "clip_score": 30.8125, "loss": 4.600958347320557 }, { "id": "identity_250433", "image": "A-OKVQA/images/26448.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the material used to depict the snow in this art piece and explain why it is not one of the other materials listed" }, { "from": "gpt", "value": "The answer is cotton" } ], "clip_score": 27.046875, "loss": 4.07871675491333 }, { "id": "identity_130235", "image": "VQA-AS/images/2434.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many food items can be seen on the blanket?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 24.171875, "loss": 4.723696708679199 }, { "id": "identity_194418", "image": "PMC-VQA/images/PMC9429763_Fig6_389875.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of a foreign body in the image?" }, { "from": "gpt", "value": "The answer is No foreign body evident" } ], "clip_score": 25.734375, "loss": 4.384205341339111 }, { "id": "identity_315317", "image": "AI2D/images/3938.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What body part does this label refer to?\nChoices:\n(A) Heart\n(B) Small intestines\n(C) Stomach\n(D) Liver" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.328125, "loss": 5.146625518798828 }, { "id": "identity_268302", "image": "A-OKVQA/images/197959.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the circular object on the floor, connected to the wall, clean?\nChoices:\n(A) shoes\n(B) floors\n(C) dishes\n(D) walls" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.6328125, "loss": 4.654286861419678 }, { "id": "identity_224205", "image": "TextVQA/images/ea7f0c93cf7d59ba.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the word written in red under the exclamation mark on the left, and what is the word written in red directly below it?" }, { "from": "gpt", "value": "The answer is soap, atom" } ], "clip_score": 17.953125, "loss": 5.752618312835693 }, { "id": "identity_268553", "image": "TextVQA/images/20e5bcf73f20bdb6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which letter is visible at the top right corner of the box?" }, { "from": "gpt", "value": "The answer is sinful color" } ], "clip_score": 10.09375, "loss": 5.79409646987915 }, { "id": "identity_30479", "image": "PMC-VQA/images/PMC8184106_FIG2_461212.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does CT stand for? \nChoices:\n(A) Computerized Treatment \n(B) Computed Tomography \n(C) Cardiac Testing \n(D) Cranial Trauma " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.78125, "loss": 4.210949897766113 }, { "id": "identity_24202", "image": "A-OKVQA/images/196170.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are the patrons eating with chopsticks?\nChoices:\n(A) superiority\n(B) for fun\n(C) for authenticity\n(D) as joke" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.578125, "loss": 4.209512233734131 }, { "id": "identity_306416", "image": "IconQA/images/106502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a house. Which image is eighth?\nChoices:\n(A) paw\n(B) dog\n(C) house" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.671875, "loss": 4.098345756530762 }, { "id": "identity_184458", "image": "TextVQA/images/15187b59c9f66bc9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last word in the title of the book with a blue spine on the right?" }, { "from": "gpt", "value": "The answer is ROWLING" } ], "clip_score": 12.5546875, "loss": 4.166073322296143 }, { "id": "identity_159", "image": "ScienceQA/images/12147.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Indiana\n(B) Virginia\n(C) Vermont\n(D) West Virginia" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.171875, "loss": 4.2651686668396 }, { "id": "identity_336481", "image": "PMC-VQA/images/PMC6819370_F2_57889.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this CT pattern consistent with? \nChoices:\n(A) Focal pneumonia \n(B) Usual interstitial pneumonia \n(C) Atypical pneumonia \n(D) Bronchiolitis obliterans organizing pneumonia" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.1875, "loss": 4.432228088378906 }, { "id": "identity_197804", "image": "VizWiz/images/VizWiz_train_00019517.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the object's lid?" }, { "from": "gpt", "value": "The answer is Round" } ], "clip_score": 19.125, "loss": 4.1185197830200195 }, { "id": "identity_305688", "image": "Super-CLEVR/images/superCLEVR_new_001646.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a matte aeroplane of a similar size to the bicycle?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 23.78125, "loss": 5.2291131019592285 }, { "id": "identity_284398", "image": "PMC-VQA/images/PMC9188905_Fig1_309252.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the distinguishing characteristic seen on the chest computed tomography scan?\nChoices:\n(A) Bilateral diffuse patchy miliary nodules \n(B) Normal CT scan of the chest \n(C) Increased lung markings with patchy infiltration \n(D) Unilateral diffuse patchy miliary nodules." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.21875, "loss": 4.353420257568359 }, { "id": "identity_273714", "image": "PMC-VQA/images/PMC7454352_awaa240-F2_241631.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique was most probably utilized to detect the marked progression of the lesions?\nOptions:\n(A) Computed tomography (CT)\n(B) X-ray\n(C) Magnetic resonance imaging (MRI)\n(D) Positron emission tomography (PET)" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.1875, "loss": 4.02009391784668 }, { "id": "identity_290566", "image": "DVQA/images/bar_train_00126021.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the bars run horizontally?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 21.15625, "loss": 4.239808082580566 }, { "id": "identity_142479", "image": "DocVQA/images/qtfg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the Executive Secretary of The Nutrition Foundation, Inc.?" }, { "from": "gpt", "value": "The answer is Ole Salthe" } ], "clip_score": 19.203125, "loss": 6.105498313903809 }, { "id": "identity_136634", "image": "DVQA/images/bar_train_00091882.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest value for 'guard'?" }, { "from": "gpt", "value": "The answer is grave" } ], "clip_score": 28.1875, "loss": 4.148841857910156 }, { "id": "identity_275294", "image": "FigureQA/images/87241.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Firebrick or Orange?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.921875, "loss": 4.703319072723389 }, { "id": "identity_289002", "image": "TQA/images/abc_question_images/sun_layers_16310.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Between the convection zone and photosphere, which option represents the correct choice?\n(A) M\n(B) P\n(C) J\n(D) V" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.390625, "loss": 4.1834516525268555 }, { "id": "identity_312029", "image": "ScienceQA/images/17983.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the timeline, which event occurs first?\nChoices:\n(A) California's population surpasses 300,000.\n(B) Gold becomes more difficult to find in California." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 39.25, "loss": 4.123750686645508 }, { "id": "identity_217874", "image": "A-OKVQA/images/495756.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flags are visible on the truck?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.25, "loss": 4.094422340393066 }, { "id": "identity_291666", "image": "A-OKVQA/images/36417.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which language are the majority of the words on the banana written?\nChoices:\n(A) English\n(B) Japanese\n(C) Russian\n(D) French" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.28125, "loss": 4.155550003051758 }, { "id": "identity_291357", "image": "PMC-VQA/images/PMC3857525_F1_250494.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which convention are the FLAIR images presented?\nChoices:\n(A) Radiological\n(B) Neurological\n(C) Inverted\n(D) Oblique" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.09375, "loss": 4.345318794250488 }, { "id": "identity_16135", "image": "FigureQA/images/56814.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Maroon the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.109375, "loss": 4.169880390167236 }, { "id": "identity_316190", "image": "ScienceQA/images/20460.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic fits this item?\nChoices:\n(A) stretchy\n(B) opaque" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.15625, "loss": 4.453033447265625 }, { "id": "identity_63183", "image": "TQA/images/abc_question_images/tides_10136.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label corresponds to the waxing crescent phase of the moon?" }, { "from": "gpt", "value": "The answer is None depicted" } ], "clip_score": 27.953125, "loss": 4.5482378005981445 }, { "id": "identity_225641", "image": "A-OKVQA/images/448176.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the steps involved in loading and unloading the vehicles on the boat, and how does this process ensure the vehicles are transported safely across the water?" }, { "from": "gpt", "value": "The answer is Ensures safe transport across water" } ], "clip_score": 34.0, "loss": 4.4406256675720215 }, { "id": "identity_124053", "image": "VQA2.0/images/527139.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vases are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.921875, "loss": 4.519613742828369 }, { "id": "identity_25045", "image": "FigureQA/images/34662.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dim Gray the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.9375, "loss": 4.229760646820068 }, { "id": "identity_541", "image": "IconQA/images/39789.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a turtle. Which picture is second?\nChoices:\n(A) fish\n(B) sub\n(C) turtle" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.40625, "loss": 4.486111640930176 }, { "id": "identity_297791", "image": "PMC-VQA/images/PMC6947566_genes-10-00953-f003_96077.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the result of the OCT scan for the left eye? \nChoices:\n(A) Subretinal deposits with intraretinal fluid\n(B) Intraretinal fluid with no subretinal deposit\n(C) Subretinal mass with no intraretinal fluid\n(D) Normal OCT scan." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.5, "loss": 4.741879463195801 }, { "id": "identity_325940", "image": "IconQA/images/78656.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there a square face on this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.484375, "loss": 4.023421287536621 }, { "id": "identity_314233", "image": "TextVQA/images/cde31f9f8fe1118a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of this drink?" }, { "from": "gpt", "value": "The answer is polar" } ], "clip_score": 32.8125, "loss": 4.1312336921691895 }, { "id": "identity_304767", "image": "PMC-VQA/images/PMC5655893_Fig7_231377.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color of the item being pointed out?\nChoices:\n(A) Black \n(B) Brown \n(C) Red \n(D) Green" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.6875, "loss": 4.5498456954956055 }, { "id": "identity_53819", "image": "DocVQA/images/khnk0226_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which insecticide and application method combination resulted in the least damage rating?" }, { "from": "gpt", "value": "The answer is Dyfonate with ST method" } ], "clip_score": 25.203125, "loss": 4.517251014709473 }, { "id": "identity_274668", "image": "FigureQA/images/71519.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Orchid the color with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.578125, "loss": 4.2673468589782715 }, { "id": "identity_221648", "image": "A-OKVQA/images/409050.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs are visible in the room?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.515625, "loss": 4.5171074867248535 }, { "id": "identity_300627", "image": "DocVQA/images/nqlm0080_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the brand?" }, { "from": "gpt", "value": "The answer is Taste" } ], "clip_score": 18.3125, "loss": 4.804818153381348 }, { "id": "identity_291298", "image": "A-OKVQA/images/147681.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which location did the skateboarder start this move? \n(A) up above\n(B) mid level\n(C) grassy area\n(D) bottom step" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.140625, "loss": 4.225568771362305 }, { "id": "identity_332593", "image": "A-OKVQA/images/350463.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What genus does this belong to?\nChoices:\n(A) musa\n(B) malus\n(C) citrus\n(D) ficus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.8984375, "loss": 4.311758518218994 }, { "id": "identity_331581", "image": "A-OKVQA/images/119614.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of cat is this?\nChoices:\n(A) persian\n(B) orange tabby\n(C) bengal\n(D) maine coon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.75, "loss": 4.297544479370117 }, { "id": "identity_3864", "image": "VizWiz/images/VizWiz_train_00011564.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Good morning. Is this necklace gold or silver? Thank you for your thought." }, { "from": "gpt", "value": "The answer is silver white gold" } ], "clip_score": 27.875, "loss": 4.608050346374512 }, { "id": "identity_303473", "image": "FigureQA/images/55193.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.09375, "loss": 4.700369834899902 }, { "id": "identity_317200", "image": "ScienceQA/images/6449.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Montpelier\n(B) Burlington\n(C) Salem\n(D) Boston" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.890625, "loss": 4.6233673095703125 }, { "id": "identity_306596", "image": "FigureQA/images/42978.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Green the smallest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.453125, "loss": 4.0144124031066895 }, { "id": "identity_336111", "image": "PMC-VQA/images/PMC4563085_fig1_421918.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the finding seen in this region?\nChoices:\n(A) Pleural effusion \n(B) Consolidation \n(C) Pleural thickening \n(D) Atelectasis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.296875, "loss": 5.1102800369262695 }, { "id": "identity_6776", "image": "PMC-VQA/images/PMC3919884_f1-ol-07-03-0894_265919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the condition of the left medial ilium in the scan? \nChoices:\n(A) Destroyed \n(B) Enlarged \n(C) Normal \n(D) Inflamed " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.609375, "loss": 4.45316219329834 }, { "id": "identity_268135", "image": "AI2D/images/3647.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: To what are the left pulmonary veins connected? \nChoices:\n(A) aorta\n(B) left ventricle\n(C) left atrium\n(D) superior vena cava" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.5625, "loss": 4.09496545791626 }, { "id": "identity_29182", "image": "PMC-VQA/images/PMC8055998_Fig4_420820.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which regions are shown in greater detail? \nChoices:\n(A) The white circles \n(B) The black squares \n(C) The yellow triangles \n(D) The white rectangles " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 7.3828125, "loss": 4.061713218688965 }, { "id": "identity_110102", "image": "VizWiz/images/VizWiz_train_00013884.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many leaves are visible in the vase?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 25.21875, "loss": 4.89963960647583 }, { "id": "identity_266306", "image": "PMC-VQA/images/PMC6934604_Fig7_91695.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What measurement does the scale bar in image (A) represent?\nChoices:\n(A) 1 μm\n(B) 1 cm\n(C) 1 mm\n(D) 1 nm" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.2421875, "loss": 4.083343505859375 }, { "id": "identity_136254", "image": "DVQA/images/bar_train_00051248.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest preference for the object 'tool'?" }, { "from": "gpt", "value": "The answer is dear" } ], "clip_score": 29.796875, "loss": 4.321355819702148 }, { "id": "identity_312538", "image": "VizWiz/images/VizWiz_train_00000968.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of vehicle is that?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 21.75, "loss": 4.332714080810547 }, { "id": "identity_290586", "image": "TQA/images/question_images/human_system_nervous_6170.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What area of the brain controls movement? \nOptions:\n(A) occipital lobe\n(B) frontal lobe\n(C) parietal lobe\n(D) temporal lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.328125, "loss": 4.118056297302246 }, { "id": "identity_292632", "image": "A-OKVQA/images/124907.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of hat is the man on the right wearing, with the options being a bowlers cap, newsboy cap, bottle cap, or baseball cap?\nChoices:\n(A) bowlers cap\n(B) newsboy cap\n(C) bottle cap\n(D) baseball cap" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.4140625, "loss": 4.025277614593506 }, { "id": "identity_304196", "image": "PMC-VQA/images/PMC2895667_ppat-1000980-g007_68025.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the brain is depicted in the image?\nChoices:\n(A) Cerebellum\n(B) Hippocampus\n(C) Amygdala\n(D) Thalamus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.890625, "loss": 4.158755302429199 }, { "id": "identity_277625", "image": "A-OKVQA/images/410105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose are the clothes placed on hangers?\nChoices:\n(A) for sale\n(B) hiding\n(C) airing out\n(D) cleaning" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.578125, "loss": 4.697144031524658 }, { "id": "identity_266053", "image": "AI2D/images/3061.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, which region corresponds to the cytosol?\nChoices:\n(A) d\n(B) n\n(C) f\n(D) p" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.65625, "loss": 4.0521979331970215 }, { "id": "identity_149768", "image": "DocVQA/images/fgjh0227_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full name of the Manager at Lego by Samsonite?" }, { "from": "gpt", "value": "The answer is Norman Saffer" } ], "clip_score": 21.234375, "loss": 4.7279486656188965 }, { "id": "identity_153941", "image": "ScienceQA/images/12251.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wind turbine models are displayed on the table?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.125, "loss": 4.549665927886963 }, { "id": "identity_12454", "image": "DVQA/images/bar_train_00162206.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the fourth group of bars from the bottom?" }, { "from": "gpt", "value": "The answer is system" } ], "clip_score": 23.140625, "loss": 4.8953142166137695 }, { "id": "identity_207847", "image": "A-OKVQA/images/498137.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles are visible in the background?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 28.015625, "loss": 4.7337646484375 }, { "id": "identity_77990", "image": "TQA/images/question_images/skin_cross_section_7581.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of vessels are shown in the skin diagram?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 31.53125, "loss": 4.331758975982666 }, { "id": "identity_70487", "image": "TQA/images/question_images/parts_fish_2905.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pectoral fins does the fish have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 37.34375, "loss": 4.6278181076049805 }, { "id": "identity_35599", "image": "PMC-VQA/images/PMC9136286_F1_290541.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the corpus callosum is the lesion showing patchy enhancement in image B? \nChoices:\n(A) Left genu, \n(B) Right splenium, \n(C) Middle body, \n(D) Anterior portion. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.234375, "loss": 4.098211288452148 }, { "id": "identity_243505", "image": "PMC-VQA/images/PMC6294250_Fig1_411683.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the observed phenomenon of partial retention in intracellular structures, what can be inferred about the potential impact on cellular function and the possible underlying mechanisms?" }, { "from": "gpt", "value": "The answer is Disruption in cellular transport and processing mechanisms" } ], "clip_score": 24.046875, "loss": 4.034962177276611 }, { "id": "identity_27550", "image": "PMC-VQA/images/PMC9356014_Fig1_365449.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the third row in the diagram show? \nChoices:\n(A) Anterior vascular leakage grading, \n(B) Posterior vascular leakage grading, \n(C) Macular hyperfluorescence grading, \n(D) Leakage domain grading. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.296875, "loss": 4.334219932556152 }, { "id": "identity_314775", "image": "FigureQA/images/46944.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.28125, "loss": 4.424631595611572 }, { "id": "identity_67260", "image": "TQA/images/abc_question_images/ocean_waves_17120.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What feature is represented by the label 'L'?" }, { "from": "gpt", "value": "The answer is Trough" } ], "clip_score": 22.515625, "loss": 4.060527324676514 }, { "id": "identity_126239", "image": "TextVQA/images/01209963ab93abd9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many water bottles can be seen on the podium?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 12.0390625, "loss": 4.54799222946167 }, { "id": "identity_33936", "image": "PMC-VQA/images/PMC6006706_Fig1_330441.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where were the lesions located in the MRI brain images? \nChoices:\n(A) Temporal lobe \n(B) Frontal lobe \n(C) Occipital lobe \n(D) Parietal lobe " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.390625, "loss": 4.517833709716797 }, { "id": "identity_302617", "image": "A-OKVQA/images/2525.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the person standing behind the cupcake displays?\nChoices:\n(A) selling them\n(B) for practice\n(C) smelling them\n(D) hoarding them" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.21875, "loss": 4.238635063171387 }, { "id": "identity_192304", "image": "A-OKVQA/images/123535.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible sheep are there in the trailer?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 28.140625, "loss": 4.201024532318115 }, { "id": "identity_295330", "image": "A-OKVQA/images/261945.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity does the woman in blue lying down plan to engage in?\nChoices:\n(A) push ups\n(B) crunches\n(C) sunbath\n(D) make out" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.859375, "loss": 4.0153117179870605 }, { "id": "identity_306092", "image": "PMC-VQA/images/PMC5700104_Fig2_243135.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the dip in the plot indicate?\nChoices:\n(A) Higher prey growth \n(B) Lower prey growth \n(C) Higher predator population \n(D) Lower predator population" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.46875, "loss": 4.571653366088867 }, { "id": "identity_335092", "image": "A-OKVQA/images/501571.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what setting are these individuals on horseback located?\nChoices:\n(A) park\n(B) riot\n(C) mall\n(D) grocery store" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.765625, "loss": 4.20247745513916 }, { "id": "identity_327181", "image": "A-OKVQA/images/296374.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of place was this photo taken in front of?\nChoices:\n(A) view point\n(B) restaurant\n(C) ski lodge\n(D) museum" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.65625, "loss": 4.364681243896484 }, { "id": "identity_332359", "image": "A-OKVQA/images/24380.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What soft drink is advertised to the left of this place?\nChoices:\n(A) mountain dew\n(B) 7-up\n(C) coke zero\n(D) diet pepsi" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.5859375, "loss": 4.015913486480713 }, { "id": "identity_291399", "image": "Super-CLEVR/images/superCLEVR_new_001877.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the small object located behind the gray metal truck have a similar shape to the rubber item positioned to the right of the small blue bus?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 31.71875, "loss": 5.064772605895996 }, { "id": "identity_28812", "image": "TextVQA/images/b53f035584520f45.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is written on the white long sleeve shirt in the back?" }, { "from": "gpt", "value": "The answer is ertec love ny" } ], "clip_score": 18.6875, "loss": 5.500617980957031 }, { "id": "identity_327688", "image": "TQA/images/question_images/seasons_6286.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which event occurs between these two seasons in the northern hemisphere?\nChoices:\n(A) December Solstice\n(B) September Equinox\n(C) March Equinox\n(D) June Solstice" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 38.0, "loss": 4.528254508972168 }, { "id": "identity_264129", "image": "DVQA/images/bar_train_00186512.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Across all the stores, which item had the highest total number of units sold?" }, { "from": "gpt", "value": "The answer is notion" } ], "clip_score": 32.40625, "loss": 5.193747043609619 }, { "id": "identity_275084", "image": "TabMWP/images/4779.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which day did Chad drive the most miles according to his driving log?\nChoices:\n(A) Sunday\n(B) Monday\n(C) Tuesday\n(D) Wednesday" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 36.96875, "loss": 4.190910816192627 }, { "id": "identity_318665", "image": "TabMWP/images/13068.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Mandy buys 1/2 of a kilogram of some Spanish olives. What is the total cost?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 31.40625, "loss": 4.008764266967773 }, { "id": "identity_67365", "image": "AI2D/images/3247.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many types of endoplasmic reticulum are shown in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 30.984375, "loss": 4.202925682067871 }, { "id": "identity_218216", "image": "VQA2.0/images/42371.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many horses are pulling the carriage in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.265625, "loss": 4.396173000335693 }, { "id": "identity_212813", "image": "TextVQA/images/32d2b77fefbf9bab.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible on the beach?" }, { "from": "gpt", "value": "The answer is One person" } ], "clip_score": 19.453125, "loss": 4.303422927856445 }, { "id": "identity_5807", "image": "TQA/images/question_images/erosion_8068.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is under hard rock?\nChoices:\n(A) Water\n(B) Soft rock\n(C) Nothing\n(D) Stream" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.203125, "loss": 4.233626842498779 }, { "id": "identity_231406", "image": "VizWiz/images/VizWiz_train_00005551.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of food is shown in the image, and what is its current price if the original price was $6?" }, { "from": "gpt", "value": "The answer is Chili, $3" } ], "clip_score": 32.53125, "loss": 4.12193489074707 }, { "id": "identity_275631", "image": "AI2D/images/1080.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organism is depicted in the diagram provided?\nChoices:\n(A) Cell\n(B) None of the above\n(C) Plant\n(D) Volcano" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.34375, "loss": 4.1336669921875 }, { "id": "identity_317364", "image": "A-OKVQA/images/79975.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the event of the person losing their balance, what will protect their head?\nChoices:\n(A) shirt\n(B) helmet\n(C) knee pads\n(D) wrist guards" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.421875, "loss": 4.431147575378418 }, { "id": "identity_289569", "image": "A-OKVQA/images/57641.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most popular activity that people here want to do today?\nChoices:\n(A) swimming\n(B) robbery\n(C) waiting\n(D) shopping" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.953125, "loss": 4.208043098449707 }, { "id": "identity_221612", "image": "TextVQA/images/1daf09612004fe93.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many advertisements for hotels can be seen on the board?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.546875, "loss": 4.373693943023682 }, { "id": "identity_268864", "image": "TextVQA/images/c6efa7ccba50d7e5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of sign is displayed in this image?" }, { "from": "gpt", "value": "The answer is stop" } ], "clip_score": 26.953125, "loss": 4.157596588134766 }, { "id": "identity_288585", "image": "ScienceQA/images/12921.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic is shared by both of these objects?\nChoices:\n(A) colorful\n(B) stretchy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.453125, "loss": 4.353853702545166 }, { "id": "identity_110470", "image": "VQA-AS/images/17628.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many squirrels are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.21875, "loss": 4.404997825622559 }, { "id": "identity_21814", "image": "AI2D/images/4045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: which is the hottest part\nChoices:\n(A) core\n(B) none\n(C) envelope\n(D) flair" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.234375, "loss": 4.2765913009643555 }, { "id": "identity_198248", "image": "VQA2.0/images/419599.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many potted plants are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.90625, "loss": 4.613824367523193 }, { "id": "identity_203905", "image": "PMC-VQA/images/PMC9290109_fig2-23247096221111764_343062.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiological sign called that is seen in the left lung?" }, { "from": "gpt", "value": "The answer is Air crescent sign" } ], "clip_score": 27.0625, "loss": 4.195530414581299 }, { "id": "identity_301321", "image": "AI2D/images/3876.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the second order branch?\nChoices:\n(A) B\n(B) E\n(C) C\n(D) D" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.796875, "loss": 4.156935691833496 }, { "id": "identity_289565", "image": "A-OKVQA/images/579739.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, which time of day is depicted? Is it:\n(A) morning\n(B) evening\n(C) afternoon\n(D) night" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.9921875, "loss": 4.375554084777832 }, { "id": "identity_137563", "image": "DocVQA/images/htfg0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of transmission used for the fax?" }, { "from": "gpt", "value": "The answer is IFC" } ], "clip_score": 24.40625, "loss": 5.0890021324157715 }, { "id": "identity_318244", "image": "Super-CLEVR/images/superCLEVR_new_001157.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the tiny brown thing that is the same shape as the large matte thing made of?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 18.0, "loss": 4.119136333465576 }, { "id": "identity_22977", "image": "A-OKVQA/images/182715.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How is this food being prepared?\nChoices:\n(A) scooped\n(B) sliced\n(C) boiled\n(D) poured" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.5546875, "loss": 4.2215046882629395 }, { "id": "identity_308702", "image": "A-OKVQA/images/141920.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of place is this?\nChoices:\n(A) comic\n(B) grocery\n(C) beauty\n(D) store" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.0, "loss": 4.1381659507751465 }, { "id": "identity_320014", "image": "A-OKVQA/images/517570.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the goal of the person working on the boat here?\nChoices:\n(A) waterproofing\n(B) decorative\n(C) spy craft\n(D) weight loss" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.9765625, "loss": 4.689015865325928 }, { "id": "identity_239317", "image": "VQA2.0/images/162768.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different destinations are indicated by the signs, and what are they?" }, { "from": "gpt", "value": "The answer is Seven unique destinations" } ], "clip_score": 25.390625, "loss": 4.694314002990723 }, { "id": "identity_204085", "image": "PMC-VQA/images/PMC7836564_Fig3_353443.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which side of the body is labeled with the letter 'L'?" }, { "from": "gpt", "value": "The answer is Left side" } ], "clip_score": 13.46875, "loss": 4.1313042640686035 }, { "id": "identity_130881", "image": "VQA2.0/images/224483.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many street lights are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.59375, "loss": 4.320430755615234 }, { "id": "identity_5510", "image": "A-OKVQA/images/21083.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the silver vehicle by the building used for?\nChoices:\n(A) transport luggage\n(B) sell tickets\n(C) clean grounds\n(D) shuttle" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.6875, "loss": 4.745352268218994 }, { "id": "identity_105691", "image": "VQA2.0/images/338528.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any people visible in the image?" }, { "from": "gpt", "value": "The answer is No clear people visible" } ], "clip_score": 16.8125, "loss": 4.712507724761963 }, { "id": "identity_8019", "image": "ScienceQA/images/18615.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following organisms is the secondary consumer in this food web?\nChoices:\n(A) orca\n(B) sea otter\n(C) sea urchin\n(D) phytoplankton" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.703125, "loss": 4.2652153968811035 }, { "id": "identity_6969", "image": "A-OKVQA/images/46537.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The person on the motorcycle escorting the double decker bus is what type of public servant?\nChoices:\n(A) fireman\n(B) soldier\n(C) policeman\n(D) inspector" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.15625, "loss": 4.233625411987305 }, { "id": "identity_24590", "image": "AI2D/images/4343.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What represents a clementine in the figure?\nChoices:\n(A) N\n(B) none of the above\n(C) B\n(D) M" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.53125, "loss": 4.261745452880859 }, { "id": "identity_275510", "image": "ScienceQA/images/11699.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is used to make this balloon?\nChoices:\n(A) ceramic\n(B) rubber" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.53125, "loss": 4.073637962341309 }, { "id": "identity_327343", "image": "IconQA/images/21697.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) after this." }, { "from": "gpt", "value": "The answer is quarter" } ], "clip_score": 27.890625, "loss": 4.819071292877197 }, { "id": "identity_13655", "image": "IconQA/images/63660.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer dogs than bones?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.46875, "loss": 4.078460216522217 }, { "id": "identity_288703", "image": "TabMWP/images/34372.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.3125, "loss": 4.543883800506592 }, { "id": "identity_330616", "image": "PMC-VQA/images/PMC8130232_acm213246-fig-0001_444310.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of these image slices mentioned in the caption?\nChoices:\n(A) To show a virtual phantom image slice after an HN image slice for qualitative comparison. \n(B) To show the HN image slice before a virtual phantom image slice for qualitative comparison. \n(C) To show two virtual phantom image slices for quantitative comparison. \n(D) To show the HN image slice after a virtual phantom image slice for quantitative comparison." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.609375, "loss": 4.018743991851807 }, { "id": "identity_24750", "image": "A-OKVQA/images/304150.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is in front of the man in the first row wearing glasses?\nChoices:\n(A) laptop\n(B) lion\n(C) cow\n(D) baby" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.796875, "loss": 4.401930332183838 }, { "id": "identity_18497", "image": "PMC-VQA/images/PMC7880116_f0003_366918.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the portable chest X-ray reveal? \nChoices:\n(A) Enlarged liver \n(B) Heart disease \n(C) Lung cancer \n(D) Brain tumor " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.578125, "loss": 4.786955833435059 }, { "id": "identity_266792", "image": "PMC-VQA/images/PMC7683662_Fig3_303608.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What specific component of the bone is undergoing replacement in the body?\nChoices:\n(A) Periosteum \n(B) Bone marrow \n(C) Bony trabeculae \n(D) Cortical bone" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.15625, "loss": 4.166664123535156 }, { "id": "identity_112786", "image": "Super-CLEVR/images/superCLEVR_new_001806.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is the largest in the image?" }, { "from": "gpt", "value": "The answer is Bus" } ], "clip_score": 21.40625, "loss": 4.172367572784424 }, { "id": "identity_40368", "image": "DVQA/images/bar_train_00098945.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item had the highest number of units sold in the store \"sight\"?" }, { "from": "gpt", "value": "The answer is Way" } ], "clip_score": 30.578125, "loss": 4.02158784866333 }, { "id": "identity_219131", "image": "A-OKVQA/images/425772.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a visible buoy in the image?" }, { "from": "gpt", "value": "The answer is Yes, orange" } ], "clip_score": 16.171875, "loss": 5.385319709777832 }, { "id": "identity_31378", "image": "PMC-VQA/images/PMC7757331_F3_325476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging modality used to obtain the images mentioned? \nChoices:\n(A) X-ray \n(B) CT scan \n(C) MRI \n(D) Ultrasound " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.9375, "loss": 4.451996803283691 }, { "id": "identity_146966", "image": "MapQA/images/map_9985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color representing the range of works in California?" }, { "from": "gpt", "value": "The answer is Lightest red color" } ], "clip_score": 28.90625, "loss": 4.778117656707764 }, { "id": "identity_283117", "image": "A-OKVQA/images/215738.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the man in pink engaged in on the street?\nChoices:\n(A) driving\n(B) selling\n(C) cleaning\n(D) crossing" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.59375, "loss": 4.463812828063965 }, { "id": "identity_286642", "image": "PMC-VQA/images/PMC7681181_Fig14_302897.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which abnormality is being shown in the image provided? \nOptions:\n(A) Lung nodules\n(B) Pulmonary embolism\n(C) Pleural effusion\n(D) Aortic aneurysm" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.109375, "loss": 5.009408473968506 }, { "id": "identity_314877", "image": "A-OKVQA/images/35328.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is the clear piece the railing on the right is attached to made of?\nChoices:\n(A) ceramic\n(B) brick\n(C) glass\n(D) wood" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.0, "loss": 4.216669082641602 }, { "id": "identity_324434", "image": "A-OKVQA/images/537698.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the interior of the lettering in front of the helmet on the player?\nChoices:\n(A) pink\n(B) orange\n(C) white\n(D) red" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.3125, "loss": 4.082135200500488 }, { "id": "identity_331791", "image": "TextVQA/images/9aff1f3584844e37.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many axles?" }, { "from": "gpt", "value": "The answer is aviva stadium" } ], "clip_score": 11.7578125, "loss": 5.728671073913574 }, { "id": "identity_308860", "image": "ScienceQA/images/17238.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these organisms is a primary consumer in this food web?\nChoices:\n(A) black bear\n(B) parasol fungus\n(C) bolete fungus\n(D) silver maple" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.125, "loss": 4.008252143859863 }, { "id": "identity_321962", "image": "ScienceQA/images/18908.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the common name of this organism?\nChoices:\n(A) Flabellina iodinea\n(B) Spanish shawl nudibranch" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 37.3125, "loss": 4.3130388259887695 }, { "id": "identity_215704", "image": "A-OKVQA/images/151594.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheeled suitcases are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 27.390625, "loss": 4.190432548522949 }, { "id": "identity_45036", "image": "MapQA/images/map_9652.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have loans in the range of 6192607-6192607?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 32.25, "loss": 4.031833648681641 }, { "id": "identity_199090", "image": "PMC-VQA/images/PMC5476429_fig8_180437.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the highlighted area within the blue rectangle?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 17.890625, "loss": 4.06223201751709 }, { "id": "identity_270855", "image": "DocVQA/images/zhpc0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: From whom did this document originate?" }, { "from": "gpt", "value": "The answer is MARTIN J. FORMAN , MR/FFP" } ], "clip_score": 24.078125, "loss": 4.206989288330078 }, { "id": "identity_124277", "image": "A-OKVQA/images/328810.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wine bottles can be seen on the counter?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.109375, "loss": 4.6301679611206055 }, { "id": "identity_6520", "image": "AI2D/images/4233.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the bottom-most portion?\nChoices:\n(A) none of the above\n(B) kaibab\n(C) moenkopi\n(D) breccia" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.296875, "loss": 4.274380683898926 }, { "id": "identity_295413", "image": "TabMWP/images/6897.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which day of the week did the coffee shop sell the least number of cups of coffee? Was it:\n(A) Monday\n(B) Tuesday\n(C) Wednesday\n(D) Thursday" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 39.21875, "loss": 4.100180149078369 }, { "id": "identity_31394", "image": "A-OKVQA/images/111436.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the man in the foreground doing?\nChoices:\n(A) hiding bike\n(B) eating lunch\n(C) stealing bike\n(D) repairing bike" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 11.875, "loss": 4.565752983093262 }, { "id": "identity_321240", "image": "AI2D/images/3893.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is located at the very top of this structure?\nChoices:\n(A) leaf scars\n(B) lateral buds\n(C) terminal bud\n(D) none of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.5, "loss": 4.223060607910156 }, { "id": "identity_301999", "image": "A-OKVQA/images/133964.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of bird is on the colorful barrel?\nChoices:\n(A) eagle\n(B) swan\n(C) pelican\n(D) rooster" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.796875, "loss": 4.020426273345947 }, { "id": "identity_292426", "image": "PMC-VQA/images/PMC8138201_f1_446613.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the lesion located among the following options: (A) Right temporal lobe, (B) Left frontal lobe, (C) Right frontal lobe, or (D) Left temporal lobe?" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.296875, "loss": 4.043992519378662 }, { "id": "identity_312578", "image": "ScienceQA/images/15443.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which state is shown?\nChoices:\n(A) Rhode Island\n(B) New Hampshire\n(C) Vermont\n(D) Pennsylvania" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.796875, "loss": 4.288085460662842 }, { "id": "identity_324528", "image": "ScienceQA/images/3687.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the place shown?\nChoices:\n(A) Rhode Island\n(B) Georgia\n(C) New York\n(D) South Carolina" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.921875, "loss": 4.398646831512451 }, { "id": "identity_265340", "image": "A-OKVQA/images/82740.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What obstacle is the horse jumping over? \n(A) zebra\n(B) hurdle\n(C) trainer\n(D) jockey" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.0, "loss": 4.170535564422607 }, { "id": "identity_319514", "image": "TextVQA/images/5371cd2bc59dab7b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is displayed on camera 1?" }, { "from": "gpt", "value": "The answer is layers below" } ], "clip_score": 21.6875, "loss": 6.295206546783447 }, { "id": "identity_209747", "image": "PMC-VQA/images/PMC7533594_F1_265727.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many images show a cross-sectional view of the brain?" }, { "from": "gpt", "value": "The answer is Two images" } ], "clip_score": 30.78125, "loss": 4.851987838745117 }, { "id": "identity_277752", "image": "A-OKVQA/images/168763.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the occupation of the men riding on horses?\nChoices:\n(A) waiter\n(B) officer\n(C) priest\n(D) doctor" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.765625, "loss": 4.624575614929199 }, { "id": "identity_297827", "image": "AI2D/images/660.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, which component is represented by H?\nChoices:\n(A) mesosphere\n(B) inner core\n(C) mantle\n(D) outer core" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.8125, "loss": 4.530658721923828 }, { "id": "identity_118299", "image": "VQA2.0/images/367367.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of clothing is the person kneeling down wearing?" }, { "from": "gpt", "value": "The answer is A suit" } ], "clip_score": 13.953125, "loss": 4.095707416534424 }, { "id": "identity_324275", "image": "PMC-VQA/images/PMC6414137_acm212546-fig-0003_449313.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of reconstruction is depicted on the right?\nChoices:\n(A) Monoenergetic reconstruction\n(B) CatPhan reconstruction\n(C) DD 140 kV reconstruction\n(D) Reconstruction on the left" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.90625, "loss": 4.093111991882324 }, { "id": "identity_29561", "image": "A-OKVQA/images/178078.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What angle is the motorcycle at to the cars?\nChoices:\n(A) obtuse\n(B) perpendicular\n(C) right\n(D) parallel" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.421875, "loss": 4.146904468536377 }, { "id": "identity_331279", "image": "PMC-VQA/images/PMC3996258_F3_283260.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated in red in the surface charge image of PvD1r?\nChoices:\n(A) Positive surface charge\n(B) Negative surface charge\n(C) Neutral surface charge\n(D) No surface charge is indicated" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.515625, "loss": 4.511918067932129 }, { "id": "identity_311616", "image": "ScienceQA/images/1927.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Fill in the blank.\nHydrogen sulfide is ().\nChoices:\n(A) a compound\n(B) an elementary substance" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.921875, "loss": 4.01993465423584 }, { "id": "identity_116757", "image": "VQA2.0/images/218379.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many benches can be seen in the background?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.828125, "loss": 4.352137088775635 }, { "id": "identity_308538", "image": "ScienceQA/images/4189.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these places is indicated on the map?\nChoices:\n(A) Boston\n(B) New York City\n(C) Baltimore\n(D) Pittsburgh" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.265625, "loss": 4.489731788635254 }, { "id": "identity_38538", "image": "IconQA/images/96910.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a rocket. Which picture is fourth?\nChoices:\n(A) moon\n(B) star\n(C) rocket" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.921875, "loss": 4.7149658203125 }, { "id": "identity_57470", "image": "DVQA/images/bar_train_00153486.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest value for the green bar labeled 'cheek'?" }, { "from": "gpt", "value": "The answer is charm" } ], "clip_score": 22.515625, "loss": 5.096157073974609 }, { "id": "identity_282278", "image": "A-OKVQA/images/297419.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which animal is in close proximity to the flowers among the following options: dog, rooster, cat, or cow?\nChoices:\n(A) dog\n(B) rooster\n(C) cat\n(D) cow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.390625, "loss": 4.09549617767334 }, { "id": "identity_31711", "image": "PMC-VQA/images/PMC5627283_Fig1_223949.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in panel (B)? \nChoices:\n(A) Diffusion measure collection in native space \n(B) Predefined fiber tract property parameterization \n(C) Inverse transformation of fiber tract \n(D) None of the above " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.578125, "loss": 4.092780113220215 }, { "id": "identity_291087", "image": "TextVQA/images/f8e57988876e154c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written in the handwriting?" }, { "from": "gpt", "value": "The answer is the cancer card" } ], "clip_score": 16.21875, "loss": 4.547404766082764 }, { "id": "identity_305788", "image": "PMC-VQA/images/PMC7027449_pd5591-fig-0001_120117.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to detect the lesion?\nChoices:\n(A) X-ray \n(B) MRI \n(C) CT \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.46875, "loss": 4.058841705322266 }, { "id": "identity_295893", "image": "DVQA/images/bar_train_00123519.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group of bars, counting from the bottom, is labeled as the third one?" }, { "from": "gpt", "value": "The answer is result" } ], "clip_score": 22.0, "loss": 5.064554691314697 }, { "id": "identity_308556", "image": "A-OKVQA/images/399147.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does lb mean as it is written on these signs?\nChoices:\n(A) pounds\n(B) language barrier\n(C) lemon bags\n(D) liters" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.0, "loss": 4.2992634773254395 }, { "id": "identity_307162", "image": "DVQA/images/bar_train_00171407.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first group of bars?" }, { "from": "gpt", "value": "The answer is access" } ], "clip_score": 17.0, "loss": 4.166717529296875 }, { "id": "identity_187690", "image": "A-OKVQA/images/319933.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible dispensers are there above the conveyor belt?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.796875, "loss": 4.187967777252197 }, { "id": "identity_21097", "image": "ScienceQA/images/3590.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these three objects have in common?\nChoices:\n(A) sour\n(B) hard\n(C) translucent" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.3203125, "loss": 4.50753116607666 }, { "id": "identity_282163", "image": "A-OKVQA/images/132141.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the object that the person in the foreground is positioned above?\nChoices:\n(A) ramp\n(B) car\n(C) rooftop\n(D) baby" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.40625, "loss": 4.549554824829102 }, { "id": "identity_216356", "image": "A-OKVQA/images/326936.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pots and pans are visible on the counter?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 22.21875, "loss": 4.563567161560059 }, { "id": "identity_113408", "image": "TextVQA/images/92ee9779da5e2b9e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the reflection of the mirror at the back of the store?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 15.9375, "loss": 4.741080284118652 }, { "id": "identity_291987", "image": "IconQA/images/23564.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the fraction of the shape that is colored pink? Is it:\n(A) 1/4\n(B) 1/2\n(C) 1/3\n(D) 1/5" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.984375, "loss": 4.165591239929199 }, { "id": "identity_117062", "image": "A-OKVQA/images/449634.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sets of tracks are visible in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 20.21875, "loss": 4.867024898529053 }, { "id": "identity_51938", "image": "DocVQA/images/kxcg0023_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person whose information is provided in the image?" }, { "from": "gpt", "value": "The answer is Mike Middleton" } ], "clip_score": 21.953125, "loss": 4.561160564422607 }, { "id": "identity_313238", "image": "IconQA/images/31004.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a barn. What is the fourth image?\nChoices:\n(A) tractor\n(B) sun\n(C) barn" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.015625, "loss": 4.314894199371338 }, { "id": "identity_260550", "image": "VizWiz/images/VizWiz_train_00007149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the main components of this swing trainer and how do they contribute to its function?" }, { "from": "gpt", "value": "The answer is Grip, shaft, weighted end; improve mechanics, strength, and tempo" } ], "clip_score": 30.828125, "loss": 4.069828510284424 }, { "id": "identity_120454", "image": "A-OKVQA/images/125252.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many electronic devices are present in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.75, "loss": 4.438076972961426 }, { "id": "identity_21459", "image": "PMC-VQA/images/PMC6790021_Fig2_50301.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which column represents distance in the graph? \nChoices:\n(A) Left column \n(B) Right column \n(C) Top column \n(D) Bottom column " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 3.541015625, "loss": 4.02867317199707 }, { "id": "identity_107788", "image": "VQA2.0/images/149679.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.265625, "loss": 4.138483047485352 }, { "id": "identity_35518", "image": "AI2D/images/2387.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At which stage can the butterfly lay eggs?\nChoices:\n(A) Adult\n(B) Larvae\n(C) Pupa\n(D) Nymph" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.46875, "loss": 4.338632106781006 }, { "id": "identity_113611", "image": "A-OKVQA/images/416356.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lemon wedges are on the plate?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 16.953125, "loss": 4.6141839027404785 }, { "id": "identity_211236", "image": "PMC-VQA/images/PMC3651279_F1_203874.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is located at the center of the image?" }, { "from": "gpt", "value": "The answer is Small bowel" } ], "clip_score": 27.640625, "loss": 4.287215232849121 }, { "id": "identity_16268", "image": "PMC-VQA/images/PMC3512476_F3_170135.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the computed tomography scan show inside the kidney? \nChoices:\n(A) High-density areas \n(B) Normal tissue \n(C) Low-density areas \n(D) Fluid-filled cysts " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.75, "loss": 4.164279937744141 }, { "id": "identity_199012", "image": "A-OKVQA/images/364590.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the chair the person is sitting on?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 11.71875, "loss": 4.58925724029541 }, { "id": "identity_319410", "image": "TQA/images/question_images/cellular_respiration_6626.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What produces carbon dioxide and water?\nChoices:\n(A) chloroplast\n(B) mitochondrion\n(C) ATP\n(D) sun" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.890625, "loss": 4.252824306488037 }, { "id": "identity_309598", "image": "A-OKVQA/images/168162.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What event is this animal good at?\nChoices:\n(A) running\n(B) prancing\n(C) counting\n(D) jumping" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.03125, "loss": 4.129216194152832 }, { "id": "identity_103689", "image": "Super-CLEVR/images/superCLEVR_new_001214.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is the same type as the green bicycle?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 26.765625, "loss": 4.984112739562988 }, { "id": "identity_23347", "image": "A-OKVQA/images/206394.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the circular silver object on the boat used for?\nChoices:\n(A) diving\n(B) steering\n(C) sitting\n(D) fishing" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.25, "loss": 4.656253337860107 }, { "id": "identity_277655", "image": "TextVQA/images/12c82a20c9c89c02.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the text displayed on the train to the left?" }, { "from": "gpt", "value": "The answer is sprecher" } ], "clip_score": 26.203125, "loss": 5.015120029449463 }, { "id": "identity_21653", "image": "TextVQA/images/4088d36dea48dbe2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the name of the orange dish detergent?" }, { "from": "gpt", "value": "The answer is ajax" } ], "clip_score": 21.734375, "loss": 4.403414249420166 }, { "id": "identity_311850", "image": "A-OKVQA/images/308947.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these individuals most likely trying to put out?\nChoices:\n(A) flyers\n(B) light\n(C) fire\n(D) dinner" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.2734375, "loss": 4.19158935546875 }, { "id": "identity_119049", "image": "Super-CLEVR/images/superCLEVR_new_001545.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is the purple object in the image?" }, { "from": "gpt", "value": "The answer is Bus" } ], "clip_score": 23.984375, "loss": 4.054449558258057 }, { "id": "identity_204056", "image": "PMC-VQA/images/PMC8008854_f1_406334.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many red arrows are present in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.015625, "loss": 4.575782775878906 }, { "id": "identity_184765", "image": "VizWiz/images/VizWiz_train_00018284.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the tips of the dinosaur toy's spikes?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 31.875, "loss": 4.49367618560791 }, { "id": "identity_24779", "image": "IconQA/images/3829.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of triangles even or odd?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.296875, "loss": 4.107121467590332 }, { "id": "identity_222772", "image": "A-OKVQA/images/57224.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many skiers are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.265625, "loss": 4.064902305603027 }, { "id": "identity_29913", "image": "PMC-VQA/images/PMC6681608_fig6_19538.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the malocclusion type of the patient?\nChoices:\n(A) Skeletal Class III \n(B) Skeletal Class II \n(C) Skeletal Class I \n(D) Skeletal Open Bite" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.84375, "loss": 4.039521217346191 }, { "id": "identity_280015", "image": "TextVQA/images/00fca8a2da7a4694.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand does the watch belong to?" }, { "from": "gpt", "value": "The answer is zoppini" } ], "clip_score": 28.21875, "loss": 4.952396869659424 }, { "id": "identity_34888", "image": "VizWiz/images/VizWiz_train_00008986.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What's this?" }, { "from": "gpt", "value": "The answer is mug queen england on" } ], "clip_score": 29.375, "loss": 4.050909996032715 }, { "id": "identity_269132", "image": "PMC-VQA/images/PMC7960067_fig1_391626.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which specific area is the lesion situated?\nChoices:\n(A) Left frontal scalp\n(B) Right frontal scalp\n(C) Left occipital scalp\n(D) Right occipital scalp" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.21875, "loss": 4.463735580444336 }, { "id": "identity_295073", "image": "A-OKVQA/images/113977.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person with glasses on her head looking at?\nChoices:\n(A) performance\n(B) bird\n(C) grass\n(D) camera" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.328125, "loss": 4.584286212921143 }, { "id": "identity_158825", "image": "TQA/images/abc_question_images/blastocyst_19030.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At which stage does the embryo first show cell differentiation, as indicated by the diagram?" }, { "from": "gpt", "value": "The answer is Label U" } ], "clip_score": 32.9375, "loss": 4.151882171630859 }, { "id": "identity_186827", "image": "VQA2.0/images/145140.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many plates are stacked together on the table?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 19.171875, "loss": 4.403567314147949 }, { "id": "identity_32946", "image": "FigureQA/images/78097.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Sienna have the minimum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.765625, "loss": 4.170068264007568 }, { "id": "identity_12584", "image": "PMC-VQA/images/PMC5558640_fig1_203625.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the position of the distal clavicle indicate?\nChoices:\n(A) Good quality reduction \n(B) A broken bone \n(C) Low quality reduction \n(D) Nothing in particular " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.5, "loss": 5.294372081756592 }, { "id": "identity_269051", "image": "A-OKVQA/images/191511.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the object on the desk, which is blue and round, a representation of?\nChoices:\n(A) planet\n(B) gyroscope\n(C) moon\n(D) globe" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 11.171875, "loss": 4.391772270202637 }, { "id": "identity_323003", "image": "FigureQA/images/45550.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Royal Blue?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.0, "loss": 4.154387474060059 }, { "id": "identity_17973", "image": "A-OKVQA/images/406848.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the purplish strips on the pizza?\nChoices:\n(A) red carrots\n(B) red onions\n(C) eggplant\n(D) cabbage" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.8359375, "loss": 4.40445613861084 }, { "id": "identity_121343", "image": "A-OKVQA/images/38645.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs are visible on the deck?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 28.203125, "loss": 4.694791793823242 }, { "id": "identity_198708", "image": "PMC-VQA/images/PMC3374820_pone-0038732-g005_141511.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape are the regions marked with white in the image?" }, { "from": "gpt", "value": "The answer is Square and arrow shapes" } ], "clip_score": 18.90625, "loss": 4.448169231414795 }, { "id": "identity_316380", "image": "FigureQA/images/79752.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Yellow Green the one with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.859375, "loss": 4.093611717224121 }, { "id": "identity_322429", "image": "ScienceQA/images/18939.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Providence\n(B) Albany\n(C) Harrisburg\n(D) Manchester" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.609375, "loss": 4.506379127502441 }, { "id": "identity_222155", "image": "VQA-AS/images/8297.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many clouds are in the sky?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 22.0625, "loss": 4.082226753234863 }, { "id": "identity_206357", "image": "A-OKVQA/images/479658.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shirt worn by the person second from the right?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 20.078125, "loss": 4.546700477600098 }, { "id": "identity_314912", "image": "VQA2.0/images/544921.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which vehicle would you want to see if something was just stolen from you?" }, { "from": "gpt", "value": "The answer is car on bottom" } ], "clip_score": 14.609375, "loss": 4.3587541580200195 }, { "id": "identity_330502", "image": "A-OKVQA/images/24714.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are they doing here?\nChoices:\n(A) hang out\n(B) sell items\n(C) beggers\n(D) church" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.3125, "loss": 4.661688327789307 }, { "id": "identity_15913", "image": "FigureQA/images/46398.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.28125, "loss": 4.5044074058532715 }, { "id": "identity_27284", "image": "Super-CLEVR/images/superCLEVR_new_001399.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the matte car right of the big car that is in front of the big thing that is behind the large gray car?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 26.75, "loss": 4.142704010009766 }, { "id": "identity_297958", "image": "A-OKVQA/images/430273.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What safety feature do trains have on the glass to improve visibility during stormy weather?\nChoices:\n(A) turning signals\n(B) windshield wipers\n(C) high beams\n(D) fog heater" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.578125, "loss": 4.364762783050537 }, { "id": "identity_299861", "image": "A-OKVQA/images/45446.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of waste does the company advertised on the yellow sign specialize in handling? \n(A) construction\n(B) appliance\n(C) recyclable\n(D) leaf" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 12.546875, "loss": 4.450930595397949 }, { "id": "identity_328023", "image": "A-OKVQA/images/10145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What large rectangular-shaped object is closest to the TV?\nChoices:\n(A) cabinet\n(B) console\n(C) speaker\n(D) stereo" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.515625, "loss": 4.302508354187012 }, { "id": "identity_300847", "image": "PMC-VQA/images/PMC9140095_curroncol-29-00281-f001_292629.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the diagnostic challenge in the case shown in this image?\nChoices:\n(A) Systematized opacity\n(B) Pleural effusion\n(C) Thoracic mass\n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.34375, "loss": 4.1834211349487305 }, { "id": "identity_308888", "image": "FigureQA/images/47615.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Green the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.84375, "loss": 4.4374823570251465 }, { "id": "identity_267556", "image": "Super-CLEVR/images/superCLEVR_new_001425.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the object located to the right of the gray shiny bicycle and to the left of the big rubber fighter?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 24.6875, "loss": 4.325833320617676 }, { "id": "identity_274122", "image": "FigureQA/images/20338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Slate the median with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.359375, "loss": 4.180591583251953 }, { "id": "identity_34630", "image": "AI2D/images/1028.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Please choose the longest leaf on the left diagram.\nChoices:\n(A) K\n(B) P\n(C) J\n(D) L" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.296875, "loss": 4.1088433265686035 }, { "id": "identity_77075", "image": "AI2D/images/264.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What role does the oak tree play in this food chain?" }, { "from": "gpt", "value": "The answer is Primary producer" } ], "clip_score": 29.25, "loss": 5.015336513519287 }, { "id": "identity_265941", "image": "ScienceQA/images/294.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic describes this object?\nOptions:\n(A) shiny\n(B) yellow" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.609375, "loss": 4.210145950317383 }, { "id": "identity_276436", "image": "FigureQA/images/6804.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Salmon not the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.15625, "loss": 4.103440284729004 }, { "id": "identity_264538", "image": "VQA2.0/images/354305.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the house?" }, { "from": "gpt", "value": "The answer is yellow" } ], "clip_score": 11.484375, "loss": 4.189063549041748 }, { "id": "identity_15949", "image": "A-OKVQA/images/442223.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of seating would one expect to find in this building?\nChoices:\n(A) futons\n(B) booths\n(C) pews\n(D) stools" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.359375, "loss": 4.471151828765869 }, { "id": "identity_135627", "image": "DocVQA/images/ltfg0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the company the letter is addressed to?" }, { "from": "gpt", "value": "The answer is Himont Industrial, Inc" } ], "clip_score": 25.953125, "loss": 4.543941020965576 }, { "id": "identity_308916", "image": "IconQA/images/46645.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a star. What is the fifth image?\nChoices:\n(A) star\n(B) rocket\n(C) moon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.21875, "loss": 4.216544151306152 }, { "id": "identity_60479", "image": "DocVQA/images/qkxf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the building where Joseph C. Edwards, M.D. is located?" }, { "from": "gpt", "value": "The answer is Brackenridge Building" } ], "clip_score": 19.671875, "loss": 4.1429572105407715 }, { "id": "identity_204593", "image": "TextVQA/images/290bf790e5420c37.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main topic of the blog post featured in the image?" }, { "from": "gpt", "value": "The answer is Education or curriculum" } ], "clip_score": 27.390625, "loss": 4.068864822387695 }, { "id": "identity_318130", "image": "TextVQA/images/c06900b3b6c5ddb5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where can the \"c\" key be found?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 18.46875, "loss": 4.367852687835693 }, { "id": "identity_135", "image": "Super-CLEVR/images/superCLEVR_new_000486.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What size is the rubber thing that is the same color as the big metallic car?" }, { "from": "gpt", "value": "The answer is small" } ], "clip_score": 24.6875, "loss": 4.1910505294799805 }, { "id": "identity_125227", "image": "PMC-VQA/images/PMC7220475_F1_168892.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cervical vertebrae are visible in this image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 33.0625, "loss": 4.218356609344482 }, { "id": "identity_262524", "image": "PMC-VQA/images/PMC6104906_FIG1_357591.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of signal is indicated by the red outline in the image provided?\nChoices:\n(A) Aortic stenosis signal \n(B) Pulmonary embolism signal \n(C) Mitral regurgitation signal \n(D) Mitral stenosis signal" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.71875, "loss": 4.585793495178223 }, { "id": "identity_320981", "image": "FigureQA/images/44179.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Orange Red?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.7265625, "loss": 4.084800720214844 }, { "id": "identity_177248", "image": "IconQA/images/44542.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more flowers in the top row or the bottom row?" }, { "from": "gpt", "value": "The answer is The same" } ], "clip_score": 28.09375, "loss": 4.357418060302734 }, { "id": "identity_191188", "image": "PMC-VQA/images/PMC7718010_F1_315486.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color used to stain the background of the image?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 19.015625, "loss": 4.683568000793457 }, { "id": "identity_20711", "image": "A-OKVQA/images/552054.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What zone is this area likely to be?\nChoices:\n(A) business\n(B) tourist\n(C) residential\n(D) shopping" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.671875, "loss": 4.4885406494140625 }, { "id": "identity_22993", "image": "ScienceQA/images/16628.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these three objects have in common?\nChoices:\n(A) blue\n(B) hard\n(C) soft" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.859375, "loss": 4.02486515045166 }, { "id": "identity_270810", "image": "VizWiz/images/VizWiz_train_00007293.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 21.890625, "loss": 4.263854026794434 }, { "id": "identity_207656", "image": "A-OKVQA/images/581542.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dancers wearing green dresses are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.046875, "loss": 4.652850151062012 }, { "id": "identity_207020", "image": "A-OKVQA/images/244088.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather condition in the image?" }, { "from": "gpt", "value": "The answer is within ten words" } ], "clip_score": 14.75, "loss": 5.007089138031006 }, { "id": "identity_300293", "image": "A-OKVQA/images/16403.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people in the background waiting for?\nChoices:\n(A) cruise boat\n(B) bus\n(C) cab\n(D) van" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.6328125, "loss": 4.305008888244629 }, { "id": "identity_262236", "image": "ScienceQA/images/14827.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Kentucky?\n(A) Frankfort\n(B) Columbia\n(C) Oklahoma City\n(D) Detroit" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.609375, "loss": 4.106655120849609 }, { "id": "identity_320201", "image": "PMC-VQA/images/PMC6078123_F2_350570.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of procedure was performed on this shoulder?\nChoices:\n(A) CT scan\n(B) X-ray\n(C) MRI\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.96875, "loss": 4.224632740020752 }, { "id": "identity_330061", "image": "FigureQA/images/26362.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Sky Blue the highest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.859375, "loss": 4.352937698364258 }, { "id": "identity_308561", "image": "DocVQA/images/khgl0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the author of the letter?" }, { "from": "gpt", "value": "The answer is CJB" } ], "clip_score": 23.015625, "loss": 5.166812419891357 }, { "id": "identity_287901", "image": "A-OKVQA/images/148427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country's flag is displayed on the white carriage? \n(A) Italy\n(B) Ireland\n(C) Germany\n(D) Russia" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.8828125, "loss": 4.1749491691589355 }, { "id": "identity_311308", "image": "VQA2.0/images/95422.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the structures?" }, { "from": "gpt", "value": "The answer is silver" } ], "clip_score": 15.4921875, "loss": 5.184750556945801 }, { "id": "identity_325174", "image": "A-OKVQA/images/27041.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is this person working here?\nChoices:\n(A) left home\n(B) sunny outside\n(C) power source\n(D) is hiding" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.5, "loss": 4.394402503967285 }, { "id": "identity_25377", "image": "PMC-VQA/images/PMC5964522_F2_317805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organ was analyzed in the experiment? \nChoices:\n(A) brain \n(B) liver \n(C) spleen \n(D) lung. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.34375, "loss": 4.339376449584961 }, { "id": "identity_184701", "image": "TextVQA/images/92b673a422724af5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many printed pages can be seen on the desk?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 26.015625, "loss": 4.202544212341309 }, { "id": "identity_17664", "image": "AI2D/images/1905.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is at the highest of the food chain in this diagram?\nChoices:\n(A) hawk\n(B) snake\n(C) grasshopper\n(D) lizard" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.25, "loss": 4.1216511726379395 }, { "id": "identity_23112", "image": "A-OKVQA/images/437516.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the skateboarder not wearing that most serious skateboarders always wear?\nChoices:\n(A) safety gear\n(B) chucks\n(C) shorts\n(D) sunglasses" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.421875, "loss": 4.4660186767578125 }, { "id": "identity_216296", "image": "VQA-AS/images/13913.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many birds are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.6875, "loss": 4.564546585083008 }, { "id": "identity_320806", "image": "A-OKVQA/images/457119.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the pigeons on the back of the bench doing?\nChoices:\n(A) cooing\n(B) eating\n(C) fighting\n(D) preening" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.90625, "loss": 4.045719146728516 }, { "id": "identity_323916", "image": "TextVQA/images/8abf34fc9c4016a6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is displayed here?" }, { "from": "gpt", "value": "The answer is maid" } ], "clip_score": 11.9453125, "loss": 5.34229040145874 }, { "id": "identity_23089", "image": "VQA2.0/images/30449.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the walls made of?" }, { "from": "gpt", "value": "The answer is rocks" } ], "clip_score": 21.4375, "loss": 4.65076208114624 }, { "id": "identity_334178", "image": "DocVQA/images/jsgn0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the sender of this document?" }, { "from": "gpt", "value": "The answer is Jim Lancaster" } ], "clip_score": 27.0625, "loss": 4.016972541809082 }, { "id": "identity_29526", "image": "A-OKVQA/images/233119.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would you most likely do with a card near here?\nChoices:\n(A) magic trick\n(B) open door\n(C) play cards\n(D) banking" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 11.5546875, "loss": 4.506185054779053 }, { "id": "identity_299173", "image": "A-OKVQA/images/471280.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is typically placed on train tracks by people?\nChoices:\n(A) ballast\n(B) asphalt\n(C) concrete\n(D) wires" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.125, "loss": 4.43203067779541 }, { "id": "identity_74142", "image": "ScienceQA/images/10741.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many herbivores are depicted in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.453125, "loss": 4.893212795257568 }, { "id": "identity_69547", "image": "ScienceQA/images/15504.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many leaves can be seen on the tree bark image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.90625, "loss": 4.210705757141113 }, { "id": "identity_138750", "image": "DocVQA/images/qkjk0079_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the individual for whom this performance plan is created?" }, { "from": "gpt", "value": "The answer is G. T. Baroody" } ], "clip_score": 23.96875, "loss": 4.209988117218018 }, { "id": "identity_242070", "image": "IconQA/images/97942.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify and describe three distinct asymmetrical features in the image" }, { "from": "gpt", "value": "The answer is Card, hat shape, hatband" } ], "clip_score": 34.59375, "loss": 5.051088333129883 }, { "id": "identity_252919", "image": "PMC-VQA/images/PMC5787671_F5_268493.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the histological features observed in the cross sections shown in the image, describe the pathological changes and explain how they are indicative of the specific disease/malady linked to the image" }, { "from": "gpt", "value": "The answer is Tonsillitis" } ], "clip_score": 21.375, "loss": 4.0887298583984375 }, { "id": "identity_275894", "image": "PMC-VQA/images/PMC8606177_FIG2_116677.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is observed in Image B?\nChoices:\n(A) Dilated ventricles\n(B) Normal brain parenchyma\n(C) Engorged choroid plexus\n(D) Normal foramina of Luschka" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.0625, "loss": 4.389181613922119 }, { "id": "identity_237144", "image": "A-OKVQA/images/4554.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the skill level of most snowboarders here, what are the key indicators that suggest their level of experience, and how do these indicators compare to the characteristics of professional and Olympic snowboarders?" }, { "from": "gpt", "value": "The answer is Beginners" } ], "clip_score": 27.90625, "loss": 6.045853137969971 }, { "id": "identity_27122", "image": "TQA/images/question_images/parts_skeleton_7277.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What connects the skull to the pelvis?\nChoices:\n(A) humerus\n(B) ribs\n(C) vertebral column\n(D) sternum" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.875, "loss": 4.35406494140625 }, { "id": "identity_291944", "image": "FigureQA/images/83499.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orchid's area under the curve the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.71875, "loss": 4.08619499206543 }, { "id": "identity_264389", "image": "PMC-VQA/images/PMC8195437_ppat.1009632.g003_464513.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the white arrows in the insets pointing to? \nChoices:\n(A) Bacterial particles\n(B) Viral particles\n(C) Parasitic particles\n(D) Fungal particles." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.203125, "loss": 5.01032018661499 }, { "id": "identity_312221", "image": "ScienceQA/images/16331.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Springfield\n(B) Phoenix\n(C) Tucson\n(D) Portland" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.546875, "loss": 4.389528274536133 }, { "id": "identity_313947", "image": "VizWiz/images/VizWiz_train_00023757.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the code in the image?" }, { "from": "gpt", "value": "The answer is minglyt" } ], "clip_score": 20.484375, "loss": 4.090199947357178 }, { "id": "identity_291355", "image": "VizWiz/images/VizWiz_train_00013527.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is table red top" } ], "clip_score": 20.9375, "loss": 4.940299034118652 }, { "id": "identity_125983", "image": "TextVQA/images/578a0e1a3660985e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the black and white sign below the stop sign indicate?" }, { "from": "gpt", "value": "The answer is Traffic island ahead" } ], "clip_score": 31.984375, "loss": 4.5346550941467285 }, { "id": "identity_36481", "image": "A-OKVQA/images/313948.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What sort of building is seen here?\nChoices:\n(A) expo hall\n(B) barn\n(C) school\n(D) kitchen furnishing" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.6171875, "loss": 4.219862461090088 }, { "id": "identity_185047", "image": "VQA2.0/images/137547.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of the second remote from the left?" }, { "from": "gpt", "value": "The answer is LITEON" } ], "clip_score": 31.234375, "loss": 4.299874305725098 }, { "id": "identity_294494", "image": "A-OKVQA/images/33813.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term used to refer to a group of these plentiful items?\nChoices:\n(A) clowder\n(B) bushel\n(C) fleet\n(D) squad" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.875, "loss": 4.476324081420898 }, { "id": "identity_302501", "image": "A-OKVQA/images/67470.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the white object on this bike frame used for?\nChoices:\n(A) collecting bugs\n(B) engine fuel\n(C) collecting trash\n(D) drinking" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.6875, "loss": 4.203890800476074 }, { "id": "identity_18698", "image": "PMC-VQA/images/PMC9294512_F2_344289.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is present in the encircled area of the image captured at 48 h? \nChoices:\n(A) presence of fimbriae \n(B) presence of pili \n(C) presence of nanotubes \n(D) absence of nanotubes " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.53125, "loss": 4.742111682891846 }, { "id": "identity_186709", "image": "A-OKVQA/images/185302.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the rope tied to the front of the boat named Maggie?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 19.734375, "loss": 4.000149726867676 }, { "id": "identity_36601", "image": "A-OKVQA/images/92804.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the leftmost skier doing?\nChoices:\n(A) seeking shelter\n(B) hiding\n(C) waiting turn\n(D) sleeping" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.984375, "loss": 4.7615251541137695 }, { "id": "identity_299422", "image": "PMC-VQA/images/PMC6988233_Fig1_107589.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristics are evident in the images of pulmonary fibrosis?\nChoices:\n(A) Reduced reticulation and honeycombing,\n(B) Predominance in the lower lobes and air trapping,\n(C) Traction bronchiectasis and honeycombing,\n(D) Emphysema and blebs" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.703125, "loss": 4.42832088470459 }, { "id": "identity_30507", "image": "A-OKVQA/images/72011.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What season will occur after the current season?\nChoices:\n(A) summer\n(B) winter\n(C) spring\n(D) autumn" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.453125, "loss": 4.244417667388916 }, { "id": "identity_288240", "image": "AI2D/images/389.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What factor from the food web diagram leads to an increase in kingfisher population?\nChoices:\n(A) decrease in fish\n(B) decrease in water boatman\n(C) increase in fish\n(D) increase in algae" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.53125, "loss": 4.026590824127197 }, { "id": "identity_337615", "image": "TextVQA/images/61e78e1529ef6d0e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store is in red?" }, { "from": "gpt", "value": "The answer is gooin" } ], "clip_score": 13.65625, "loss": 5.215181350708008 }, { "id": "identity_330993", "image": "DocVQA/images/lxjw0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the project mentioned in the form?" }, { "from": "gpt", "value": "The answer is Referral Smokers - Charlotte Tests" } ], "clip_score": 20.125, "loss": 5.569021224975586 }, { "id": "identity_322822", "image": "DocVQA/images/fpjw0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand mentioned here?" }, { "from": "gpt", "value": "The answer is WINSTON" } ], "clip_score": 15.453125, "loss": 4.354903697967529 }, { "id": "identity_3440", "image": "TQA/images/question_images/parts_skeleton_7251.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which bones connect to the phalanges in the hand?\nChoices:\n(A) Metacarpals\n(B) Carpals\n(C) Metatarsals\n(D) Tarsals" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.5625, "loss": 4.131994724273682 }, { "id": "identity_14639", "image": "TQA/images/abc_question_images/parts_telescope_18155.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label shows the optical tube?\nChoices:\n(A) V\n(B) W\n(C) P\n(D) E" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.484375, "loss": 4.962649822235107 }, { "id": "identity_118770", "image": "TextVQA/images/9a8f76f1817e2011.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the blue object placed above or below the ruler?" }, { "from": "gpt", "value": "The answer is Below" } ], "clip_score": 26.78125, "loss": 4.016034126281738 }, { "id": "identity_108511", "image": "TextVQA/images/8aa8c4cf9db9547b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many blue bins can be seen in the background?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 27.890625, "loss": 4.734477996826172 }, { "id": "identity_320555", "image": "TextVQA/images/31020aab9724657b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What job title is listed in the center of the board?" }, { "from": "gpt", "value": "The answer is libarian" } ], "clip_score": 19.84375, "loss": 4.310022830963135 }, { "id": "identity_282395", "image": "PMC-VQA/images/PMC3522352_fig192_172905.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On what factors does the area of the canal rely?\nChoices:\n(A) AP diameter\n(B) longitudinal length\n(C) opening width\n(D) distance between the openings" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.0625, "loss": 4.070246696472168 }, { "id": "identity_327995", "image": "A-OKVQA/images/219558.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The parent company of this phone brand is from which country?\nChoices:\n(A) finland\n(B) germany\n(C) japan\n(D) united states" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.65625, "loss": 4.75052547454834 }, { "id": "identity_239891", "image": "PMC-VQA/images/PMC2650704_F1_35335.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How do the asterisks indicating discontinuous vessels correlate with the overall vascular pattern in the image, and what might this suggest about the developmental stage of the specimen?" }, { "from": "gpt", "value": "The answer is Early stage of vascular development with mixed vessel continuity" } ], "clip_score": 24.28125, "loss": 4.447359085083008 }, { "id": "identity_265035", "image": "TQA/images/abc_question_images/seasons_10084.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the symbol that represents autumn from the following choices: (A) X, (B) P, (C) Y, (D) J." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.265625, "loss": 4.395325660705566 }, { "id": "identity_33606", "image": "PMC-VQA/images/PMC6684521_F1_20545.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the image show? \nChoices:\n(A) Microtubules in RNFF1 cell line, \n(B) Dysfunctional telomeres in MR39 cell line, \n(C) Signal intensities of individual chromosomes, \n(D) Spread of cells on 9th passage. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.4375, "loss": 4.622853755950928 }, { "id": "identity_133993", "image": "TextVQA/images/ad644fefedb7ae02.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the cap on the green Monster Rehab can?" }, { "from": "gpt", "value": "The answer is Green cap" } ], "clip_score": 23.90625, "loss": 4.229803085327148 }, { "id": "identity_29668", "image": "A-OKVQA/images/508722.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the important part of this sport?\nChoices:\n(A) looks\n(B) fun\n(C) security\n(D) fame" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.375, "loss": 4.9054107666015625 }, { "id": "identity_198254", "image": "A-OKVQA/images/515710.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the lamp hanging above the dining table?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 20.828125, "loss": 4.634083271026611 }, { "id": "identity_29336", "image": "AI2D/images/595.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the diagram represents the larva?\nChoices:\n(A) D\n(B) F\n(C) C\n(D) B" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.71875, "loss": 4.072265148162842 }, { "id": "identity_289200", "image": "PMC-VQA/images/PMC3356314_pone-0037520-g003_138700.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the boxes in the micrographs?\nChoices:\n(A) Areas with high muscle density \n(B) Areas with low muscle density \n(C) Areas shown in other micrographs \n(D) Areas with no muscle scars" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.09375, "loss": 4.2006731033325195 }, { "id": "identity_257689", "image": "VQA2.0/images/349425.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of citrus fruit is shown in the image, and what are the distinguishing characteristics that differentiate it from a regular orange?" }, { "from": "gpt", "value": "The answer is Blood orange" } ], "clip_score": 31.484375, "loss": 4.610651016235352 }, { "id": "identity_272739", "image": "ScienceQA/images/14879.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repelling effect on each other?\nChoices:\n(A) repel\n(B) attract" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.8125, "loss": 4.0989227294921875 }, { "id": "identity_220890", "image": "A-OKVQA/images/257388.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.890625, "loss": 4.716238021850586 }, { "id": "identity_243809", "image": "TextVQA/images/caedb5851c642d53.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: According to the text, what are the challenges mentioned in designing for responsive design, and how do they impact the overall design process?" }, { "from": "gpt", "value": "The answer is Pixels, images, advertising, physical device, touch targets, large touch targets" } ], "clip_score": 20.140625, "loss": 4.035268783569336 }, { "id": "identity_268442", "image": "ScienceQA/images/16033.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the specific humidity level measured in the highlighted area shown?\nChoices:\n(A) 16 grams of water vapor per kilogram of air\n(B) 23 grams of water vapor per kilogram of air\n(C) 14 grams of water vapor per kilogram of air" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.03125, "loss": 4.143038272857666 }, { "id": "identity_57060", "image": "DocVQA/images/rrvw0217_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the medication that has demonstrated a good clinical safety profile?" }, { "from": "gpt", "value": "The answer is Totelle" } ], "clip_score": 21.9375, "loss": 5.338806629180908 }, { "id": "identity_112457", "image": "Super-CLEVR/images/superCLEVR_new_001758.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is the yellow one?" }, { "from": "gpt", "value": "The answer is School bus" } ], "clip_score": 24.25, "loss": 4.029294967651367 }, { "id": "identity_16851", "image": "FigureQA/images/42017.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gray the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.953125, "loss": 4.579854965209961 }, { "id": "identity_325006", "image": "TextVQA/images/0c9622309366191d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the advertiser on this blue sign?" }, { "from": "gpt", "value": "The answer is dentaquest" } ], "clip_score": 13.671875, "loss": 4.918984889984131 }, { "id": "identity_254815", "image": "A-OKVQA/images/440032.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the distinguishing features in the image that indicate the type of passenger service available on this street, and how do they confirm the answer?" }, { "from": "gpt", "value": "The answer is Tracks, overhead wires, and tram design confirm tram service" } ], "clip_score": 24.109375, "loss": 4.383246421813965 }, { "id": "identity_157717", "image": "ScienceQA/images/1791.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dunes are visible in the background of the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.171875, "loss": 4.266891002655029 }, { "id": "identity_58239", "image": "DocVQA/images/lffn0226_48.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the three cloth infuser bags shown in Figure 14?" }, { "from": "gpt", "value": "The answer is To inflate with clean air" } ], "clip_score": 19.875, "loss": 4.410264015197754 }, { "id": "identity_266438", "image": "IconQA/images/72419.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the probability of selecting a black marble without looking?\nChoices:\n(A) probable\n(B) impossible\n(C) unlikely\n(D) certain" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.265625, "loss": 4.025388717651367 }, { "id": "identity_14221", "image": "FigureQA/images/58958.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Periwinkle the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.15625, "loss": 4.00105619430542 }, { "id": "identity_110161", "image": "TextVQA/images/65e0dd541ab2a106.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the artist on the \"MUSIC FROM ANOTHER WORLD\" album?" }, { "from": "gpt", "value": "The answer is Sid Bass" } ], "clip_score": 25.0, "loss": 4.091228008270264 }, { "id": "identity_90756", "image": "UniGeo/images/1258.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If BM is tangent to circle O at point B and angle MBA is 140 degrees, what is the measure of angle BCA?" }, { "from": "gpt", "value": "The answer is Error in given information" } ], "clip_score": 35.09375, "loss": 4.609042167663574 }, { "id": "identity_129378", "image": "TextVQA/images/e5a3b9f29d8abcb9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many photographers are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.5, "loss": 4.655890941619873 }, { "id": "identity_137728", "image": "DocVQA/images/kkxw0023_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the estimated number of calls for the order option W10?" }, { "from": "gpt", "value": "The answer is (4 wks prior to BTS)" } ], "clip_score": 17.125, "loss": 4.181654930114746 }, { "id": "identity_24546", "image": "AI2D/images/4355.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram which type of nut is label D?\nChoices:\n(A) Macadamin\n(B) Peanut\n(C) Almond\n(D) Cashew" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.609375, "loss": 4.021583557128906 }, { "id": "identity_266065", "image": "PMC-VQA/images/PMC7416230_fig3_232014.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which radiographic evidence indicates degeneration in the uppermost cervical spine?\nChoices:\n(A) Upper arrow\n(B) Lower arrow\n(C) Both A and B\n(D) None of the above" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.265625, "loss": 4.064255714416504 }, { "id": "identity_332486", "image": "VQA2.0/images/402987.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are they both wearing jeans?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 17.3125, "loss": 4.063902378082275 }, { "id": "identity_215", "image": "IconQA/images/23861.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of fish even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.328125, "loss": 4.067511081695557 }, { "id": "identity_328927", "image": "A-OKVQA/images/238928.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person with the whip riding on?\nChoices:\n(A) donkey\n(B) horse\n(C) goat\n(D) camel" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.703125, "loss": 4.151071548461914 }, { "id": "identity_10722", "image": "ScienceQA/images/996.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Rhode Island\n(B) Kentucky\n(C) Florida\n(D) Connecticut" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.6875, "loss": 4.299400329589844 }, { "id": "identity_311837", "image": "FigureQA/images/2004.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Aqua the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.96875, "loss": 4.830996036529541 }, { "id": "identity_290984", "image": "PMC-VQA/images/PMC3672083_F1_209017.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the arrow in the MRI image? \nChoices:\n(A) Indicates an old infarct in the left middle cerebral artery territory \n(B) Indicates a new infarct in the left middle cerebral artery territory \n(C) Indicates an old infarct in the right pontine basis \n(D) Indicates a new infarct in the right pontine basis" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.515625, "loss": 4.203269958496094 }, { "id": "identity_317702", "image": "AI2D/images/1044.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the warmest season of the year?\nChoices:\n(A) summer\n(B) autumn\n(C) spring\n(D) winter" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.390625, "loss": 4.029641151428223 }, { "id": "identity_14507", "image": "ScienceQA/images/11204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which trait did Palaeocyparis have? Select the trait you can observe on the fossil.\nChoices:\n(A) flowers\n(B) short, thin leaves\n(C) cones" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.96875, "loss": 4.723011493682861 }, { "id": "identity_279252", "image": "AI2D/images/3324.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the smallest structure in life that can function independently?\nChoices:\n(A) cell\n(B) organism\n(C) flower\n(D) mouse" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.3125, "loss": 4.257468223571777 }, { "id": "identity_321516", "image": "IconQA/images/41131.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a train. Which image is third?\nChoices:\n(A) bear\n(B) plane\n(C) train" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.15625, "loss": 4.184477806091309 }, { "id": "identity_329535", "image": "ScienceQA/images/13734.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic do these four items share?\nChoices:\n(A) flexible\n(B) hard\n(C) slippery" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.265625, "loss": 4.0788164138793945 }, { "id": "identity_294299", "image": "A-OKVQA/images/118051.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for extending the height of the partition?\nChoices:\n(A) shielding noise\n(B) posting photos\n(C) privacy\n(D) special design" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.421875, "loss": 4.463831424713135 }, { "id": "identity_272911", "image": "A-OKVQA/images/200328.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the birds likely doing in this location? Are they:\n(A) attacking\n(B) flying over\n(C) migrating\n(D) landing" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.8125, "loss": 4.51468563079834 }, { "id": "identity_289735", "image": "A-OKVQA/images/253262.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the truck with yellow, red, and white colors in this location?\nChoices:\n(A) issue citations\n(B) provide snacks\n(C) fuel plane\n(D) sell icecream" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.8984375, "loss": 4.695457935333252 }, { "id": "identity_296754", "image": "PMC-VQA/images/PMC8638553_Fig6_129316.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the yellow reticule next to the red arrow represent in the image?\nChoices:\n(A) Location of scan on the eye \n(B) Fixed incision depth for AK \n(C) Fixed residual uncut bed thickness \n(D) Percent thickness depth for the micro-RK." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.375, "loss": 4.285149097442627 }, { "id": "identity_131567", "image": "A-OKVQA/images/459003.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible traffic signal, and if so, what color is it displaying?" }, { "from": "gpt", "value": "The answer is Yes, red" } ], "clip_score": 18.09375, "loss": 4.2188591957092285 }, { "id": "identity_314646", "image": "Super-CLEVR/images/superCLEVR_new_000538.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there anything else made of the same material as the small vehicle?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 23.46875, "loss": 4.893829822540283 }, { "id": "identity_313342", "image": "VizWiz/images/VizWiz_train_00006424.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this thing?" }, { "from": "gpt", "value": "The answer is sweetarts candy tabs" } ], "clip_score": 28.921875, "loss": 4.410978317260742 }, { "id": "identity_277778", "image": "Super-CLEVR/images/superCLEVR_new_000297.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the small blue wagon have the same material as the small green car?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 27.5, "loss": 5.134701728820801 }, { "id": "identity_266702", "image": "A-OKVQA/images/297876.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for having five displays on the desk?\nChoices:\n(A) multi-tasking\n(B) for sale\n(C) redundancy\n(D) stolen" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.109375, "loss": 4.267117977142334 }, { "id": "identity_144069", "image": "DocVQA/images/hgxv0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the memorandum addressed to?" }, { "from": "gpt", "value": "The answer is See Attached Distribution List" } ], "clip_score": 22.296875, "loss": 4.272209167480469 }, { "id": "identity_36645", "image": "FigureQA/images/89098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Blue the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.4375, "loss": 4.214227676391602 }, { "id": "identity_256535", "image": "A-OKVQA/images/555684.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the possible reasons the owner placed the dog on the surfboard, and how does the dog's posture indicate its comfort level with surfing?" }, { "from": "gpt", "value": "The answer is The owner placed the dog for training or enjoyment" } ], "clip_score": 24.015625, "loss": 4.209474086761475 }, { "id": "identity_270446", "image": "PlotQA/images/45814.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is represented by the second bar from the top in the Research and Development chart?" }, { "from": "gpt", "value": "The answer is Greece" } ], "clip_score": 25.65625, "loss": 5.599782466888428 }, { "id": "identity_202954", "image": "VQA2.0/images/3493.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many signs have a phone number listed on them?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 18.734375, "loss": 4.593354225158691 }, { "id": "identity_31228", "image": "A-OKVQA/images/18375.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the role of Bank of America to the game?\nChoices:\n(A) loan provider\n(B) site provider\n(C) sponsor\n(D) fund provider" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.703125, "loss": 4.3887224197387695 }, { "id": "identity_298917", "image": "PMC-VQA/images/PMC8419393_rcr2840-fig-0002_52224.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is the ground-glass opacity more pronounced? \nChoices:\n(A) left lung parenchyma\n(B) right lung parenchyma\n(C) upper lung fields\n(D) lower lung fields" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.359375, "loss": 4.564609050750732 }, { "id": "identity_7581", "image": "PMC-VQA/images/PMC7000761_Fig1_111094.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the guiding curve used for? \nChoices:\n(A) To resample images \n(B) To visualize the brain \n(C) To straighten the spinal cord \n(D) To cover the lumbar spinal cord " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.375, "loss": 4.448400974273682 }, { "id": "identity_20924", "image": "ScienceQA/images/3312.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Maine\n(B) Rhode Island\n(C) Connecticut\n(D) Florida" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.8125, "loss": 4.744144916534424 }, { "id": "identity_317383", "image": "PMC-VQA/images/PMC6994645_Fig1_109414.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the color Doppler create on this? \nChoices:\n(A) A twinkling effect \n(B) A rainbow effect \n(C) A black and white effect \n(D) A static effect" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.75, "loss": 4.239457130432129 }, { "id": "identity_318412", "image": "PMC-VQA/images/PMC3447726_F4_156431.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the main subject of this image?\nChoices:\n(A) bovine oviducts at Day 1.5\n(B) PGRMC1 localisation in the infundibulum\n(C) PGRMC1 localisation in the ampulla\n(D) PGRMC1 localisation in the isthmus" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.5, "loss": 4.541472434997559 }, { "id": "identity_305882", "image": "A-OKVQA/images/445928.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this event with the crowd happening?\nChoices:\n(A) outdoors\n(B) country club\n(C) restaurant\n(D) auditorium" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.53125, "loss": 4.31289529800415 }, { "id": "identity_291236", "image": "PMC-VQA/images/PMC8801695_F6_180438.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the blue color representing in the images provided?\nChoices:\n(A) muscle tissue\n(B) blood vessels\n(C) emphysematous change\n(D) bone tissue" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.96875, "loss": 4.487973213195801 }, { "id": "identity_309979", "image": "PMC-VQA/images/PMC4109997_pone-0103283-g001_308584.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the closed arrow in this image indicate?\nChoices:\n(A) Axons,\n(B) Terminal field in hindbrain,\n(C) gX,\n(D) Control vagal ganglia." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.3125, "loss": 4.03720760345459 }, { "id": "identity_306330", "image": "IconQA/images/84607.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is indicated? Provide a time word, not a numeral. It is (_) after two." }, { "from": "gpt", "value": "The answer is quarter" } ], "clip_score": 27.953125, "loss": 4.210391521453857 }, { "id": "identity_155963", "image": "AI2D/images/3867.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many types of cells can be identified in the mesophyll tissue?" }, { "from": "gpt", "value": "The answer is Two types" } ], "clip_score": 27.390625, "loss": 4.3375115394592285 }, { "id": "identity_130254", "image": "VQA-AS/images/9536.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trees are there in the background?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.5, "loss": 4.181849956512451 }, { "id": "identity_234810", "image": "Super-CLEVR/images/superCLEVR_new_001777.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more large blue things to the right of the red object than the total number of bicycles and cars in the image?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 22.078125, "loss": 4.977719306945801 }, { "id": "identity_322513", "image": "TextVQA/images/dda14cf415f6110a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of cloud is mentioned here?" }, { "from": "gpt", "value": "The answer is flying cloud " } ], "clip_score": 17.390625, "loss": 4.409729957580566 }, { "id": "identity_329234", "image": "A-OKVQA/images/513670.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is connecting to all the things up there?\nChoices:\n(A) string\n(B) balloons\n(C) claws\n(D) batteries" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.21875, "loss": 4.043772220611572 }, { "id": "identity_150682", "image": "ScienceQA/images/18774.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What could be the reason for the fish's behavior in the image?" }, { "from": "gpt", "value": "The answer is Spawning migration" } ], "clip_score": 30.28125, "loss": 4.259012222290039 }, { "id": "identity_125441", "image": "A-OKVQA/images/242034.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the business advertised on the side of the bus on the right?" }, { "from": "gpt", "value": "The answer is Woricker" } ], "clip_score": 22.6875, "loss": 5.331185817718506 }, { "id": "identity_3082", "image": "VQA2.0/images/74316.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the dog in this picture?" }, { "from": "gpt", "value": "The answer is rug" } ], "clip_score": 19.109375, "loss": 4.95592737197876 }, { "id": "identity_323077", "image": "FigureQA/images/5207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.40625, "loss": 4.456356525421143 }, { "id": "identity_29331", "image": "TQA/images/question_images/human_system_nervous_6170.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the brain is responsible for movement?\nChoices:\n(A) occipital lobe\n(B) frontal lobe\n(C) parietal lobe\n(D) temporal lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.390625, "loss": 4.492679595947266 }, { "id": "identity_65527", "image": "AI2D/images/247.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many primary consumers are shown in this food web?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 32.34375, "loss": 4.755816459655762 }, { "id": "identity_9943", "image": "PMC-VQA/images/PMC6881471_Fig2_75067.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of event is depicted in the images presented? \nChoices:\n(A) normal breathing \n(B) apneic/hypopneic event \n(C) gasping \n(D) tachypnea " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.0625, "loss": 5.61150598526001 }, { "id": "identity_265564", "image": "PMC-VQA/images/PMC4312094_pone.0116763.g004_354115.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the trabecular bone located in the image provided?\nChoices:\n(A) Diaphysis \n(B) Epiphysis \n(C) Metaphysis \n(D) None of the above." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.0625, "loss": 4.0241217613220215 }, { "id": "identity_29959", "image": "TextVQA/images/a1a8116013e1bb8b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what player wears number 27?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 19.796875, "loss": 4.270046234130859 }, { "id": "identity_330884", "image": "PMC-VQA/images/PMC6418688_polymers-09-00476-f002_451307.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How are these images organized?\nChoices:\n(A) Randomly\n(B) By increasing fluorescence\n(C) By increasing membrane thickness\n(D) By increasing lateral chain volume" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.546875, "loss": 4.414494514465332 }, { "id": "identity_268198", "image": "PMC-VQA/images/PMC4696192_Fig1_457881.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the visible colonic loops in the image:\n(A) Ascending and transverse \n(B) Descending and sigmoid \n(C) Dorsal and ventral of the left ascending \n(D) Sigmoid and descending colon" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.828125, "loss": 4.115917205810547 }, { "id": "identity_193308", "image": "A-OKVQA/images/300753.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What team is the batter representing?" }, { "from": "gpt", "value": "The answer is The home team" } ], "clip_score": 21.015625, "loss": 4.150790691375732 }, { "id": "identity_296450", "image": "A-OKVQA/images/526371.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the man wearing a covering over his face?\nChoices:\n(A) keeping cool\n(B) keeping warm\n(C) hiding acne\n(D) hiding scar" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.125, "loss": 4.795428276062012 }, { "id": "identity_24240", "image": "Super-CLEVR/images/superCLEVR_new_000257.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The big metallic bus that is in front of the big bus behind the metal object in front of the big gray thing is what color?" }, { "from": "gpt", "value": "The answer is green" } ], "clip_score": 29.015625, "loss": 4.144456386566162 }, { "id": "identity_299017", "image": "A-OKVQA/images/216930.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for him being in the air above the skateboard? Is it because he is bouncing, showing off, confused, or falling?\nChoices:\n(A) bouncing\n(B) showing off\n(C) confused\n(D) falling" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.40625, "loss": 4.560742378234863 }, { "id": "identity_293792", "image": "PMC-VQA/images/PMC9253121_Fig4_330654.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the inset image depict?\nChoices:\n(A) A different sample\n(B) P3HT and PCBM\n(C) Pure P3HT\n(D) Pure PCBM" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.015625, "loss": 4.848621845245361 }, { "id": "identity_319937", "image": "A-OKVQA/images/207513.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the sign are they painting?\nChoices:\n(A) top\n(B) middle\n(C) base\n(D) stop" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.0625, "loss": 4.246432304382324 }, { "id": "identity_294291", "image": "PMC-VQA/images/PMC8122927_jcm-10-01834-f002_441031.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the high magnification image?\nChoices:\n(A) Dark intracellular material \n(B) Fibrils and tubules \n(C) Bright intracellular material \n(D) Only fibrils and no tubules" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.390625, "loss": 4.648873329162598 }, { "id": "identity_333143", "image": "PMC-VQA/images/PMC5922503_FIG3_306980.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the pleural effusion located in the chest?\nChoices:\n(A) Right side.\n(B) Front side.\n(C) Left side.\n(D) Back side." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.515625, "loss": 4.334421634674072 }, { "id": "identity_315433", "image": "DVQA/images/bar_train_00057222.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the highest accuracy for some dataset?" }, { "from": "gpt", "value": "The answer is strip" } ], "clip_score": 32.875, "loss": 4.318573951721191 }, { "id": "identity_211550", "image": "VQA2.0/images/114473.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are riding in the cart?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.640625, "loss": 4.093392372131348 }, { "id": "identity_290475", "image": "PMC-VQA/images/PMC5606599_vetsci-04-00020-f006_217799.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which abnormal feature is visible in the image of the aortic root?\nChoices:\n(A) A normal coronary artery. \n(B) A CAA traversing over the aortic valve annulus. \n(C) A CAA traversing over the pulmonary valve annulus. \n(D) An aortic root dilation." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.390625, "loss": 4.096428871154785 }, { "id": "identity_118320", "image": "VQA2.0/images/239752.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of surface are the zebras standing on?" }, { "from": "gpt", "value": "The answer is Hard ground" } ], "clip_score": 32.25, "loss": 4.656932830810547 }, { "id": "identity_319091", "image": "AI2D/images/2566.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the sun located in the diagram?\nChoices:\n(A) A\n(B) F\n(C) H\n(D) C" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.84375, "loss": 4.262197494506836 }, { "id": "identity_291028", "image": "IconQA/images/45556.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there a circle as a face in this shape?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.84375, "loss": 4.739768028259277 }, { "id": "identity_265977", "image": "A-OKVQA/images/24755.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which group of people is gathered behind the skiers, watching them while standing still? Are they:\n(A) judges\n(B) spectators\n(C) diners\n(D) waiters" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.375, "loss": 4.756673812866211 }, { "id": "identity_326093", "image": "PMC-VQA/images/PMC3502823_fig1_167382.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the findings in this image?\nChoices:\n(A) Small intestine\n(B) Stomach\n(C) Right colon\n(D) Left colon" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.59375, "loss": 4.012884140014648 }, { "id": "identity_65075", "image": "AI2D/images/3061.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many centrioles are depicted in the cell diagram?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 26.0, "loss": 4.110767841339111 }, { "id": "identity_275600", "image": "ScienceQA/images/1947.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organism among the following, indicated by the arrows, is classified as a decomposer?\nChoices:\n(A) mushroom\n(B) lichen" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.65625, "loss": 4.497860431671143 }, { "id": "identity_19220", "image": "IconQA/images/54866.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) slide\n(B) flip\n(C) turn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.375, "loss": 4.617421627044678 }, { "id": "identity_36352", "image": "A-OKVQA/images/580414.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the large triangular object in the distance?\nChoices:\n(A) sculpture\n(B) cloud\n(C) mountain\n(D) mall" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.7578125, "loss": 4.4319658279418945 }, { "id": "identity_312821", "image": "Super-CLEVR/images/superCLEVR_new_000169.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more large metal items than smaller objects?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 20.0, "loss": 4.901883602142334 }, { "id": "identity_200274", "image": "TextVQA/images/3ff51c29c8072953.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the majority of the team's uniform?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 20.375, "loss": 4.034162521362305 }, { "id": "identity_15640", "image": "ScienceQA/images/414.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which specific humidity level was measured within the outlined area shown?\nChoices:\n(A) 22 grams of water vapor per kilogram of air\n(B) 11 grams of water vapor per kilogram of air\n(C) 16 grams of water vapor per kilogram of air" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.375, "loss": 4.101991653442383 }, { "id": "identity_71307", "image": "AI2D/images/1862.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of seals are represented in this food web?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 34.8125, "loss": 4.559070110321045 }, { "id": "identity_280954", "image": "TQA/images/abc_question_images/skin_cross_section_17586.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label that represents the Blood Vessels:\n(A) Y\n(B) D\n(C) R\n(D) H" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.9375, "loss": 4.07223653793335 }, { "id": "identity_151741", "image": "ScienceQA/images/6357.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of teeth does the handsaw in the image have?" }, { "from": "gpt", "value": "The answer is Fine teeth for cutting wood" } ], "clip_score": 32.8125, "loss": 4.2123847007751465 }, { "id": "identity_326929", "image": "TabMWP/images/37204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is it linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.546875, "loss": 4.248997688293457 }, { "id": "identity_308812", "image": "ScienceQA/images/18726.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony depicted?\nChoices:\n(A) South Carolina\n(B) North Carolina\n(C) Connecticut\n(D) Georgia" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.765625, "loss": 4.028820991516113 }, { "id": "identity_317451", "image": "ScienceQA/images/15520.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which better describes this forest ecosystem?\nChoices:\n(A) It has soil that is poor in nutrients. It also has only a few types of trees.\n(B) It has soil that is rich in nutrients. It also has only a few types of trees." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.890625, "loss": 4.3216552734375 }, { "id": "identity_283199", "image": "A-OKVQA/images/545237.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of light is the circular one on the tall building? Is it a clock, floodlight, window, or sign?\nChoices:\n(A) clock\n(B) floodlight\n(C) window\n(D) sign" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.359375, "loss": 4.383726596832275 }, { "id": "identity_11010", "image": "FigureQA/images/48065.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Brown the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.2109375, "loss": 4.008091926574707 }, { "id": "identity_288246", "image": "A-OKVQA/images/539299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the orange color of the men's vests?\nChoices:\n(A) camouflage\n(B) dress code\n(C) fashion\n(D) visibility" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 12.7734375, "loss": 4.242688179016113 }, { "id": "identity_274564", "image": "A-OKVQA/images/237463.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following individuals has lighter hair than the person in question? \n(A) Margaret Qualley\n(B) Penelope Cruz\n(C) Taylor Swift\n(D) Natalie Portman" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.6875, "loss": 4.246614933013916 }, { "id": "identity_192417", "image": "TextVQA/images/da147a39299fe540.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the aircraft in the foreground?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 28.453125, "loss": 4.312932014465332 }, { "id": "identity_6934", "image": "Super-CLEVR/images/superCLEVR_new_000106.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What size is the cyan rubber thing that is the same shape as the metal thing?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 24.84375, "loss": 4.465923309326172 }, { "id": "identity_263778", "image": "TabMWP/images/36946.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At what time does the train arrive at the restaurant district according to the schedule provided?\nChoices:\n(A) 1:30 P.M.\n(B) 11:00 A.M.\n(C) 12:55 P.M.\n(D) 9:00 A.M." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.515625, "loss": 4.107501983642578 }, { "id": "identity_147742", "image": "DocVQA/images/ssjw0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the section that comes after \"The President's Statement\"?" }, { "from": "gpt", "value": "The answer is Grants Awarded" } ], "clip_score": 20.53125, "loss": 4.0261030197143555 }, { "id": "identity_275928", "image": "A-OKVQA/images/538057.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What event is likely happening in the building to the right? Is it a baseball game, roller derby, worship service, or property auction?\nChoices:\n(A) baseball game\n(B) roller derby\n(C) worship service\n(D) property auction" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.328125, "loss": 4.383307933807373 }, { "id": "identity_2878", "image": "FigureQA/images/75147.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Orange have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.453125, "loss": 4.344520568847656 }, { "id": "identity_56867", "image": "DocVQA/images/fshk0226_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last name listed under individual names?" }, { "from": "gpt", "value": "The answer is Dick Wynandotte" } ], "clip_score": 23.46875, "loss": 6.2281293869018555 }, { "id": "identity_131672", "image": "A-OKVQA/images/447991.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of electronic device is advertised on the store sign?" }, { "from": "gpt", "value": "The answer is iPad Phone" } ], "clip_score": 17.640625, "loss": 4.1762776374816895 }, { "id": "identity_308308", "image": "Super-CLEVR/images/superCLEVR_new_000978.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the object that is left of a big metal bicycle and in front of a tiny yellow bus?" }, { "from": "gpt", "value": "The answer is airliner" } ], "clip_score": 29.71875, "loss": 4.673373222351074 }, { "id": "identity_263638", "image": "Super-CLEVR/images/superCLEVR_new_001149.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the shape of the small green object located to the left of the large matte object behind the brown object in front of the yellow shiny tandem bike." }, { "from": "gpt", "value": "The answer is cruiser" } ], "clip_score": 27.171875, "loss": 4.20057487487793 }, { "id": "identity_327085", "image": "IconQA/images/12051.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this object flat or solid?\nChoices:\n(A) flat\n(B) solid" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.515625, "loss": 4.014800071716309 }, { "id": "identity_22471", "image": "PMC-VQA/images/PMC7670670_Fig2_299640.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the infected fistula? \nChoices:\n(A) Right side \n(B) Left side \n(C) Center \n(D) Top " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.703125, "loss": 4.073939323425293 }, { "id": "identity_276010", "image": "AI2D/images/4063.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the innermost part of the sun?\nChoices:\n(A) inner core\n(B) outer core\n(C) corona\n(D) chromosphere" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.125, "loss": 4.049004554748535 }, { "id": "identity_204238", "image": "VQA2.0/images/486513.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any people sitting on the bench?" }, { "from": "gpt", "value": "The answer is Yes, four people" } ], "clip_score": 24.921875, "loss": 4.074258804321289 }, { "id": "identity_204957", "image": "A-OKVQA/images/126264.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible stripes are there on the player's shirt?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 14.234375, "loss": 4.367316246032715 }, { "id": "identity_105683", "image": "Super-CLEVR/images/superCLEVR_new_000547.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have a metallic finish?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.46875, "loss": 4.801377773284912 }, { "id": "identity_32549", "image": "PMC-VQA/images/PMC4665545_diagnostics-04-00057-f005_448998.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being imaged in this OCT scan? \nChoices:\n(A) Nasal cavity \n(B) Esophagus \n(C) Small intestine \n(D) Colon " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.21875, "loss": 4.252675533294678 }, { "id": "identity_220378", "image": "Super-CLEVR/images/superCLEVR_new_001416.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object directly to the left of the green fighter?" }, { "from": "gpt", "value": "The answer is Gold" } ], "clip_score": 23.484375, "loss": 4.5904998779296875 }, { "id": "identity_272248", "image": "PMC-VQA/images/PMC7729070_F1_318361.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the gyrus indicated by the white arrows in the sagittal T1 MRI scan?\nChoices:\n(A) precentral gyrus\n(B) postcentral gyrus\n(C) middle temporal gyrus\n(D) inferior frontal gyrus" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.28125, "loss": 4.5204315185546875 }, { "id": "identity_293298", "image": "A-OKVQA/images/463120.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be found on the dock among the following options: people, cat, firefighter, or car?\nChoices:\n(A) people\n(B) cat\n(C) firefighter\n(D) car" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.96875, "loss": 4.1038947105407715 }, { "id": "identity_234", "image": "A-OKVQA/images/317090.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was just tossed in the air?\nChoices:\n(A) baby\n(B) tennis ball\n(C) pizza dough\n(D) frisbee" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.265625, "loss": 4.281593322753906 }, { "id": "identity_38248", "image": "VizWiz/images/VizWiz_train_00018691.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: I believe I was told this was a strawberry ice cream. Is somebody holding the ice cream, because this is an Avon book?" }, { "from": "gpt", "value": "The answer is no not being held" } ], "clip_score": 27.03125, "loss": 4.127157211303711 }, { "id": "identity_10559", "image": "VQA2.0/images/493454.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What colors do the surfboards have in common?" }, { "from": "gpt", "value": "The answer is black" } ], "clip_score": 27.421875, "loss": 4.504603385925293 }, { "id": "identity_193017", "image": "A-OKVQA/images/16578.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrians can be seen in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 19.421875, "loss": 4.804287433624268 }, { "id": "identity_146251", "image": "DocVQA/images/hgph0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the factory where the water analysis was conducted?" }, { "from": "gpt", "value": "The answer is Fort Morgan Factory" } ], "clip_score": 26.40625, "loss": 4.107510089874268 }, { "id": "identity_306420", "image": "FigureQA/images/19343.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the maximum value for Web Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.5, "loss": 4.775328159332275 }, { "id": "identity_202260", "image": "A-OKVQA/images/272957.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a visible storefront sign in the image, and if so, what does it say?" }, { "from": "gpt", "value": "The answer is Yes, \"Olympic Club" } ], "clip_score": 17.578125, "loss": 4.129271507263184 }, { "id": "identity_23934", "image": "VQA2.0/images/378637.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is on the main table?" }, { "from": "gpt", "value": "The answer is plant" } ], "clip_score": 13.8203125, "loss": 4.923285007476807 }, { "id": "identity_136731", "image": "DocVQA/images/fmjm0223_40.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the format of the document's title?" }, { "from": "gpt", "value": "The answer is Statement" } ], "clip_score": 20.046875, "loss": 4.430115222930908 }, { "id": "identity_269048", "image": "TQA/images/question_images/hair_follicles_6987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the component that precedes the Hair shaft in the hair structure?\nChoices:\n(A) Sebaceous gland\n(B) Inner root\n(C) Arrector pili muscle\n(D) Outer root" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 37.15625, "loss": 4.650452136993408 }, { "id": "identity_328719", "image": "PMC-VQA/images/PMC5826924_Fig6_280362.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organ moves upward into the thorax?\nChoices:\n(A) Heart \n(B) Liver \n(C) Lungs \n(D) Pancreas" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.703125, "loss": 4.785769462585449 }, { "id": "identity_320953", "image": "PMC-VQA/images/PMC7597573_F1_285965.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of images were used to capture this?\nChoices:\n(A) CT images \n(B) T2-weighted images \n(C) T1-weighted images \n(D) MRI images" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.203125, "loss": 4.682596683502197 }, { "id": "identity_16073", "image": "AI2D/images/2751.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What phase does the letter C represent?\nChoices:\n(A) Waxing Gibbous\n(B) Waning Crescent\n(C) Waxing Crescent\n(D) Waning Gibbous" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.0625, "loss": 4.2344560623168945 }, { "id": "identity_321359", "image": "A-OKVQA/images/194108.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the individuals walking near?\nChoices:\n(A) camel\n(B) elephant\n(C) tower\n(D) clown" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.09375, "loss": 4.531215190887451 }, { "id": "identity_183244", "image": "PMC-VQA/images/PMC8268606_Fig1_5131.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lesions are marked with arrowheads in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.3125, "loss": 4.448417663574219 }, { "id": "identity_259459", "image": "DocVQA/images/kgpj0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the main concerns raised by the visitors at the meeting regarding labeling, and how do these concerns relate to the overall theme of the memorandum?" }, { "from": "gpt", "value": "The answer is Unique problems, complexity, unforeseen consequences in food labeling" } ], "clip_score": 23.28125, "loss": 4.22285270690918 }, { "id": "identity_118670", "image": "A-OKVQA/images/318820.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What animal is depicted on the person's t-shirt?" }, { "from": "gpt", "value": "The answer is A dog" } ], "clip_score": 17.640625, "loss": 4.168735504150391 }, { "id": "identity_143024", "image": "DVQA/images/bar_train_00050305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total units sold for \"flavor\" in all groups combined?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 26.296875, "loss": 4.143676280975342 }, { "id": "identity_304086", "image": "Super-CLEVR/images/superCLEVR_new_001305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the material of the green thing that is left of the small metal dirtbike the same as the blue object that is to the left of the large cyan thing?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 27.453125, "loss": 5.23538064956665 }, { "id": "identity_314765", "image": "FigureQA/images/95784.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Violet Red the one with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.140625, "loss": 4.14620304107666 }, { "id": "identity_15336", "image": "PMC-VQA/images/PMC5440401_Fig1_167188.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the arrows indicate in the image?\nChoices:\n(A) Direction of water flow \n(B) Normal distribution of AQP4 \n(C) Level of AQP4 expression \n(D) Relative normal AQP4 distribution" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.7734375, "loss": 4.42465353012085 }, { "id": "identity_157774", "image": "TQA/images/abc_question_images/continental_drift_19083.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which section will become South America?" }, { "from": "gpt", "value": "The answer is Section J" } ], "clip_score": 29.921875, "loss": 4.019484043121338 }, { "id": "identity_14822", "image": "A-OKVQA/images/557045.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of pitch is this?\nChoices:\n(A) fastball\n(B) ceremonial pitch\n(C) knuckleball\n(D) forkball" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.078125, "loss": 4.267573833465576 }, { "id": "identity_316674", "image": "VQA2.0/images/160848.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this boat named for someone?" }, { "from": "gpt", "value": "The answer is female" } ], "clip_score": 22.515625, "loss": 5.780643463134766 }, { "id": "identity_3622", "image": "A-OKVQA/images/314690.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What small creature is likely living here?\nChoices:\n(A) baby\n(B) midget\n(C) mini monster\n(D) monkey" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.765625, "loss": 4.24249792098999 }, { "id": "identity_107205", "image": "VQA2.0/images/212838.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object being held?" }, { "from": "gpt", "value": "The answer is Silver or grey" } ], "clip_score": 15.28125, "loss": 4.229565620422363 }, { "id": "identity_19272", "image": "DVQA/images/bar_train_00111130.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What element does the blue color represent?" }, { "from": "gpt", "value": "The answer is faint" } ], "clip_score": 24.6875, "loss": 4.402892112731934 }, { "id": "identity_289635", "image": "ScienceQA/images/320.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Iowa?\n(A) Des Moines\n(B) Cedar Rapids\n(C) Baltimore\n(D) Pierre" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.359375, "loss": 4.1243672370910645 }, { "id": "identity_335427", "image": "PMC-VQA/images/PMC8699387_brainsci-11-01615-f001_145906.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of lesions are shown in these scans?\nChoices:\n(A) Ventricular encephalitis lesions \n(B) Dilated perivascular spaces lesions \n(C) Parenchymal hematoma lesions \n(D) Myelin sheath lesions" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.09375, "loss": 4.180066108703613 }, { "id": "identity_289871", "image": "PMC-VQA/images/PMC7758316_F3_325762.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be seen inside the lumen of a small artery?\nChoices:\n(A) Blood clot \n(B) Fibrocartilaginous embolus \n(C) Fat embolus \n(D) Calcium embolus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.40625, "loss": 4.227278709411621 }, { "id": "identity_290426", "image": "PMC-VQA/images/PMC7564634_cancers-12-02585-f003_275662.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the meaning of the red arrow in (D) MRI pointing towards?\nChoices:\n(A) Tumor location \n(B) Lesion uptake \n(C) Surgical alteration \n(D) Contrast-enhancement" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.5, "loss": 4.747371673583984 }, { "id": "identity_21366", "image": "A-OKVQA/images/194108.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people walking near?\nChoices:\n(A) camel\n(B) elephant\n(C) tower\n(D) clown" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.9375, "loss": 4.502010822296143 }, { "id": "identity_22244", "image": "A-OKVQA/images/277470.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of business is shown?\nChoices:\n(A) food\n(B) rental\n(C) beauty\n(D) law" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.21875, "loss": 4.52050256729126 }, { "id": "identity_324807", "image": "PMC-VQA/images/PMC5954786_Fig2_314999.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated in the image with the red arrows?\nChoices:\n(A) A local relapse\n(B) Metastatic retroperitoneal lymph node \n(C) Both A and B\n(D) Normal lymph node" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.21875, "loss": 4.486506462097168 }, { "id": "identity_287123", "image": "PMC-VQA/images/PMC5356317_Fig6_141696.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the dashed black lines represent in the diagram? \nChoices:\n(A) Expression domains \n(B) Location of the mouth opening \n(C) Location of the prototroch \n(D) Location of the anus" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.390625, "loss": 5.157803058624268 }, { "id": "identity_317450", "image": "PMC-VQA/images/PMC3948907_Fig2_273076.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the mass located in the CT scan?\nChoices:\n(A) Left Abdomen\n(B) Right Abdomen\n(C) Lower Abdomen\n(D) Upper Abdomen" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.875, "loss": 4.341224193572998 }, { "id": "identity_192406", "image": "PMC-VQA/images/PMC7806997_Fig2_342619.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct lesions can be counted in the image?" }, { "from": "gpt", "value": "The answer is Seven lesions" } ], "clip_score": 26.125, "loss": 4.18429708480835 }, { "id": "identity_127423", "image": "TextVQA/images/1ea75b6f56c2a71c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many magazines have a red color element on their cover?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 30.234375, "loss": 4.257226943969727 }, { "id": "identity_17648", "image": "ScienceQA/images/10148.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this passage describe the weather or the climate?\nChoices:\n(A) climate\n(B) weather" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.859375, "loss": 4.0067620277404785 }, { "id": "identity_284100", "image": "Super-CLEVR/images/superCLEVR_new_001556.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the metallic airplane located to the right of the helicopter?" }, { "from": "gpt", "value": "The answer is small" } ], "clip_score": 25.828125, "loss": 4.1053466796875 }, { "id": "identity_287047", "image": "A-OKVQA/images/185200.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which business might the person specialize in repairing televisions, cars, phones, or baby carriages?\nChoices:\n(A) televisions\n(B) cars\n(C) phones\n(D) baby carriages" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.453125, "loss": 4.191341400146484 }, { "id": "identity_289944", "image": "A-OKVQA/images/5123.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of waste should be disposed of in these trash cans? \nOptions:\n(A) recycling\n(B) general waste\n(C) horse manure\n(D) greenery" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.140625, "loss": 4.600194454193115 }, { "id": "identity_322793", "image": "AI2D/images/4280.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What generates biogenic methane?\nChoices:\n(A) thermogenic methane\n(B) bacterial actions\n(C) methane seeps\n(D) permafrost" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.140625, "loss": 4.22319221496582 }, { "id": "identity_290816", "image": "A-OKVQA/images/178078.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At what angle is the motorcycle positioned in relation to the cars?\nChoices:\n(A) obtuse\n(B) perpendicular\n(C) right\n(D) parallel" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.734375, "loss": 4.275254249572754 }, { "id": "identity_293884", "image": "A-OKVQA/images/99539.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which drink is the most probable one in the cup on the table among orange juice, coffee, alcohol, and water?\nChoices:\n(A) orange juice\n(B) coffee\n(C) alcohol\n(D) water" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.6953125, "loss": 4.080061912536621 }, { "id": "identity_147940", "image": "DocVQA/images/kzbn0226_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the term \"intended\" imply according to the text?" }, { "from": "gpt", "value": "The answer is It implies express or implied demonstration of intention" } ], "clip_score": 13.78125, "loss": 4.18025541305542 }, { "id": "identity_337272", "image": "IconQA/images/28593.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, how likely is it that you will get a black one?\nChoices:\n(A) unlikely\n(B) probable\n(C) impossible\n(D) certain" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.578125, "loss": 4.407540798187256 }, { "id": "identity_330163", "image": "PMC-VQA/images/PMC7960033_fig4_391558.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the image depict 1 week after grafting?\nChoices:\n(A) Normal tissue growth\n(B) A tumor growth\n(C) An infection\n(D) A scar formation" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.9375, "loss": 4.5508832931518555 }, { "id": "identity_321055", "image": "A-OKVQA/images/166477.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person with their hands in the air doing?\nChoices:\n(A) fighting\n(B) directing traffic\n(C) swimming\n(D) tricks" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.421875, "loss": 4.381913661956787 }, { "id": "identity_278079", "image": "IconQA/images/30588.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture is the fourth one in the sequence, given that the first picture is a bucket?\nChoices:\n(A) boat\n(B) crab\n(C) bucket" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.796875, "loss": 4.350856304168701 }, { "id": "identity_333762", "image": "PMC-VQA/images/PMC7763342_brainsci-10-00963-f002_328088.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of image depicts a superiorly directed ACoA aneurysm?\nChoices:\n(A) 3D-CTA images \n(B) CT scan images \n(C) MR images \n(D) Ultrasound images" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.390625, "loss": 4.0783514976501465 }, { "id": "identity_276198", "image": "TextVQA/images/4ef05378cb434836.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which direction does the street go?" }, { "from": "gpt", "value": "The answer is n" } ], "clip_score": 13.984375, "loss": 4.365179061889648 }, { "id": "identity_207567", "image": "VQA2.0/images/255422.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing red jackets?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 16.46875, "loss": 4.626310348510742 }, { "id": "identity_131205", "image": "TextVQA/images/47f01ef525e961f0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible on the field wearing white jerseys?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.75, "loss": 4.673045635223389 }, { "id": "identity_312058", "image": "PMC-VQA/images/PMC3750476_F5_226406.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What technique was used to capture this image?\nChoices:\n(A) Transmission electron microscopy\n(B) Scanning electron microscopy\n(C) Confocal microscopy\n(D) Fluorescence microscopy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.0, "loss": 4.5453057289123535 }, { "id": "identity_288588", "image": "IconQA/images/78474.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) turn\n(B) flip\n(C) slide" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.15625, "loss": 4.234529495239258 }, { "id": "identity_318753", "image": "ScienceQA/images/15231.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is this made of?\nChoices:\n(A) glass\n(B) rock" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.984375, "loss": 4.051455020904541 }, { "id": "identity_289269", "image": "PMC-VQA/images/PMC5689208_Fig1_240521.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of MRI was conducted on the patient?\nOptions:\n(A) T1-weighted\n(B) T2-weighted\n(C) Intermediate-weighted\n(D) Gradient-echo" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.609375, "loss": 4.166264533996582 }, { "id": "identity_111438", "image": "A-OKVQA/images/229949.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible overhead wires are above the train?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.28125, "loss": 4.407223701477051 }, { "id": "identity_330483", "image": "A-OKVQA/images/338960.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where does this tree come from?\nChoices:\n(A) germany\n(B) england\n(C) switzerland\n(D) russia" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.25, "loss": 4.012759685516357 }, { "id": "identity_290121", "image": "VizWiz/images/VizWiz_train_00017189.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the appearance of this dog." }, { "from": "gpt", "value": "The answer is lab" } ], "clip_score": 24.828125, "loss": 5.08825159072876 }, { "id": "identity_320773", "image": "IconQA/images/214.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a dog. Which image is fourth?\nChoices:\n(A) paw\n(B) dog\n(C) house" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.9375, "loss": 4.137295722961426 }, { "id": "identity_312435", "image": "IconQA/images/72187.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there a circular face on this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.703125, "loss": 4.655858039855957 }, { "id": "identity_103777", "image": "VQA-AS/images/2859.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the pants worn by the person standing?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 21.28125, "loss": 4.945766448974609 }, { "id": "identity_15437", "image": "DVQA/images/bar_train_00082729.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second bar from the left in each group?" }, { "from": "gpt", "value": "The answer is remark" } ], "clip_score": 22.0625, "loss": 4.0435895919799805 }, { "id": "identity_23815", "image": "A-OKVQA/images/413248.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of transportation is shown?\nChoices:\n(A) water\n(B) air\n(C) rail\n(D) road" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.578125, "loss": 4.267649173736572 }, { "id": "identity_34349", "image": "VizWiz/images/VizWiz_train_00006336.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of soda is this?" }, { "from": "gpt", "value": "The answer is tin soda" } ], "clip_score": 28.625, "loss": 4.057624340057373 }, { "id": "identity_330263", "image": "PMC-VQA/images/PMC7072331_cancers-12-00486-f001_134200.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique used to detect some focal lesions in the rachis?\nChoices:\n(A) CT \n(B) T1-weighted \n(C) T2-weighted STIR \n(D) PET" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.140625, "loss": 4.203803062438965 }, { "id": "identity_246030", "image": "DocVQA/images/nnbx0223_154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: List all the key management personnel and their respective roles, and identify which of them ceased to be a director on 31.03.2008" }, { "from": "gpt", "value": "The answer is C.R. Green" } ], "clip_score": 26.125, "loss": 4.016754150390625 }, { "id": "identity_24647", "image": "TextVQA/images/d7e1c905d63340f4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the first stop after leaving the station?" }, { "from": "gpt", "value": "The answer is fortezza" } ], "clip_score": 15.1953125, "loss": 4.3599467277526855 }, { "id": "identity_26924", "image": "PMC-VQA/images/PMC3772311_F4_231358.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is illustrated at the bottom of the figure? \nChoices:\n(A) Overlapping activation clusters \n(B) Abstract language semantics processing \n(C) Gesture semantics processing \n(D) Concrete language semantics processing " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.921875, "loss": 4.6018500328063965 }, { "id": "identity_278732", "image": "FigureQA/images/61645.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Forest Green or Saddle Brown?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.15625, "loss": 4.066964626312256 }, { "id": "identity_69151", "image": "TQA/images/question_images/human_system_reproductory_7046.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many structures are labeled on the left side of the female reproductive system in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 32.09375, "loss": 4.305777549743652 }, { "id": "identity_3257", "image": "Super-CLEVR/images/superCLEVR_new_001841.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the blue object that is to the left of the brown matte bicycle and the large tandem bike that is in front of the big brown road bike have the same material?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 20.375, "loss": 5.7482805252075195 }, { "id": "identity_6434", "image": "IconQA/images/106502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a house. Which picture is eighth?\nChoices:\n(A) paw\n(B) dog\n(C) house" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.796875, "loss": 4.060749530792236 }, { "id": "identity_34302", "image": "ScienceQA/images/14614.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the question that Shelby's experiment can best answer.\nChoices:\n(A) Can light bulbs stay lit longer when circuits include copper or when circuits include iron?\n(B) Do circuits that include iron produce dimmer light than circuits that include copper?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.8125, "loss": 4.186537742614746 }, { "id": "identity_294528", "image": "ScienceQA/images/13234.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of North Carolina?\nChoices:\n(A) Raleigh\n(B) Charlotte\n(C) Lincoln\n(D) Santa Fe" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.234375, "loss": 4.285380840301514 }, { "id": "identity_321102", "image": "AI2D/images/1696.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the line that runs along the surface in the illustration called?\n\nChoices:\n(A) Wave front\n(B) Epicenter\n(C) Fault scarp\n(D) Focus" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.03125, "loss": 4.018494129180908 }, { "id": "identity_327871", "image": "VQA-AS/images/6976.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is this?" }, { "from": "gpt", "value": "The answer is floor" } ], "clip_score": 19.0625, "loss": 4.088540554046631 }, { "id": "identity_288557", "image": "DocVQA/images/nykh0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the recipient of this letter?" }, { "from": "gpt", "value": "The answer is Miss Bev Rosen" } ], "clip_score": 28.015625, "loss": 5.063092231750488 }, { "id": "identity_77649", "image": "TQA/images/question_images/parts_worm_7311.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the likely habitat of the organism depicted in the image based on its morphology?" }, { "from": "gpt", "value": "The answer is Inside a host or water bodies" } ], "clip_score": 25.140625, "loss": 4.377720355987549 }, { "id": "identity_3062", "image": "FigureQA/images/15113.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Seafoam the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.53125, "loss": 4.664809703826904 }, { "id": "identity_321855", "image": "TextVQA/images/3e3b5093bb67a22f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the red sign say?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 15.046875, "loss": 4.461650371551514 }, { "id": "identity_28802", "image": "A-OKVQA/images/184241.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person with the racquets sitting on?\nChoices:\n(A) bed\n(B) tree stump\n(C) office chair\n(D) bench" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.328125, "loss": 4.30235481262207 }, { "id": "identity_268898", "image": "AI2D/images/4223.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of rock is represented at point B in the diagram?\nChoices:\n(A) surface rock\n(B) metamorphic\n(C) igneous\n(D) sediment" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.234375, "loss": 4.912036418914795 }, { "id": "identity_275437", "image": "ScienceQA/images/6356.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Virginia?\nChoices:\n(A) Oklahoma City\n(B) Richmond\n(C) Arlington\n(D) Portland" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.515625, "loss": 4.240690231323242 }, { "id": "identity_187635", "image": "VQA2.0/images/55942.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 13.1796875, "loss": 4.427265644073486 }, { "id": "identity_264149", "image": "A-OKVQA/images/112377.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the bus being parked near the curb?\nChoices:\n(A) getting cleaned\n(B) getting passengers\n(C) refueling\n(D) changing tires" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.203125, "loss": 4.776303768157959 }, { "id": "identity_12554", "image": "FigureQA/images/64185.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Firebrick the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.390625, "loss": 4.212704181671143 }, { "id": "identity_22695", "image": "PMC-VQA/images/PMC5646301_fig7_228418.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of tissue is seen as a high-density area on the images?\nChoices:\n(A) Blood vessels \n(B) Bone \n(C) Muscle \n(D) Fat" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.265625, "loss": 4.390749931335449 }, { "id": "identity_17645", "image": "TextVQA/images/22c32952b17e0cbe.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the brand name of the wines on display?" }, { "from": "gpt", "value": "The answer is cadus" } ], "clip_score": 30.078125, "loss": 4.479546546936035 }, { "id": "identity_2447", "image": "PMC-VQA/images/PMC6949338_Fig3_96684.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the panoramic radiograph? \nChoices:\n(A) Bone graft placement after implant removal \n(B) Several implants to support the final prosthesis \n(C) Counter-torque regions of the implants \n(D) The removal of the provisional prosthesis " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.734375, "loss": 4.140527248382568 }, { "id": "identity_211329", "image": "A-OKVQA/images/440329.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shirt worn by the person riding the cow on the right?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 19.4375, "loss": 4.147542476654053 }, { "id": "identity_16876", "image": "FigureQA/images/90691.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.9375, "loss": 4.545373916625977 }, { "id": "identity_269828", "image": "DVQA/images/bar_train_00083511.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there stacked bars in the chart?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 24.1875, "loss": 4.063572883605957 }, { "id": "identity_113082", "image": "PMC-VQA/images/PMC3691725_F5_213673.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the arrangement pattern of the spheres in the image?" }, { "from": "gpt", "value": "The answer is Random arrangement" } ], "clip_score": 23.46875, "loss": 4.588450908660889 }, { "id": "identity_283528", "image": "ScienceQA/images/12469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following correctly represents the transfer of thermal energy during this time: from the surroundings to each cake or from each cake to the surroundings?\nChoices:\n(A) the surroundings . . . each cake\n(B) each cake . . . the surroundings" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.578125, "loss": 4.025064945220947 }, { "id": "identity_200266", "image": "PMC-VQA/images/PMC9236663_FIG1_325682.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many blood vessels can be seen branching out from the optic disc?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 32.4375, "loss": 4.367539405822754 }, { "id": "identity_290431", "image": "PMC-VQA/images/PMC4807453_RSOS150461F1_14922.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which technique was utilized to create the image?\nChoices:\n(A) Computed Tomography \n(B) Magnetic Resonance Imaging \n(C) X-Ray \n(D) Ultrasonics" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.515625, "loss": 4.120309829711914 }, { "id": "identity_286641", "image": "PMC-VQA/images/PMC7693558_F2_306672.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the image is indicated by the star?\nChoices:\n(A) Proximal part\n(B) Caudal part\n(C) Distal part\n(D) None of the above." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.90625, "loss": 4.7408857345581055 }, { "id": "identity_288803", "image": "PMC-VQA/images/PMC8085189_Fig1_431007.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the CT scan conducted in terms of body position? \nOptions:\n(A) Supine\n(B) Prone\n(C) Standing\n(D) Lateral" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.9375, "loss": 4.565508842468262 }, { "id": "identity_333489", "image": "A-OKVQA/images/256852.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where does this bus operate?\nChoices:\n(A) rural\n(B) city\n(C) suburbs\n(D) farm" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.0625, "loss": 4.552199840545654 }, { "id": "identity_31985", "image": "PMC-VQA/images/PMC7876058_F1_365737.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color of the affected area? \nChoices:\n(A) blue \n(B) red \n(C) green \n(D) yellow " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.1953125, "loss": 4.079589366912842 }, { "id": "identity_734", "image": "Super-CLEVR/images/superCLEVR_new_001869.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a matte thing of the same size as the yellow double bus?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 17.890625, "loss": 5.117381572723389 }, { "id": "identity_144393", "image": "DocVQA/images/mfxh0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the passenger?" }, { "from": "gpt", "value": "The answer is SHANKAR DR" } ], "clip_score": 17.15625, "loss": 4.492727279663086 }, { "id": "identity_23625", "image": "DVQA/images/bar_train_00048541.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What store does the darkorange color represent?" }, { "from": "gpt", "value": "The answer is change" } ], "clip_score": 22.8125, "loss": 4.294623374938965 }, { "id": "identity_71385", "image": "ScienceQA/images/6895.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the magnets in the image?" }, { "from": "gpt", "value": "The answer is Same orientation" } ], "clip_score": 23.140625, "loss": 4.5641770362854 }, { "id": "identity_284549", "image": "VizWiz/images/VizWiz_train_00007718.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is i don t know" } ], "clip_score": 20.25, "loss": 4.233409404754639 }, { "id": "identity_276966", "image": "FigureQA/images/84071.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gray's area under the curve the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.578125, "loss": 4.166214466094971 }, { "id": "identity_4389", "image": "TextVQA/images/215011a0e3107193.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what does the banner on the store say?" }, { "from": "gpt", "value": "The answer is now open" } ], "clip_score": 22.984375, "loss": 4.389721393585205 }, { "id": "identity_264047", "image": "VizWiz/images/VizWiz_train_00019583.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of this object and what is its identity?" }, { "from": "gpt", "value": "The answer is lizards orange left green right" } ], "clip_score": 34.0625, "loss": 4.5830769538879395 }, { "id": "identity_36300", "image": "ScienceQA/images/1022.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) The Bahamas\n(B) Dominica\n(C) Jamaica\n(D) the Dominican Republic" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.78125, "loss": 4.67049503326416 }, { "id": "identity_36787", "image": "A-OKVQA/images/402967.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What structure is located here?\nChoices:\n(A) cafe\n(B) pagoda\n(C) pavilion\n(D) barn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.84375, "loss": 4.313411712646484 }, { "id": "identity_266340", "image": "DVQA/images/bar_train_00152719.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group of bars, counting from the bottom, is labeled as the sixth?" }, { "from": "gpt", "value": "The answer is shore" } ], "clip_score": 19.90625, "loss": 4.298852443695068 }, { "id": "identity_24536", "image": "A-OKVQA/images/429836.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is surrounding the field?\nChoices:\n(A) scarecrows\n(B) football fans\n(C) corn\n(D) baseball fans" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.53125, "loss": 4.1144700050354 }, { "id": "identity_296880", "image": "PMC-VQA/images/PMC8833544_animals-12-00349-f001_189689.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be seen in the bottom right corner of the image?\nChoices:\n(A) gas content \n(B) fluid content \n(C) fecal matter \n(D) tumor" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.265625, "loss": 4.847529411315918 }, { "id": "identity_196760", "image": "A-OKVQA/images/416521.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are wearing medals in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 19.875, "loss": 4.1082258224487305 }, { "id": "identity_270137", "image": "VizWiz/images/VizWiz_train_00004999.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify this object?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 19.65625, "loss": 4.166858196258545 }, { "id": "identity_110717", "image": "TextVQA/images/04cfef06445e63ab.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the decorative panel made of?" }, { "from": "gpt", "value": "The answer is Metal" } ], "clip_score": 18.828125, "loss": 4.253593921661377 }, { "id": "identity_330305", "image": "DVQA/images/bar_train_00066582.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the smallest total value?" }, { "from": "gpt", "value": "The answer is mad" } ], "clip_score": 21.609375, "loss": 4.7656707763671875 }, { "id": "identity_313675", "image": "TextVQA/images/cf8702d81f8b8fde.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who loves this place?" }, { "from": "gpt", "value": "The answer is i" } ], "clip_score": 17.28125, "loss": 4.474160671234131 }, { "id": "identity_154320", "image": "ScienceQA/images/6493.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individual items are depicted in the image on the right?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.65625, "loss": 4.242149829864502 }, { "id": "identity_184534", "image": "A-OKVQA/images/497261.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible on the field?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.828125, "loss": 4.411401271820068 }, { "id": "identity_125505", "image": "VQA2.0/images/506322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arched windows are there on the visible side of the tower?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 28.84375, "loss": 4.527296543121338 }, { "id": "identity_74185", "image": "ScienceQA/images/4434.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many magnets are shown in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 28.671875, "loss": 4.2858662605285645 }, { "id": "identity_287689", "image": "PMC-VQA/images/PMC7993516_Fig1_399517.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the spinal condition in this patient from the following options:\n(A) Convex scoliosis\n(B) Kyphosis\n(C) Congenital scoliosis\n(D) Lumbar lordosis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.796875, "loss": 4.13140869140625 }, { "id": "identity_250681", "image": "DocVQA/images/khcn0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the subject of the forwarded meeting request and who forwarded it?" }, { "from": "gpt", "value": "The answer is Chlorpyrifos, Kime, Robin" } ], "clip_score": 21.875, "loss": 5.450453758239746 }, { "id": "identity_322366", "image": "A-OKVQA/images/571141.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term for skateboarding with the right foot forward?\nChoices:\n(A) goofy\n(B) rider\n(C) regular\n(D) looser" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.515625, "loss": 4.075817584991455 }, { "id": "identity_303965", "image": "AI2D/images/3807.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is A in this?\nChoices:\n(A) clypeus\n(B) mandible\n(C) antenna\n(D) eye" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.703125, "loss": 4.558584690093994 }, { "id": "identity_186923", "image": "A-OKVQA/images/232546.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the historic sign?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 25.765625, "loss": 4.054490089416504 }, { "id": "identity_304142", "image": "VizWiz/images/VizWiz_train_00012319.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this object?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 18.4375, "loss": 4.426821708679199 }, { "id": "identity_9957", "image": "FigureQA/images/44402.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Peru the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.796875, "loss": 4.1261210441589355 }, { "id": "identity_330759", "image": "A-OKVQA/images/225686.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the shape of the logo seen here?\nChoices:\n(A) circle\n(B) square\n(C) heart\n(D) triangle" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 6.37109375, "loss": 4.159480094909668 }, { "id": "identity_21613", "image": "PMC-VQA/images/PMC7355790_microorganisms-08-00807-f004_212548.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the structure of the spongy areas revealed in this image? \nChoices:\n(A) Hollow \n(B) Dense \n(C) Trabecular \n(D) None of the above " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.546875, "loss": 4.152769088745117 }, { "id": "identity_322218", "image": "A-OKVQA/images/344292.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of business most likely uses this place?\nChoices:\n(A) brewing\n(B) dairy\n(C) catering\n(D) manufacturing" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.046875, "loss": 4.152633190155029 }, { "id": "identity_38531", "image": "PMC-VQA/images/PMC7921762_JDS-22-71-g003.tif_380749.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the plane of the cross-sectional view in the preoperative computed tomography?\nChoices:\n(A) Coronal \n(B) Sagittal \n(C) Transverse \n(D) Longitudinal " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.265625, "loss": 4.237465858459473 }, { "id": "identity_268580", "image": "Super-CLEVR/images/superCLEVR_new_001512.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does a large yellow motorbike exist that is constructed from the same material as the large cruiser?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 27.265625, "loss": 4.904158592224121 }, { "id": "identity_240805", "image": "PMC-VQA/images/PMC6635723_F2_9840.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary organ affected by the tracer uptake, and what are the possible implications for the patient's health based on the location of the uptake?" }, { "from": "gpt", "value": "The answer is Liver lesion indicates potential serious health conditions like liver cancer" } ], "clip_score": 29.859375, "loss": 4.4428229331970215 }, { "id": "identity_322089", "image": "A-OKVQA/images/123555.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the sign next to the green light prohibit?\nChoices:\n(A) left turns\n(B) right turns\n(C) u-turns\n(D) going straight" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.421875, "loss": 4.293027400970459 }, { "id": "identity_25113", "image": "ScienceQA/images/20882.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Idaho?\nChoices:\n(A) Boise\n(B) Helena\n(C) Louisville\n(D) Salem" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.25, "loss": 4.233637809753418 }, { "id": "identity_326350", "image": "A-OKVQA/images/2315.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a distinctive characteristic of these creatures?\nChoices:\n(A) quills\n(B) gills\n(C) neck\n(D) trunk" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.390625, "loss": 4.197693347930908 }, { "id": "identity_267665", "image": "TQA/images/abc_question_images/solar_system_11427.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following labels represents Jupiter?\n(A) M\n(B) U\n(C) Y\n(D) W" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.515625, "loss": 4.0331621170043945 }, { "id": "identity_208937", "image": "VQA2.0/images/125351.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many traffic lights are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.375, "loss": 4.60620641708374 }, { "id": "identity_19753", "image": "DocVQA/images/fhlc0081_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is at the top-right of the document?" }, { "from": "gpt", "value": "The answer is PTD IN USA" } ], "clip_score": 11.140625, "loss": 5.117888927459717 }, { "id": "identity_297882", "image": "IconQA/images/52985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) slide\n(B) flip\n(C) turn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.9375, "loss": 4.666886806488037 }, { "id": "identity_330135", "image": "PMC-VQA/images/PMC4611029_Fig1_434760.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the scale bar in this image showing?\nChoices:\n(A) amount of staining in the image\n(B) distance between nerve fibers\n(C) size of the sensory nerve fibers\n(D) size of the image displayed" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.5, "loss": 4.074007987976074 }, { "id": "identity_21248", "image": "TabMWP/images/30759.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Dr. Baxter, a zoo director, researched ticket prices at other zoos around the country. Which charges more for a senior ticket, the Los Angeles Zoo or the Oakland Zoo?\nChoices:\n(A) Los Angeles\n(B) Oakland" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 38.8125, "loss": 4.102293491363525 }, { "id": "identity_186062", "image": "TextVQA/images/00d3bc72520af34d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible players are wearing a glove on their left hand?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.71875, "loss": 4.738255500793457 }, { "id": "identity_25598", "image": "Super-CLEVR/images/superCLEVR_new_001963.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any brown shiny airliners?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 14.0625, "loss": 5.152609825134277 }, { "id": "identity_254184", "image": "VQA2.0/images/468796.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What materials are used in the construction of the bench, and how many different materials can you identify?" }, { "from": "gpt", "value": "The answer is two" } ], "clip_score": 15.5703125, "loss": 5.429720878601074 }, { "id": "identity_36397", "image": "A-OKVQA/images/485483.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the number 84 represent?\nChoices:\n(A) table number\n(B) store number\n(C) order number\n(D) price" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 6.421875, "loss": 4.151466369628906 }, { "id": "identity_329665", "image": "Super-CLEVR/images/superCLEVR_new_001465.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the jet and the motorbike behind the large metal plane have the same color?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 22.984375, "loss": 5.497920036315918 }, { "id": "identity_307534", "image": "TQA/images/question_images/optics_reflection_9186.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this diagram illustrating?\nChoices:\n(A) Light\n(B) Optics\n(C) Rays\n(D) Energy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.375, "loss": 4.311914443969727 }, { "id": "identity_193613", "image": "A-OKVQA/images/183913.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flags are displayed on the building?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 16.953125, "loss": 4.225076198577881 }, { "id": "identity_18343", "image": "Super-CLEVR/images/superCLEVR_new_001452.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a small road bike that is behind the tiny bus in front of the large cyan thing that is on the left side of the tiny gray matte bus?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 26.421875, "loss": 5.096310615539551 }, { "id": "identity_303048", "image": "PMC-VQA/images/PMC6894034_f2_79200.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the aneurysm located in the body?\nChoices:\n(A) In the thoracic aorta\n(B) In the abdominal aorta\n(C) In the femoral artery\n(D) In the carotid artery" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.6875, "loss": 4.415989875793457 }, { "id": "identity_325910", "image": "PMC-VQA/images/PMC9187638_Fig2_308785.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What types of imaging techniques were utilized in the analysis?\nChoices:\n(A) MRI and CT\n(B) X-ray and CT\n(C) MRI and MRA\n(D) PET and MRA" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.65625, "loss": 4.5918803215026855 }, { "id": "identity_284375", "image": "A-OKVQA/images/129334.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term used to refer to the small elements displayed on the left side of the screen?\nChoices:\n(A) icons\n(B) bugs\n(C) snowflakes\n(D) cracks" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.140625, "loss": 4.615538120269775 }, { "id": "identity_951", "image": "FigureQA/images/34664.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Purple the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.515625, "loss": 4.161057472229004 }, { "id": "identity_333317", "image": "PMC-VQA/images/PMC5984782_Fig3_323393.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the difference between these images e and f?\nChoices:\n(A) They are the same\n(B) Image e was taken from live CAM and f from fixed and sectioned CAM\n(C) Image e was taken from fixed CAM and f from live CAM\n(D) Image e was taken with a different microscope than image f." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.453125, "loss": 4.013171195983887 }, { "id": "identity_284508", "image": "ScienceQA/images/7763.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Kentucky?\n(A) Hartford\n(B) Frankfort\n(C) Colorado Springs\n(D) Louisville" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.125, "loss": 4.272856712341309 }, { "id": "identity_104207", "image": "VQA2.0/images/183847.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.328125, "loss": 4.432793617248535 }, { "id": "identity_28587", "image": "A-OKVQA/images/14321.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is to the left of the motorcycles?\nChoices:\n(A) dog\n(B) boat\n(C) cat\n(D) person" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.75, "loss": 4.345523357391357 }, { "id": "identity_306263", "image": "AI2D/images/3260.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label corresponds to h?\nChoices:\n(A) lysosomes\n(B) golgi apparatus\n(C) ribosomes\n(D) plasma membrane" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.390625, "loss": 4.305063724517822 }, { "id": "identity_266234", "image": "A-OKVQA/images/116004.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the presence of people in the middle of the street?\nChoices:\n(A) parade passing\n(B) marathon\n(C) mass protest\n(D) fire sale" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.84375, "loss": 4.6194024085998535 }, { "id": "identity_29155", "image": "PMC-VQA/images/PMC1821330_F2_10049.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the upper panel of the image show? \nChoices:\n(A) FKBP52 staining in mild hypospadia patient. \n(B) FKBP52 staining in severe hypospadia patient. \n(C) Control individual with no hypospadia. \n(D) FKBP52 staining in pre-pubertal genital skin. " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.234375, "loss": 4.11122989654541 }, { "id": "identity_326818", "image": "A-OKVQA/images/177319.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the people riding their bikes?\nChoices:\n(A) school\n(B) park\n(C) bike path\n(D) trail" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.59375, "loss": 4.106956481933594 }, { "id": "identity_283722", "image": "TQA/images/question_images/radioactive_decay_8182.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What element is represented by the symbol \"He\"?\nChoices:\n(A) Helium\n(B) Sodium\n(C) Oxygen\n(D) Carbon" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.046875, "loss": 4.053243637084961 }, { "id": "identity_22821", "image": "PMC-VQA/images/PMC4671957_fig5_451194.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the second column represent in the figure? \nChoices:\n(A) The simulated maps in gray \n(B) The BP assembly by GA \n(C) The resolution of the maps \n(D) The actual maps at resolution 10. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.203125, "loss": 4.2115702629089355 }, { "id": "identity_35549", "image": "A-OKVQA/images/402823.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this horse practicing?\nChoices:\n(A) steeplechase\n(B) escape\n(C) posing\n(D) bucking" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.40625, "loss": 4.211177825927734 }, { "id": "identity_280092", "image": "TQA/images/question_images/periodic_table_8158.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the element that is tetravalent from the following options:\n(A) Oxygen\n(B) Carbon\n(C) Boron\n(D) Nitrogen" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.84375, "loss": 4.640213966369629 }, { "id": "identity_305133", "image": "FigureQA/images/81687.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.09375, "loss": 4.018388748168945 }, { "id": "identity_306188", "image": "TabMWP/images/16586.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is it linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.15625, "loss": 4.2170000076293945 }, { "id": "identity_191266", "image": "TextVQA/images/e1a5f789db4e3e20.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the dominant color of the pitcher's uniform?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 25.390625, "loss": 4.2110419273376465 }, { "id": "identity_35010", "image": "DocVQA/images/lywd0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the Author?" }, { "from": "gpt", "value": "The answer is George Schauf" } ], "clip_score": 20.125, "loss": 5.320271968841553 }, { "id": "identity_327177", "image": "A-OKVQA/images/13893.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The fruits in the raised wooden baskets seen here are all what type?\nChoices:\n(A) citrus\n(B) apples\n(C) cherries\n(D) melons" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.03125, "loss": 4.088430881500244 }, { "id": "identity_27062", "image": "PMC-VQA/images/PMC3709099_F2_217511.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the technique used to capture the image in (C)? \nChoices:\n(A) SEM \n(B) TEM \n(C) Confocal microscopy \n(D) X-ray crystallography" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.09375, "loss": 4.240212917327881 }, { "id": "identity_296336", "image": "PMC-VQA/images/PMC5811606_Fig7_275466.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What category do the images belong to?\nChoices:\n(A) MRI images\n(B) PET/CT images\n(C) SPECT/CT images\n(D) X-ray images" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.09375, "loss": 4.699219703674316 }, { "id": "identity_281276", "image": "A-OKVQA/images/526414.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is manipulated by the white machine? Is it:\n(A) paper\n(B) fabric\n(C) metal\n(D) plastic" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.609375, "loss": 4.363302707672119 }, { "id": "identity_301361", "image": "Super-CLEVR/images/superCLEVR_new_001494.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any large brown items made of the same material as the big gray object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 13.2890625, "loss": 4.996191501617432 }, { "id": "identity_281031", "image": "TextVQA/images/fba0bdf1f6da6a21.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the labels of the green bottles?" }, { "from": "gpt", "value": "The answer is zeta" } ], "clip_score": 14.578125, "loss": 4.594458103179932 }, { "id": "identity_24858", "image": "A-OKVQA/images/59278.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How do the cry look that are closest to the camera?\nChoices:\n(A) blurry\n(B) invisible\n(C) mini\n(D) clear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.53125, "loss": 4.138615608215332 }, { "id": "identity_15278", "image": "AI2D/images/238.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the herbivores in the above food chain is extinguished, which population will be affected severely.\nChoices:\n(A) Carnivore\n(B) Plant\n(C) Phytoplankton\n(D) Primary producers" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.6875, "loss": 4.481990337371826 }, { "id": "identity_335446", "image": "A-OKVQA/images/494968.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which city might this motorcycle be used?\nChoices:\n(A) san francisco\n(B) new york\n(C) los angeles\n(D) chicago" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.734375, "loss": 4.0212554931640625 }, { "id": "identity_288637", "image": "A-OKVQA/images/549849.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which activity are these individuals participating?\nChoices:\n(A) sport\n(B) travel\n(C) debate\n(D) celebration" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.4296875, "loss": 4.377191543579102 }, { "id": "identity_2312", "image": "TQA/images/abc_question_images/parts_skeleton_17256.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the function of X in the figure?\nChoices:\n(A) IT PROTECTS THE MUSCLES\n(B) IT PROTECTS THE HEART\n(C) IT PROTECTS THE LUNGS\n(D) IT PROTECTS THE BRAIN" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.234375, "loss": 4.15602970123291 }, { "id": "identity_197879", "image": "A-OKVQA/images/176571.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many statues can be seen on the left side of the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 19.421875, "loss": 4.99283504486084 }, { "id": "identity_1408", "image": "ScienceQA/images/16670.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) opaque\n(B) hard" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.265625, "loss": 5.110988140106201 }, { "id": "identity_270597", "image": "FigureQA/images/3070.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Gold the median with a high value? \n\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.515625, "loss": 4.445115089416504 }, { "id": "identity_269300", "image": "PMC-VQA/images/PMC3755418_fig1_227730.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which quadrant of the left breast is the lesion located?\nChoices:\n(A) Upper outer quadrant\n(B) Lower middle quadrant\n(C) Upper middle quadrant\n(D) Lower outer quadrant" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.921875, "loss": 4.404902458190918 }, { "id": "identity_33664", "image": "PlotQA/images/45791.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How are the legend labels stacked ?" }, { "from": "gpt", "value": "The answer is vertical" } ], "clip_score": 22.0, "loss": 4.120658874511719 }, { "id": "identity_217046", "image": "VQA-AS/images/8598.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many paintings are visible on the wall?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.03125, "loss": 4.460611343383789 }, { "id": "identity_28355", "image": "A-OKVQA/images/241728.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of area is shown?\nChoices:\n(A) residential\n(B) tropical\n(C) city\n(D) country" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.046875, "loss": 4.34108829498291 }, { "id": "identity_12693", "image": "Super-CLEVR/images/superCLEVR_new_000298.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there anything else that has the same color as the rubber road bike?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 17.625, "loss": 4.855293273925781 }, { "id": "identity_132451", "image": "VQA2.0/images/51021.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many speakers are there on the desk?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 19.25, "loss": 4.934199333190918 }, { "id": "identity_19575", "image": "FigureQA/images/43981.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cyan the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.5625, "loss": 4.732788562774658 }, { "id": "identity_338158", "image": "VQA-AS/images/8164.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What item is nearest to the left?" }, { "from": "gpt", "value": "The answer is picture" } ], "clip_score": 20.390625, "loss": 4.8672943115234375 }, { "id": "identity_222200", "image": "VQA2.0/images/389137.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 18.609375, "loss": 4.385828495025635 }, { "id": "identity_20572", "image": "PMC-VQA/images/PMC7493419_Fig1_254537.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the metabolically active area located? \nChoices:\n(A) Left hilar region \n(B) Right hilar region \n(C) Para-aortic area \n(D) Thoracic region. " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.640625, "loss": 4.218441963195801 }, { "id": "identity_3956", "image": "PMC-VQA/images/PMC5880870_Fig2_296385.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the warm color represent in the image? \nChoices:\n(A) Decreased rsFC in schizophrenia patients \n(B) Increased rsFC in schizophrenia patients \n(C) No rsFC in schizophrenia patients \n(D) None of the above " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.5625, "loss": 4.33079719543457 }, { "id": "identity_184415", "image": "A-OKVQA/images/571141.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are holding cameras in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 14.234375, "loss": 4.808407306671143 }, { "id": "identity_331054", "image": "PMC-VQA/images/PMC7364197_fig1_214940.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrowhead indicate in the image?\nChoices:\n(A) Reactive bone \n(B) Marrow infection \n(C) Fracture position \n(D) Nerve damage" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.421875, "loss": 5.007205486297607 }, { "id": "identity_295379", "image": "ScienceQA/images/13405.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following colonies is depicted in the image? \n(A) Florida\n(B) Maine\n(C) New Hampshire\n(D) Connecticut" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.78125, "loss": 4.605269432067871 }, { "id": "identity_8587", "image": "ScienceQA/images/4636.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Massachusetts\n(B) New York\n(C) New Jersey\n(D) Pennsylvania" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.03125, "loss": 5.020223140716553 }, { "id": "identity_143898", "image": "DocVQA/images/rnww0217_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the source of the study being discussed?" }, { "from": "gpt", "value": "The answer is GMA" } ], "clip_score": 19.046875, "loss": 5.38129997253418 }, { "id": "identity_310942", "image": "A-OKVQA/images/247909.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of displaying the stuffed animals?\nChoices:\n(A) as trophies\n(B) to appreciate\n(C) to sell\n(D) as art" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.953125, "loss": 4.874425411224365 }, { "id": "identity_306388", "image": "DVQA/images/bar_train_00153678.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second group of bars?" }, { "from": "gpt", "value": "The answer is ring" } ], "clip_score": 21.28125, "loss": 4.291069984436035 }, { "id": "identity_113585", "image": "VQA2.0/images/324999.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is on the ground to the right of the image?" }, { "from": "gpt", "value": "The answer is A bus" } ], "clip_score": 19.421875, "loss": 4.73270845413208 }, { "id": "identity_332028", "image": "A-OKVQA/images/258132.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the person's hat orange in color?\nChoices:\n(A) dress code\n(B) visibility\n(C) fashion\n(D) camouflage" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.1875, "loss": 4.125666618347168 }, { "id": "identity_327748", "image": "DVQA/images/bar_train_00159604.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest total value?" }, { "from": "gpt", "value": "The answer is teeth" } ], "clip_score": 23.78125, "loss": 4.734888076782227 }, { "id": "identity_3863", "image": "FigureQA/images/75793.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Violet Red the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.953125, "loss": 4.208675384521484 }, { "id": "identity_264026", "image": "FigureQA/images/24034.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Seafoam the high median?\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.484375, "loss": 4.083920955657959 }, { "id": "identity_126539", "image": "PMC-VQA/images/PMC3386976_pone-0040352-g002_143406.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the overall condition of the retina based on the imaging?" }, { "from": "gpt", "value": "The answer is Retina appears healthy" } ], "clip_score": 32.84375, "loss": 4.064067363739014 }, { "id": "identity_290153", "image": "AI2D/images/1293.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents the radicle among the following choices: (A) c, (B) d, (C) b, or (D) s?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.609375, "loss": 4.741735935211182 }, { "id": "identity_299785", "image": "PMC-VQA/images/PMC5660978_F1_232545.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which specific area is the displayed atrophy located?\nChoices:\n(A) Right frontal lobe\n(B) Left frontal lobe\n(C) Occipital lobe\n(D) Temporal lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.59375, "loss": 4.855349540710449 }, { "id": "identity_192600", "image": "VQA2.0/images/423562.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.265625, "loss": 4.780943870544434 }, { "id": "identity_320423", "image": "PMC-VQA/images/PMC7590569_Fig2_283324.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the ground glass opacity located in this lung? \nChoices:\n(A) Right lower lobe \n(B) Left lower lobe \n(C) Right upper lobe \n(D) Left upper lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.921875, "loss": 4.290722846984863 }, { "id": "identity_302878", "image": "PMC-VQA/images/PMC7682978_F2_303278.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the scan of the patient show?\nChoices:\n(A) Kidney stones\n(B) Brain tumor\n(C) Bilateral infiltrations\n(D) Lung nodules." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.46875, "loss": 4.512132167816162 }, { "id": "identity_64374", "image": "AI2D/images/202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trophic levels are depicted in this food web?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 28.359375, "loss": 4.112700462341309 }, { "id": "identity_18822", "image": "PMC-VQA/images/PMC7402780_F1_226712.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the arrows indicate on the image? \nChoices:\n(A) Poor cortical veins around tumor. \n(B) Severe adhesion of tumor to the cortex. \n(C) Mild adhesion of tumor to the cortex. \n(D) Absence of tumor in the cortex. " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.953125, "loss": 4.069189548492432 }, { "id": "identity_225918", "image": "A-OKVQA/images/107960.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is shown in the image, and how does it connect to the ground?" }, { "from": "gpt", "value": "The answer is Motorcycle, wheels" } ], "clip_score": 24.0625, "loss": 4.08880090713501 }, { "id": "identity_188144", "image": "A-OKVQA/images/408528.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.796875, "loss": 4.405293941497803 }, { "id": "identity_26422", "image": "PMC-VQA/images/PMC4788935_Fig5_10954.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the type of imaging used in the experiment? \nChoices:\n(A) PET \n(B) CT \n(C) MRI \n(D) Fluorescent " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.703125, "loss": 4.298178672790527 }, { "id": "identity_323898", "image": "PMC-VQA/images/PMC5438315_Fig9_166518.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most common condition associated with this CT finding?\nChoices:\n(A) Hemangioma\n(B) Varices\n(C) Hematoma\n(D) Phlebolith" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.078125, "loss": 4.884919166564941 }, { "id": "identity_182948", "image": "VQA2.0/images/122440.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct colors are on the kite?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.25, "loss": 4.166625499725342 }, { "id": "identity_7922", "image": "FigureQA/images/34256.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Aqua greater than Light Gold?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.0625, "loss": 4.148868083953857 }, { "id": "identity_278798", "image": "VQA2.0/images/119458.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the sky free from clouds?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 12.1875, "loss": 4.486405372619629 }, { "id": "identity_311410", "image": "TextVQA/images/469acfb4e587bdcd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this translate to \"police\" in English?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 28.34375, "loss": 4.882637023925781 }, { "id": "identity_291024", "image": "PMC-VQA/images/PMC6086494_F1_352649.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where could the false aneurysm in the image possibly originate from? \nChoices:\n(A) Aorta\n(B) Carotid artery\n(C) Pulmonary artery\n(D) Femoral artery" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.65625, "loss": 4.841571807861328 }, { "id": "identity_132739", "image": "TextVQA/images/24ae21fd94f69265.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shorts worn by the person?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 14.515625, "loss": 4.4936981201171875 }, { "id": "identity_296406", "image": "ScienceQA/images/14771.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Idaho?\n(A) Boise\n(B) Santa Fe\n(C) Nampa\n(D) Nashville" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.890625, "loss": 4.1439738273620605 }, { "id": "identity_262728", "image": "Super-CLEVR/images/superCLEVR_new_001500.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more cyan rubber aeroplanes behind the small metal object than big red objects to the right of the yellow aeroplane?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 28.703125, "loss": 5.074595928192139 }, { "id": "identity_145907", "image": "DocVQA/images/txpp0227_16.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full name of the individual from Davenport, Iowa?" }, { "from": "gpt", "value": "The answer is Carl H. Gehrmann" } ], "clip_score": 25.484375, "loss": 4.055155277252197 }, { "id": "identity_223647", "image": "VQA2.0/images/200335.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What advertisement is displayed on the side of the bus?" }, { "from": "gpt", "value": "The answer is Double your credit. Double your credit" } ], "clip_score": 22.375, "loss": 4.3810014724731445 }, { "id": "identity_308915", "image": "PMC-VQA/images/PMC4706984_fig3_460653.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the imaging in Figure 2 indicate?\nChoices:\n(A) Decreased bowel wall thickness \n(B) Increased bowel wall thickness \n(C) Normal bowel wall thickness \n(D) No bowel loop present" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.09375, "loss": 4.2900872230529785 }, { "id": "identity_273020", "image": "FigureQA/images/70043.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the roughest: Firebrick? \nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.9375, "loss": 4.77536153793335 }, { "id": "identity_277485", "image": "ScienceQA/images/20805.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Kansas?\n(A) Frankfort\n(B) Des Moines\n(C) Topeka\n(D) Jefferson City" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.296875, "loss": 4.143824577331543 }, { "id": "identity_292868", "image": "A-OKVQA/images/119614.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the breed of this cat:\n(A) persian\n(B) orange tabby\n(C) bengal\n(D) maine coon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.0, "loss": 4.091948986053467 }, { "id": "identity_57178", "image": "FigureQA/images/2731.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which bar represents the second highest value on the graph?" }, { "from": "gpt", "value": "The answer is Khaki" } ], "clip_score": 26.375, "loss": 4.192986011505127 }, { "id": "identity_73013", "image": "ScienceQA/images/15988.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many legs can be seen in the image of the Grevy's zebra?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 35.78125, "loss": 4.312924861907959 }, { "id": "identity_120598", "image": "Super-CLEVR/images/superCLEVR_new_000311.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the largest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 21.4375, "loss": 4.272367477416992 }, { "id": "identity_293093", "image": "PMC-VQA/images/PMC5797361_Fig1_271410.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What information is displayed in the second column?\nChoices:\n(A) CT tomographic images of the preimplant plan\n(B) Locations of needles before implantation\n(C) Images of the postimplant plan\n(D) Dosimetric evaluations of GTV and adjacent OARs" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.640625, "loss": 4.324907302856445 }, { "id": "identity_116684", "image": "A-OKVQA/images/137281.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is shown in the image?" }, { "from": "gpt", "value": "The answer is Bus" } ], "clip_score": 23.0625, "loss": 4.350043296813965 }, { "id": "identity_262421", "image": "PMC-VQA/images/PMC4570935_fig7_423832.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What cellular and vascular changes are depicted in the image?\nChoices:\n(A) Areas with low cellularity and low vascularity.\n(B) Areas with low cellularity and high cellularity.\n(C) Areas with high cellularity and low vascularity.\n(D) Areas with high cellularity and high vascularity." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.9375, "loss": 4.472296714782715 }, { "id": "identity_229414", "image": "VQA2.0/images/283884.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the main elements visible in the picture, and how do they contribute to the overall scenery?" }, { "from": "gpt", "value": "The answer is Train, mountains, valleys, sky, house, walls, tracks" } ], "clip_score": 24.125, "loss": 4.037714004516602 }, { "id": "identity_307469", "image": "AI2D/images/1026.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The image shows various types of ___________ of conifer trees.\nChoices:\n(A) stems\n(B) Roots\n(C) Leaves\n(D) Thorns" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 36.125, "loss": 4.0934882164001465 }, { "id": "identity_331899", "image": "PMC-VQA/images/PMC5355446_F1_141515.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of tumor is found within the right foramen of Monro?\nChoices:\n(A) Meningioma\n(B) Astrocytoma\n(C) Hemangioblastoma\n(D) Glioblastoma" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.625, "loss": 4.226916313171387 }, { "id": "identity_331869", "image": "IconQA/images/49467.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a paw. What is the third image?\nChoices:\n(A) paw\n(B) house\n(C) dog" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.28125, "loss": 4.712176322937012 }, { "id": "identity_16419", "image": "A-OKVQA/images/370337.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where have the ships stopped?\nChoices:\n(A) near barge\n(B) at lighthouse\n(C) at dock\n(D) on island" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.90625, "loss": 4.00671911239624 }, { "id": "identity_120942", "image": "TextVQA/images/aebbdb5eb8cc3159.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many hanging lights are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.0625, "loss": 4.056379318237305 }, { "id": "identity_127118", "image": "A-OKVQA/images/306431.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the tent in the background?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 14.703125, "loss": 4.058079719543457 }, { "id": "identity_12443", "image": "ScienceQA/images/352.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Select the bird below.\nChoices:\n(A) ostrich\n(B) Japanese tree frog" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.859375, "loss": 4.527210235595703 }, { "id": "identity_6103", "image": "DVQA/images/bar_train_00121676.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the chart contain any negative values?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 27.6875, "loss": 4.194068908691406 }, { "id": "identity_213991", "image": "Super-CLEVR/images/superCLEVR_new_001203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the vehicle to the left of the golden car?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 29.15625, "loss": 4.16766881942749 }, { "id": "identity_268457", "image": "AI2D/images/3086.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the hair-like structure found on the surface of a cell?\nChoices:\n(A) capsule\n(B) pilus\n(C) ribosome\n(D) cell wall" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.328125, "loss": 4.016880989074707 }, { "id": "identity_312627", "image": "PMC-VQA/images/PMC5899791_F1_301491.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was observed in the periapical radiograph?\nChoices:\n(A) Changes related to the alveolar bone.\n(B) Changes related to the enamel.\n(C) Changes related to the dentin.\n(D) Changes related to the root canal treatment." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.21875, "loss": 4.006346702575684 }, { "id": "identity_285863", "image": "A-OKVQA/images/386592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind only the letters SBwa being visible on that sign?\nChoices:\n(A) broken bulbs\n(B) correct name\n(C) spray paint\n(D) fallen letters" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.125, "loss": 4.310945987701416 }, { "id": "identity_321079", "image": "TextVQA/images/36d8f0024be1a1a0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the top left label say?" }, { "from": "gpt", "value": "The answer is adult education" } ], "clip_score": 20.796875, "loss": 4.203124523162842 }, { "id": "identity_1615", "image": "TQA/images/question_images/hydrocarbons_8094.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The following structures of pentane are what?\nChoices:\n(A) Isomers\n(B) Cyclic\n(C) Alkene\n(D) Chiral" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.71875, "loss": 4.131705284118652 }, { "id": "identity_207699", "image": "A-OKVQA/images/476852.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many glasses are on the desk?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.578125, "loss": 4.219723701477051 }, { "id": "identity_317999", "image": "DVQA/images/bar_train_00089909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item had the lowest number of units sold in any store?" }, { "from": "gpt", "value": "The answer is shift" } ], "clip_score": 31.421875, "loss": 4.447594165802002 }, { "id": "identity_307538", "image": "Super-CLEVR/images/superCLEVR_new_000641.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are any small purple rubber dirtbikes here?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 16.625, "loss": 5.339007377624512 }, { "id": "identity_37406", "image": "A-OKVQA/images/214810.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of animals are present in the image?\nChoices:\n(A) sheep\n(B) goat\n(C) cow\n(D) dog" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.171875, "loss": 4.058197498321533 }, { "id": "identity_321919", "image": "PMC-VQA/images/PMC8039083_F2_415472.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging method was utilized to verify the diagnosis of tension hydropneumocephalus?\nChoices:\n(A) CT scan\n(B) MRI without contrast\n(C) MRI with contrast (Axial, coronal, and sagittal views)\n(D) PET scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.203125, "loss": 4.047319412231445 }, { "id": "identity_305764", "image": "ScienceQA/images/14470.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this a mineral or a rock?\nChoices:\n(A) mineral\n(B) rock" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.03125, "loss": 4.227647304534912 }, { "id": "identity_323958", "image": "PMC-VQA/images/PMC6539121_ijms-20-02332-f002_486355.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the condition of this liver?\nChoices:\n(A) Normal\n(B) Cirrhosis\n(C) Fatty liver\n(D) Hemochromatosis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.265625, "loss": 4.653276443481445 }, { "id": "identity_328563", "image": "A-OKVQA/images/20251.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be used to move several luggage bags?\nChoices:\n(A) servants\n(B) people\n(C) carousel\n(D) luggage cart" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.46875, "loss": 4.0035295486450195 }, { "id": "identity_302557", "image": "AI2D/images/4380.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which bird is represented by letter I?\nChoices:\n(A) Lammergeier\n(B) Northern Goshawk\n(C) Short-toed Eagle\n(D) Secretary Bird" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.421875, "loss": 4.393421173095703 }, { "id": "identity_296468", "image": "FigureQA/images/93429.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Chartreuse or Dark Gray?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.421875, "loss": 4.56885290145874 }, { "id": "identity_152080", "image": "ScienceQA/images/19021.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bouncy balls are present in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 26.875, "loss": 4.095554351806641 }, { "id": "identity_304861", "image": "PMC-VQA/images/PMC6086543_F1_352691.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of fracture is visible in these radiographs?\nChoices:\n(A) Comminuted \n(B) Spiral \n(C) Oblique \n(D) Transverse" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.71875, "loss": 4.183341979980469 }, { "id": "identity_213155", "image": "A-OKVQA/images/190393.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels does the chair have?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 24.21875, "loss": 4.041353225708008 }, { "id": "identity_36025", "image": "A-OKVQA/images/87649.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the gold object near the front of the train?\nChoices:\n(A) shield\n(B) handle\n(C) bell\n(D) helmet" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.71875, "loss": 4.429039001464844 }, { "id": "identity_8662", "image": "PMC-VQA/images/PMC5658582_mbt212678-fig-0002_231982.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the subject of the structure labeled B? \nChoices:\n(A) Silica bodies \n(B) Stomata \n(C) Long cells \n(D) Short cells " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.75, "loss": 5.267573833465576 }, { "id": "identity_197042", "image": "VQA2.0/images/74902.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the umbrella closest to the bottom left corner of the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 22.515625, "loss": 4.639654636383057 }, { "id": "identity_300223", "image": "FigureQA/images/91917.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the area under the curve for Maroon the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.109375, "loss": 4.551146030426025 }, { "id": "identity_109005", "image": "PMC-VQA/images/PMC7644528_Fig1_292069.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of using two different staining techniques in these images?" }, { "from": "gpt", "value": "The answer is To provide comprehensive tissue analysis" } ], "clip_score": 31.28125, "loss": 4.018661975860596 }, { "id": "identity_315453", "image": "VQA-RAD/images/synpic39141.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the level of this section?" }, { "from": "gpt", "value": "The answer is Mid abdomen" } ], "clip_score": 22.171875, "loss": 4.318604469299316 }, { "id": "identity_326168", "image": "PMC-VQA/images/PMC3648425_F6_203439.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the arrows indicate in this?\nChoices:\n(A) The direction of some chromomeres.\n(B) The direction of a hybridization signal.\n(C) The location of an RNP-matrix.\n(D) The location of some cytoplasm." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.078125, "loss": 4.5029425621032715 }, { "id": "identity_306903", "image": "FigureQA/images/23301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Cyan?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.765625, "loss": 4.396708011627197 }, { "id": "identity_243690", "image": "A-OKVQA/images/441116.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What specific Asian language is written on the back of the train, and what does it translate to in English?" }, { "from": "gpt", "value": "The answer is Japanese, Local" } ], "clip_score": 30.796875, "loss": 6.024924278259277 }, { "id": "identity_10708", "image": "TQA/images/abc_question_images/human_system_ear_16099.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part is the outer ear?\nChoices:\n(A) T\n(B) R\n(C) E\n(D) W" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.671875, "loss": 4.284496784210205 }, { "id": "identity_184968", "image": "A-OKVQA/images/79333.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the building in the background?" }, { "from": "gpt", "value": "The answer is Beige" } ], "clip_score": 16.546875, "loss": 4.027058124542236 }, { "id": "identity_288596", "image": "FigureQA/images/81678.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gray the most rough?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.984375, "loss": 4.32837438583374 }, { "id": "identity_237542", "image": "AI2D/images/946.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the animal shown at letter D and describe one key feature that distinguishes it from the animals shown at letters A and E" }, { "from": "gpt", "value": "The answer is Duck, flat broad beak" } ], "clip_score": 32.625, "loss": 4.973448276519775 }, { "id": "identity_205312", "image": "A-OKVQA/images/376422.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the packaging in the bottom row?" }, { "from": "gpt", "value": "The answer is Gold" } ], "clip_score": 16.140625, "loss": 4.174474239349365 }, { "id": "identity_124948", "image": "PMC-VQA/images/PMC7943055_FIG4_386868.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which side of the body is shown in this image?" }, { "from": "gpt", "value": "The answer is Right side" } ], "clip_score": 23.96875, "loss": 4.09602165222168 }, { "id": "identity_208130", "image": "PMC-VQA/images/PMC5043608_Fig4_79376.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the largest structure in the top left corner of the image?" }, { "from": "gpt", "value": "The answer is Irregularly elongated" } ], "clip_score": 17.0625, "loss": 4.0920586585998535 }, { "id": "identity_282386", "image": "TQA/images/abc_question_images/earth_day_night_10096.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What letter represents sunlight in the following choices: (A) M, (B) L, (C) R, or (D) V?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.53125, "loss": 4.494316577911377 }, { "id": "identity_335142", "image": "AI2D/images/879.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above diagram, the primary energy source for mackerel is\nChoices:\n(A) phytoplankton\n(B) whale\n(C) zooplankton\n(D) tuna" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 37.15625, "loss": 4.60922384262085 }, { "id": "identity_253779", "image": "TextVQA/images/a76403bdedd2c6c2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of product is being advertised, and which company developed it?" }, { "from": "gpt", "value": "The answer is Video game by Binary Systems" } ], "clip_score": 17.875, "loss": 4.241023063659668 }, { "id": "identity_20757", "image": "PMC-VQA/images/PMC8146279_diagnostics-11-00772-f006_449234.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the diagnostic imaging finding in the lumbar spine compression fracture? \nChoices:\n(A) Hypointensity on T2 and no contrast enhancement. \n(B) Hyperintensity on T2 and vivid contrast enhancement. \n(C) Hypointensity on T2 and vivid contrast enhancement. \n(D) Hyperintensity on T2 and no contrast enhancement. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.390625, "loss": 4.1061692237854 }, { "id": "identity_30063", "image": "IconQA/images/20502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you select a marble without looking, how likely is it that you will pick a black one?\nChoices:\n(A) impossible\n(B) probable\n(C) certain\n(D) unlikely" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.890625, "loss": 4.4075751304626465 }, { "id": "identity_127395", "image": "VQA-RAD/images/synpic19605.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the source or uploader of this image?" }, { "from": "gpt", "value": "The answer is Erik S Storm" } ], "clip_score": 20.21875, "loss": 6.187838554382324 }, { "id": "identity_248547", "image": "Super-CLEVR/images/superCLEVR_new_000861.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image have the same shape as the big yellow object, and what are their colors?" }, { "from": "gpt", "value": "The answer is Zero" } ], "clip_score": 16.75, "loss": 4.172703742980957 }, { "id": "identity_31443", "image": "A-OKVQA/images/536752.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What facial expression is the man in the red jacket exhibiting?\nChoices:\n(A) sleeping\n(B) crying\n(C) frowning\n(D) smiling" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.40625, "loss": 4.220569610595703 }, { "id": "identity_273585", "image": "IconQA/images/104229.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture is the sixth one in the sequence, following the first picture of a bear?\nChoices:\n(A) plane\n(B) bear\n(C) train" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.703125, "loss": 4.068722248077393 }, { "id": "identity_130832", "image": "A-OKVQA/images/306663.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the roof of the building?" }, { "from": "gpt", "value": "The answer is Light grey or white" } ], "clip_score": 16.65625, "loss": 4.446178913116455 }, { "id": "identity_118720", "image": "VQA-AS/images/19673.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the curtains?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 19.359375, "loss": 4.346215724945068 }, { "id": "identity_63527", "image": "ScienceQA/images/1345.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of algae are depicted in the food web?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 31.390625, "loss": 4.05140495300293 }, { "id": "identity_192017", "image": "PMC-VQA/images/PMC6521358_Fig10_481108.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the needle in relation to the skin surface?" }, { "from": "gpt", "value": "The answer is Shallow, almost parallel" } ], "clip_score": 25.328125, "loss": 4.107457637786865 }, { "id": "identity_311463", "image": "ScienceQA/images/15880.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can happen when this is fertilized?\nChoices:\n(A) It can make pollen.\n(B) It can grow into a mature fern." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.875, "loss": 4.316991329193115 }, { "id": "identity_267273", "image": "A-OKVQA/images/89668.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of fuel sustains this animal?\nChoices:\n(A) rocks\n(B) plants\n(C) dirt\n(D) meat" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.359375, "loss": 4.585559844970703 }, { "id": "identity_317821", "image": "FigureQA/images/56111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.7578125, "loss": 4.295960426330566 }, { "id": "identity_337884", "image": "PMC-VQA/images/PMC6797003_FIG1_52259.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the blood pools located in the brain?\nChoices:\n(A) Temporal horns \n(B) Occipital horns \n(C) Frontal lobes \n(D) Parietal lobes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.890625, "loss": 4.1264448165893555 }, { "id": "identity_296185", "image": "A-OKVQA/images/125275.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What nutrient is present in high levels in the fruit shown? Is it:\n(A) magnesium\n(B) potassium\n(C) vitamin\n(D) vitamin b" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.046875, "loss": 4.103720664978027 }, { "id": "identity_308775", "image": "TextVQA/images/a0993d9d8ce6f776.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which platform is displayed at the top?" }, { "from": "gpt", "value": "The answer is facebook" } ], "clip_score": 22.109375, "loss": 4.259397983551025 }, { "id": "identity_281505", "image": "VQA2.0/images/210728.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the object of the boy's gaze?" }, { "from": "gpt", "value": "The answer is sky" } ], "clip_score": 14.6796875, "loss": 4.184889793395996 }, { "id": "identity_20601", "image": "A-OKVQA/images/268711.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the destination of the bus?\nChoices:\n(A) rose mountain\n(B) rosemont\n(C) 197\n(D) mont" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.25, "loss": 4.004631042480469 }, { "id": "identity_326149", "image": "PMC-VQA/images/PMC6688244_F1_21879.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which brain regions show unusually high uptake in the [11C]PiB PET scan typical of Alzheimer's Disease?\nChoices:\n(A) occipital lobes \n(B) frontal and parietal lobes \n(C) hippocampus and amygdala \n(D) cerebellum and brainstem" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.203125, "loss": 4.240900039672852 }, { "id": "identity_6145", "image": "AI2D/images/4906.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the label 15 represent?\nChoices:\n(A) lava\n(B) core\n(C) tunnel\n(D) ash cloud" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.453125, "loss": 4.37967586517334 }, { "id": "identity_123316", "image": "TextVQA/images/f717dc6f8d6025ab.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many seafood brand labels can be seen in the freezer section?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 27.8125, "loss": 4.1249542236328125 }, { "id": "identity_311038", "image": "PMC-VQA/images/PMC6849595_mp13058-fig-0008_66359.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the imaging technique used here?\nChoices:\n(A) MRI\n(B) X-ray\n(C) CT scan\n(D) PET scan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.640625, "loss": 4.732525825500488 }, { "id": "identity_110120", "image": "TextVQA/images/ec7343611237700a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the strap of the watch?" }, { "from": "gpt", "value": "The answer is Metal" } ], "clip_score": 23.765625, "loss": 4.134852886199951 }, { "id": "identity_269862", "image": "ScienceQA/images/18274.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which containers were included in the experimental group in this experiment?\nChoices:\n(A) the containers with worms\n(B) the containers without worms" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.8671875, "loss": 4.32226037979126 }, { "id": "identity_27376", "image": "PMC-VQA/images/PMC4902502_F3_40168.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the DWI sequences show in the right hippocampus?\nChoices:\n(A) Fluid accumulation \n(B) Decreased signal \n(C) Increased signal \n(D) No signal change " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.09375, "loss": 4.737493991851807 }, { "id": "identity_25098", "image": "PMC-VQA/images/PMC4440813_pone.0125698.g003_388839.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the caption of Fig 3C, what is the shape of the superficial cells containing FMDV antigen? \nChoices:\n(A) squamous \n(B) cuboidal \n(C) columnar \n(D) spherical " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.484375, "loss": 4.541591644287109 }, { "id": "identity_327605", "image": "PMC-VQA/images/PMC8289867_Fig5_11991.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the orientation of this image?\nChoices:\n(A) anterior view\n(B) posterior view\n(C) medial view\n(D) lateral view" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.484375, "loss": 4.884298324584961 }, { "id": "identity_33926", "image": "AI2D/images/921.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food chain diagram, if all oak trees died out then\nChoices:\n(A) pine decreses\n(B) mouse decreases\n(C) mouse increase\n(D) kinglet increases" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.15625, "loss": 4.003612518310547 }, { "id": "identity_129755", "image": "VQA2.0/images/509267.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background of the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 17.984375, "loss": 4.6416192054748535 }, { "id": "identity_27966", "image": "PMC-VQA/images/PMC6735209_fig3_35618.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the soft tissue located in the image? \nChoices:\n(A) Right inner ear \n(B) Left inner ear \n(C) Nose \n(D) Mouth " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.671875, "loss": 4.261923789978027 }, { "id": "identity_265887", "image": "PMC-VQA/images/PMC5216371_f2_110601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of images do the coordinates (f,g) in the figure represent?\nChoices:\n(A) SEM images \n(B) TEM images \n(C) AFM images \n(D) STM images" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.625, "loss": 4.388566017150879 }, { "id": "identity_144900", "image": "DocVQA/images/hzbn0226_54.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the subject of the email?" }, { "from": "gpt", "value": "The answer is naphthalene - OMB request for briefing" } ], "clip_score": 25.21875, "loss": 4.632282257080078 }, { "id": "identity_86339", "image": "Geometry3K/images/32.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If m∠11 is twice the measure of m∠12, what is the value of x?" }, { "from": "gpt", "value": "The answer is No solution" } ], "clip_score": 30.59375, "loss": 4.106522560119629 }, { "id": "identity_37280", "image": "TextVQA/images/629a91565455ec26.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what company is adverised on the main display?" }, { "from": "gpt", "value": "The answer is chi" } ], "clip_score": 19.234375, "loss": 4.744373321533203 }, { "id": "identity_335217", "image": "ScienceQA/images/9692.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared characteristic of these three items?\nChoices:\n(A) sour\n(B) stretchy\n(C) blue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.5234375, "loss": 4.558506965637207 }, { "id": "identity_11782", "image": "TextVQA/images/546e27f604a7a342.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the last word on the banner?" }, { "from": "gpt", "value": "The answer is paradise" } ], "clip_score": 12.734375, "loss": 5.425610065460205 }, { "id": "identity_205543", "image": "VQA2.0/images/449776.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many balloons are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 12.75, "loss": 4.467194080352783 }, { "id": "identity_267946", "image": "TQA/images/abc_question_images/parts_worm_17346.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the worm is labeled as Y? Is it the intestine, ovary, anus, or eggs?\nChoices:\n(A) INTESTINE\n(B) OVARY\n(C) ANUS\n(D) EGGS" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.328125, "loss": 4.6804986000061035 }, { "id": "identity_12713", "image": "Super-CLEVR/images/superCLEVR_new_000358.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the matte thing that is in front of the blue matte object and the matte thing behind the small matte cruiser have the same size?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 24.28125, "loss": 5.392336845397949 }, { "id": "identity_323265", "image": "A-OKVQA/images/445388.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the sign with the arrow and line mean?\nChoices:\n(A) no parking\n(B) no crossing\n(C) no exit\n(D) no turns" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.4375, "loss": 4.070648193359375 }, { "id": "identity_92850", "image": "IconQA/images/7426.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which picture is fourth in the sequence?" }, { "from": "gpt", "value": "The answer is A plane" } ], "clip_score": 20.265625, "loss": 4.200329303741455 }, { "id": "identity_300070", "image": "PMC-VQA/images/PMC7806191_FIG3_341924.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the mass located in this area?\nChoices:\n(A) Anterior \n(B) Lateral \n(C) Posterior \n(D) Superior" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.796875, "loss": 4.658225059509277 }, { "id": "identity_155267", "image": "ScienceQA/images/18353.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries are highlighted in green on the map?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.140625, "loss": 4.806607246398926 }, { "id": "identity_28800", "image": "A-OKVQA/images/393087.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material composes this old square tower?\nChoices:\n(A) brick\n(B) wood\n(C) cobblestone\n(D) mud" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.265625, "loss": 4.435689449310303 }, { "id": "identity_38379", "image": "PMC-VQA/images/PMC7812995_ccr33495-fig-0005_344499.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which lymph node regions are shown in the CT image after chemoradiotherapy? \nChoices:\n(A) External iliac \n(B) Axillary \n(C) Inguinal \n(D) Facial " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.890625, "loss": 4.768902778625488 }, { "id": "identity_296079", "image": "A-OKVQA/images/27777.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the lines behind the car at the bottom signify?\nChoices:\n(A) turning left\n(B) slowing down\n(C) turning right\n(D) no parking" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 5.66796875, "loss": 4.147744655609131 }, { "id": "identity_204836", "image": "A-OKVQA/images/324082.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many American flags can be seen in this image?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 22.3125, "loss": 4.153896808624268 }, { "id": "identity_117616", "image": "Super-CLEVR/images/superCLEVR_new_001807.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is the pink and gray object in the bottom right corner?" }, { "from": "gpt", "value": "The answer is A trailer" } ], "clip_score": 26.9375, "loss": 4.296706676483154 }, { "id": "identity_261270", "image": "ScienceQA/images/7084.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which month typically experiences the highest average rainfall in Cairo?\nChoices:\n(A) January\n(B) March\n(C) July" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.3125, "loss": 4.227335453033447 }, { "id": "identity_78018", "image": "AI2D/images/4620.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of Plagioclase are shown in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.140625, "loss": 4.362120628356934 }, { "id": "identity_23759", "image": "ScienceQA/images/6513.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Vermont\n(B) New York\n(C) New Jersey\n(D) Mississippi" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.15625, "loss": 4.940177917480469 }, { "id": "identity_262241", "image": "TQA/images/question_images/earth_moon_phases_6006.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which moon phase is farthest from the sun among the following options: Full moon, Waxing crescent moon, New moon, or Third quarter moon?\nChoices:\n(A) Full moon\n(B) Waxing crescent moon\n(C) New moon\n(D) Third quarter moon" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.890625, "loss": 4.393536567687988 }, { "id": "identity_329602", "image": "PMC-VQA/images/PMC6149721_molecules-22-02208-f005_370226.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the imaging technique used in this study?\nChoices:\n(A) CT scanning \n(B) Mammography \n(C) MR imaging \n(D) PET imaging" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.859375, "loss": 4.054203987121582 }, { "id": "identity_4887", "image": "PMC-VQA/images/PMC4048546_f0005_295508.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the MRI image showing? \nChoices:\n(A) Presence of iron deposition \n(B) Absence of iron deposition \n(C) Enlargement of the cerebellum \n(D) Increased blood flow to the brain " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.765625, "loss": 4.732814311981201 }, { "id": "identity_11560", "image": "IconQA/images/99647.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of hearts even or odd?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.265625, "loss": 4.289782524108887 }, { "id": "identity_315266", "image": "VizWiz/images/VizWiz_train_00017077.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this?" }, { "from": "gpt", "value": "The answer is variety desert sage most similar to salvia gracias hybrid sage" } ], "clip_score": 30.078125, "loss": 4.984861850738525 }, { "id": "identity_298376", "image": "A-OKVQA/images/287305.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of area is this categorized as?\nChoices:\n(A) tropical\n(B) residential\n(C) country\n(D) city" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 12.96875, "loss": 4.088263988494873 }, { "id": "identity_8280", "image": "FigureQA/images/97009.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Web Maroon the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.171875, "loss": 4.398694038391113 }, { "id": "identity_291525", "image": "PMC-VQA/images/PMC6843440_medicina-55-00633-f003_65471.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which characteristics of the vertebral lesion are visible in the images provided in the caption?\nChoices:\n(A) Involvement of the anterior wall \n(B) An epidural mass \n(C) Osteoporosis \n(D) Normal bone structure" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.125, "loss": 4.028285026550293 }, { "id": "identity_205951", "image": "PMC-VQA/images/PMC7296345_fig5_192972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image in relation to the anatomical position?" }, { "from": "gpt", "value": "The answer is Axial orientation" } ], "clip_score": 29.34375, "loss": 4.602298259735107 }, { "id": "identity_22739", "image": "IconQA/images/35161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) slide\n(B) turn\n(C) flip" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.3125, "loss": 4.363397121429443 }, { "id": "identity_328631", "image": "ScienceQA/images/17944.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Jackson\n(B) Biloxi\n(C) Cambridge\n(D) Portland" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.078125, "loss": 4.302792072296143 }, { "id": "identity_329421", "image": "PMC-VQA/images/PMC5476429_fig8_180437.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What method was used to create this 3D image?\nChoices:\n(A) Confocal microscopy\n(B) Transmission electron microscopy\n(C) Scanning electron microscopy\n(D) Fluorescence microscopy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.4375, "loss": 4.4069414138793945 }, { "id": "identity_19105", "image": "FigureQA/images/94952.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Saddle Brown the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.78125, "loss": 4.359051704406738 }, { "id": "identity_268710", "image": "Super-CLEVR/images/superCLEVR_new_000365.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the large rubber object located in front of the matte object to the right of the scooter?" }, { "from": "gpt", "value": "The answer is school bus" } ], "clip_score": 25.3125, "loss": 5.108298301696777 }, { "id": "identity_288980", "image": "A-OKVQA/images/287105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the brown color of the hardware on the doors?\nChoices:\n(A) rust\n(B) patina\n(C) stain\n(D) paint" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.3828125, "loss": 4.457785129547119 }, { "id": "identity_2546", "image": "PMC-VQA/images/PMC7402021_mrm28303-fig-0004_226287.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the orientation of the magnetic field with respect to the head? \nChoices:\n(A) Parallel to the head \n(B) Perpendicular to the head \n(C) Tangent to the head \n(D) Antiparallel to the head " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.578125, "loss": 4.326286315917969 }, { "id": "identity_294276", "image": "PMC-VQA/images/PMC4332747_Fig4_359939.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to assess the dislodged bone cement among the following options: (A) Magnetic Resonance Imaging (MRI), (B) Computed Tomography (CT) scan, (C) X-ray, or (D) Ultrasound?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.21875, "loss": 4.024880886077881 }, { "id": "identity_33184", "image": "PMC-VQA/images/PMC7482117_f2_251602.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of imaging was used to capture the images in A and B? \nChoices:\n(A) Computed tomography (CT) \n(B) Magnetic resonance imaging (MRI) \n(C) X-ray \n(D) Ultrasound " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.375, "loss": 4.058197975158691 }, { "id": "identity_21431", "image": "FigureQA/images/74728.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Deep Sky Blue have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.171875, "loss": 4.439387321472168 }, { "id": "identity_262917", "image": "FigureQA/images/95106.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Bubblegum or Steel Blue?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.796875, "loss": 4.033761024475098 }, { "id": "identity_319874", "image": "ScienceQA/images/10632.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Columbia\n(B) Frankfort\n(C) Bismarck\n(D) Denver" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.515625, "loss": 4.3438520431518555 }, { "id": "identity_51857", "image": "FigureQA/images/56809.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the second highest value on the graph?" }, { "from": "gpt", "value": "The answer is Coral" } ], "clip_score": 24.0, "loss": 4.339077472686768 }, { "id": "identity_18014", "image": "FigureQA/images/92705.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Blue the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.0, "loss": 4.241794586181641 }, { "id": "identity_283069", "image": "AI2D/images/4045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the hottest part from the following options:\n(A) core\n(B) none\n(C) envelope\n(D) flair" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.046875, "loss": 4.389510154724121 }, { "id": "identity_304374", "image": "TextVQA/images/2163e764c1e571ad.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the sticker above this say?" }, { "from": "gpt", "value": "The answer is urbancritters" } ], "clip_score": 20.96875, "loss": 4.093858242034912 }, { "id": "identity_267302", "image": "PMC-VQA/images/PMC5977127_jcm-07-00088-f003_321235.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the hematoma located?\nChoices:\n(A) Right thigh\n(B) Left thigh\n(C) Right arm\n(D) Left arm" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.140625, "loss": 4.122806072235107 }, { "id": "identity_249520", "image": "VQA2.0/images/292087.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bikes with sidecars are present in the image, and what is the total number of wheels visible on these bikes?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 26.5, "loss": 4.427797794342041 }, { "id": "identity_320646", "image": "PMC-VQA/images/PMC8933768_brb32490-fig-0003_226526.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of injury is associated with small hemorrhagic lesions in these images?\nChoices:\n(A) Epidural hematoma\n(B) Subdural hematoma\n(C) Diffuse axonal injury\n(D) Concussion" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.65625, "loss": 4.53409481048584 }, { "id": "identity_189550", "image": "VQA-AS/images/14719.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the boy's pants?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 19.546875, "loss": 4.347986221313477 }, { "id": "identity_338550", "image": "ScienceQA/images/1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these places is farthest north?\nChoices:\n(A) West Virginia\n(B) Louisiana\n(C) Arizona\n(D) Oklahoma" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.984375, "loss": 4.274345397949219 }, { "id": "identity_309210", "image": "Super-CLEVR/images/superCLEVR_new_000116.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the size of the red shiny truck the same as the cyan aircraft?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 25.625, "loss": 5.1523356437683105 }, { "id": "identity_316620", "image": "DocVQA/images/jlxm0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: WHAT ACTIONS ARE NECESSARY BEFORE AND AFTER CONNECTING WITH SILANIZED PRESS FIT UNION?" }, { "from": "gpt", "value": "The answer is performance test" } ], "clip_score": 21.703125, "loss": 5.86053991317749 }, { "id": "identity_36387", "image": "PMC-VQA/images/PMC4842356_fig3_23762.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the cause of the narrowing of the artery? \nChoices:\n(A) Infection \n(B) Congenital defect \n(C) Thrombus \n(D) Trauma " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.625, "loss": 4.0560688972473145 }, { "id": "identity_336176", "image": "A-OKVQA/images/232011.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the item the person on the right side is holding protect against?\nChoices:\n(A) rain\n(B) vampires\n(C) bears\n(D) mosquitos" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.3125, "loss": 4.659762382507324 }, { "id": "identity_274549", "image": "FigureQA/images/50035.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Blue Violet the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.78125, "loss": 4.443695068359375 }, { "id": "identity_264567", "image": "PMC-VQA/images/PMC5736520_Fig8_254782.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What aperture size was utilized to capture the images?\nOptions:\n(A) Small (100mm)\n(B) Medium (200mm)\n(C) Large (300mm)\n(D) Extra-large (400mm)" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.9921875, "loss": 4.110085487365723 }, { "id": "identity_206854", "image": "Super-CLEVR/images/superCLEVR_new_001686.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the object that is to the left of the small brown chopper?" }, { "from": "gpt", "value": "The answer is Rectangular" } ], "clip_score": 19.734375, "loss": 4.1635918617248535 }, { "id": "identity_23047", "image": "PMC-VQA/images/PMC1784065_pone-0000206-g002_9112.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is present in the epithelium cells of the gill? \nChoices:\n(A) Bacteria \n(B) Gills \n(C) Follicle \n(D) Nerve fibers " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.96875, "loss": 4.017017364501953 }, { "id": "identity_298087", "image": "ScienceQA/images/12908.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following factors served as an independent variable in this experiment?\nChoices:\n(A) the type of flour used\n(B) the volume of the cupcakes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.75, "loss": 4.209156036376953 }, { "id": "identity_285459", "image": "PMC-VQA/images/PMC4594743_Fig3_430082.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which MRI sequence is aligned parallel to the long axis of the hippocampus?\nChoices:\n(A) Coronal T2W\n(B) Transverse T2W\n(C) Sagittal T1W\n(D) Axial T1W" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.84375, "loss": 4.285416126251221 }, { "id": "identity_338569", "image": "TextVQA/images/1a3ff09978212117.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first letter in the title of the book with an animal?" }, { "from": "gpt", "value": "The answer is k" } ], "clip_score": 16.625, "loss": 4.051598072052002 }, { "id": "identity_319258", "image": "ScienceQA/images/13728.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What state of matter is a stuffed rabbit?\nChoices:\n(A) a solid\n(B) a liquid\n(C) a gas" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.3125, "loss": 4.150542259216309 }, { "id": "identity_123492", "image": "VQA2.0/images/199487.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the cap worn by the batter?" }, { "from": "gpt", "value": "The answer is Purple" } ], "clip_score": 17.828125, "loss": 4.03392219543457 }, { "id": "identity_278636", "image": "DocVQA/images/txpp0227_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the olive badge?" }, { "from": "gpt", "value": "The answer is nonmember" } ], "clip_score": 13.5546875, "loss": 6.355838775634766 }, { "id": "identity_1242", "image": "AI2D/images/3079.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is represented by the label A?\nChoices:\n(A) Golgi apparatus\n(B) Ribosomes\n(C) Cell membrane\n(D) Mitochondria" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.28125, "loss": 4.0880022048950195 }, { "id": "identity_206978", "image": "A-OKVQA/images/385448.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many scooters are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.90625, "loss": 4.254673480987549 }, { "id": "identity_36493", "image": "PMC-VQA/images/PMC7201798_Fig1_164065.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was found in the CT examination of the patient? \nChoices:\n(A) A small mass in the anterior mediastinum \n(B) A large mass in the posterior mediastinum \n(C) A large mass in the anterior mediastinum \n(D) No mass was found in the mediastinum. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.75, "loss": 4.039589881896973 }, { "id": "identity_205795", "image": "A-OKVQA/images/485440.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many palm trees can be seen in the background?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.53125, "loss": 4.277470588684082 }, { "id": "identity_294013", "image": "A-OKVQA/images/32060.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose are the vehicles parked near the bar primarily used?\nChoices:\n(A) racing\n(B) delivery\n(C) farming\n(D) public transport" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.421875, "loss": 4.245932579040527 }, { "id": "identity_336854", "image": "PMC-VQA/images/PMC4921494_F2_45002.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the thickness of these cross-sections?\nChoices:\n(A) 1 μm \n(B) 5 μm \n(C) 10 μm \n(D) 15 μm" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.359375, "loss": 4.04038143157959 }, { "id": "identity_332337", "image": "A-OKVQA/images/346026.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the birds doing?\nChoices:\n(A) singing\n(B) sleeping\n(C) finding food\n(D) resting" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.671875, "loss": 4.505700588226318 }, { "id": "identity_112373", "image": "A-OKVQA/images/361885.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the fireplace hearth?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 20.40625, "loss": 4.6219563484191895 }, { "id": "identity_300584", "image": "PMC-VQA/images/PMC8020193_FIG1_409560.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where in the image is the condition located?\nChoices:\n(A) Base of the right neck \n(B) Base of the left neck \n(C) Center of the chest \n(D) Base of the spine" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.71875, "loss": 4.500727653503418 }, { "id": "identity_186625", "image": "PMC-VQA/images/PMC4366375_pone.0118513.g001_369686.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arrows are present in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 18.25, "loss": 4.65774393081665 }, { "id": "identity_323381", "image": "A-OKVQA/images/536725.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of pants is this person wearing?\nChoices:\n(A) corduroy\n(B) cargo pants\n(C) jeans\n(D) dress pants" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.390625, "loss": 4.286062240600586 }, { "id": "identity_288735", "image": "A-OKVQA/images/393036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which movie does the scene bear the closest resemblance to among the following options: (A) True Grit, (B) The Matrix, (C) Fight Club, or (D) American Psycho?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.6484375, "loss": 4.396897315979004 }, { "id": "identity_259726", "image": "ScienceQA/images/11294.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the highlighted country and list its neighboring countries or territories within a 1000 km radius" }, { "from": "gpt", "value": "The answer is Kiribati, the Marshall Islands" } ], "clip_score": 35.21875, "loss": 4.00392484664917 }, { "id": "identity_158148", "image": "AI2D/images/987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term for the fruit type that opens with a lid to release seeds?" }, { "from": "gpt", "value": "The answer is Poricidal capsule" } ], "clip_score": 25.03125, "loss": 4.118424415588379 }, { "id": "identity_263811", "image": "A-OKVQA/images/103393.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What object is located to the left of the cone among the following choices: helicopter, bicycle, television, or bear?\nChoices:\n(A) helicopter\n(B) bicycle\n(C) television\n(D) bear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.546875, "loss": 4.460056781768799 }, { "id": "identity_207397", "image": "A-OKVQA/images/350059.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels are visible on the locomotive?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.984375, "loss": 4.203654766082764 }, { "id": "identity_285396", "image": "TextVQA/images/38451ec391f20ab5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this an IPA?" }, { "from": "gpt", "value": "The answer is amish way" } ], "clip_score": 27.625, "loss": 6.562266826629639 }, { "id": "identity_301583", "image": "DVQA/images/bar_train_00169980.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of this stack of bars?" }, { "from": "gpt", "value": "The answer is game" } ], "clip_score": 16.6875, "loss": 4.277651786804199 }, { "id": "identity_294924", "image": "PMC-VQA/images/PMC7362644_Fig1_214575.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which limbs exhibit a neurogenic pattern?\nOptions:\n(A) Lower limbs\n(B) Upper limbs\n(C) Both limbs\n(D) No specific limb" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.15625, "loss": 4.349646091461182 }, { "id": "identity_29390", "image": "PMC-VQA/images/PMC8008854_f1_406334.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the red arrow indicate in the image? \nChoices:\n(A) Absence of air in the mediastinum \n(B) Presence of air in the mediastinum \n(C) Presence of fluid in the mediastinum \n(D) Enlargement of the heart. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.15625, "loss": 4.607178688049316 }, { "id": "identity_35427", "image": "PMC-VQA/images/PMC7607047_f1_289685.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which muscle is represented by the number 4 in the image? \nChoices:\n(A) psoas \n(B) quadratus lumborum \n(C) latissimus dorsi \n(D) rectus abdominis " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.828125, "loss": 4.050058841705322 }, { "id": "identity_281882", "image": "TabMWP/images/20965.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which destination on the hiking trails near Javier's campground has a higher elevation from sea level, the underground lake in the small cave or the mountain peak?\nChoices:\n(A) underground lake\n(B) mountain peak" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.53125, "loss": 4.311605453491211 }, { "id": "identity_155128", "image": "ScienceQA/images/17598.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many magnets are shown in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.71875, "loss": 4.181631088256836 }, { "id": "identity_330960", "image": "A-OKVQA/images/492347.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what direction will the train go next with respect to the person taking this picture?\nChoices:\n(A) south\n(B) north\n(C) south\n(D) east" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.046875, "loss": 4.026825428009033 }, { "id": "identity_31676", "image": "IconQA/images/60950.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner more likely to land?\nChoices:\n(A) white\n(B) blue" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.09375, "loss": 4.458762168884277 }, { "id": "identity_234343", "image": "Geometry3K/images/1423.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the value of \\( x \\) and then determine the measure of \\(\\angle V\\)" }, { "from": "gpt", "value": "The answer is 193" } ], "clip_score": 29.5625, "loss": 4.033021926879883 }, { "id": "identity_321705", "image": "DVQA/images/bar_train_00130756.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the smallest total value?" }, { "from": "gpt", "value": "The answer is lap" } ], "clip_score": 23.0, "loss": 4.11328649520874 }, { "id": "identity_29902", "image": "PMC-VQA/images/PMC7267220_F5_182993.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the image estimate? \nChoices:\n(A) Brain activity \n(B) Brain weight \n(C) Subcortical structural volume \n(D) None of the above " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.140625, "loss": 4.098313331604004 }, { "id": "identity_296942", "image": "PMC-VQA/images/PMC6255699_F1_400724.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the type of tissue displayed in the cross sections from the following options:\n(A) phloem tissue\n(B) xylem tissue\n(C) epidermis tissue\n(D) ground tissue." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.78125, "loss": 4.834791660308838 }, { "id": "identity_1402", "image": "TextVQA/images/bb62f2d28c307de2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what language is present in the ad?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 15.1953125, "loss": 4.368910312652588 }, { "id": "identity_314315", "image": "ScienceQA/images/7817.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared characteristic of these three items?\nChoices:\n(A) bouncy\n(B) transparent\n(C) blue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.078125, "loss": 4.3144426345825195 }, { "id": "identity_282275", "image": "A-OKVQA/images/265211.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color of the background on the sign featuring the muscle man?\nOptions:\n(A) green\n(B) white\n(C) orange\n(D) red" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.46875, "loss": 4.136525630950928 }, { "id": "identity_253964", "image": "TextVQA/images/24bec4d1080a9171.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the full word written on the boy's shirt?" }, { "from": "gpt", "value": "The answer is train" } ], "clip_score": 19.765625, "loss": 5.072868347167969 }, { "id": "identity_119304", "image": "VQA-AS/images/525.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather like in the image?" }, { "from": "gpt", "value": "The answer is Fair weather" } ], "clip_score": 22.171875, "loss": 4.549117565155029 }, { "id": "identity_289657", "image": "ScienceQA/images/13107.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the texture of this object that matches one of the following properties: scratchy or smooth?\nChoices:\n(A) scratchy\n(B) smooth" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.5, "loss": 4.231029987335205 }, { "id": "identity_209370", "image": "PMC-VQA/images/PMC5893493_Fig9_299522.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the abnormality indicated by the straight arrow?" }, { "from": "gpt", "value": "The answer is Foreign body or calcification" } ], "clip_score": 23.828125, "loss": 4.083845138549805 }, { "id": "identity_22409", "image": "A-OKVQA/images/264805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which setting on his camera phone will cause harm to his eyes when it is on?\nChoices:\n(A) flash\n(B) lightbulb\n(C) speaker\n(D) camera" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.2421875, "loss": 4.313243389129639 }, { "id": "identity_302257", "image": "FigureQA/images/85078.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this color have the minimum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.21875, "loss": 4.048619270324707 }, { "id": "identity_263952", "image": "A-OKVQA/images/221446.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term used to collectively refer to the food options listed above?\nChoices:\n(A) fruits\n(B) meat\n(C) nuts\n(D) veggies" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.265625, "loss": 4.108578205108643 }, { "id": "identity_264466", "image": "FigureQA/images/737.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Purple not the low median?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.78125, "loss": 4.0302629470825195 }, { "id": "identity_291459", "image": "PMC-VQA/images/PMC7003532_Fig5_112043.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging modality was utilized to acquire postoperative images? \nOptions:\n(A) MRI\n(B) CT\n(C) X-ray\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.40625, "loss": 4.785738945007324 }, { "id": "identity_194572", "image": "TextVQA/images/286d683da3dcfece.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the flavor of the drink advertised on the small sign?" }, { "from": "gpt", "value": "The answer is Crystal Sunset" } ], "clip_score": 18.078125, "loss": 4.611728668212891 }, { "id": "identity_69904", "image": "AI2D/images/2237.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many nymph stages are shown for the grasshopper in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 30.90625, "loss": 4.548503398895264 }, { "id": "identity_277631", "image": "FigureQA/images/15638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Seafoam the highest median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 39.46875, "loss": 4.32492733001709 }, { "id": "identity_276493", "image": "PMC-VQA/images/PMC4884850_fig3_35588.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the thick solid light green line in the merged image represent?\nChoices:\n(A) Bladder \n(B) Prostate \n(C) Urethra \n(D) Rectum" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.171875, "loss": 4.690730094909668 }, { "id": "identity_7489", "image": "AI2D/images/1026.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The image shows different types of ___________ of conifer trees.\nChoices:\n(A) stems\n(B) Roots\n(C) Leaves\n(D) Thorns" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 36.15625, "loss": 4.0762457847595215 }, { "id": "identity_303321", "image": "Super-CLEVR/images/superCLEVR_new_001206.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the red bus made of the same material as the purple cruiser in front of the green metallic tandem bike?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 25.203125, "loss": 5.021944999694824 }, { "id": "identity_12564", "image": "FigureQA/images/43461.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Purple greater than Tomato?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.34375, "loss": 4.048050880432129 }, { "id": "identity_128336", "image": "PMC-VQA/images/PMC3447726_F4_156431.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the 'LE' label indicate in the image?" }, { "from": "gpt", "value": "The answer is Lumen" } ], "clip_score": 21.375, "loss": 4.1773834228515625 }, { "id": "identity_25102", "image": "PMC-VQA/images/PMC3677018_fig1_210063.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What structures are shown by the red arrows in the image?\nChoices:\n(A) Cysts in the right testis \n(B) Cysts in the left testis \n(C) Cysts in the external iliac artery \n(D) Cysts in the obturator canal " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.71875, "loss": 4.290298938751221 }, { "id": "identity_307331", "image": "A-OKVQA/images/25997.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is this person holding their hand out?\nChoices:\n(A) is pointing\n(B) to balance\n(C) is confused\n(D) to catch" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.5859375, "loss": 4.1583967208862305 }, { "id": "identity_42813", "image": "DocVQA/images/hqvw0217_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the journal name for the manuscript titled \"P3(5)\"?" }, { "from": "gpt", "value": "The answer is TBD" } ], "clip_score": 13.125, "loss": 4.719505786895752 }, { "id": "identity_329019", "image": "DVQA/images/bar_train_00062501.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What element does this color represent?" }, { "from": "gpt", "value": "The answer is need" } ], "clip_score": 19.09375, "loss": 4.260633945465088 }, { "id": "identity_1602", "image": "FigureQA/images/76711.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Maroon greater than Dark Orange?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.28125, "loss": 4.053465366363525 }, { "id": "identity_297006", "image": "A-OKVQA/images/558968.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location are the individuals currently seated? \nOptions:\n(A) deck\n(B) living room\n(C) bathroom\n(D) bedroom" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.28125, "loss": 4.4555864334106445 }, { "id": "identity_188005", "image": "PMC-VQA/images/PMC6502287_FIG1_475685.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many yellow arrows are present in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.328125, "loss": 4.5771989822387695 }, { "id": "identity_315088", "image": "FigureQA/images/29896.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Orchid?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.640625, "loss": 4.115612983703613 }, { "id": "identity_274461", "image": "TQA/images/question_images/cycle_rock_6722.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of rocks, characterized by minerals and global chemistry, provide insights into the composition of the mantle?\nChoices:\n(A) Rocks at surface\n(B) Metamorphic Rock\n(C) Sedimentary Rock\n(D) Igenous rock" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.03125, "loss": 4.54902458190918 }, { "id": "identity_31647", "image": "IconQA/images/82333.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this shape open or closed?\nChoices:\n(A) open\n(B) closed" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.046875, "loss": 4.128608226776123 }, { "id": "identity_293401", "image": "ScienceQA/images/8347.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Idaho?\nChoices:\n(A) Birmingham\n(B) Kansas City\n(C) Boise\n(D) Nampa" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.84375, "loss": 4.136509895324707 }, { "id": "identity_335633", "image": "PMC-VQA/images/PMC5496392_Fig4_186289.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the V sign indicate on these radiographs?\nChoices:\n(A) position of the spine \n(B) location of a fracture \n(C) alignment of the hips \n(D) angle of the knee" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.734375, "loss": 4.667680740356445 }, { "id": "identity_231291", "image": "PMC-VQA/images/PMC5569414_F2_206478.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which part of the brain shows hyper-intensities consistent with PRES, and what are the primary functions associated with this brain region?" }, { "from": "gpt", "value": "The answer is Visual processing" } ], "clip_score": 30.15625, "loss": 5.744251728057861 }, { "id": "identity_311021", "image": "ScienceQA/images/11864.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Raleigh\n(B) Harrisburg\n(C) Charlotte\n(D) Baton Rouge" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.359375, "loss": 4.193541526794434 }, { "id": "identity_132417", "image": "VQA2.0/images/49640.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many street lamps are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.859375, "loss": 4.312739372253418 }, { "id": "identity_290995", "image": "VQA-AS/images/2947.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the weather partly cloudy?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 19.09375, "loss": 4.0741963386535645 }, { "id": "identity_272478", "image": "PMC-VQA/images/PMC7896074_Fig7_371714.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the red dotted line in figure A represent?\nChoices:\n(A) Osteoclasts \n(B) Muscles \n(C) Osteoblasts \n(D) Brain matter" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.875, "loss": 4.686356544494629 }, { "id": "identity_219851", "image": "TextVQA/images/9885ee997e817f37.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand name visible on the products and promotional materials?" }, { "from": "gpt", "value": "The answer is SD Codefree" } ], "clip_score": 22.015625, "loss": 5.068642616271973 }, { "id": "identity_10785", "image": "ScienceQA/images/336.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Idaho?\nChoices:\n(A) Boise\n(B) Topeka\n(C) Juneau\n(D) Buffalo" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.125, "loss": 4.279292583465576 }, { "id": "identity_20904", "image": "A-OKVQA/images/160968.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the metal shapes attached to the front of the horse's breast collar?\nChoices:\n(A) id tags\n(B) breed certifications\n(C) police badges\n(D) trophy plaques" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.78125, "loss": 4.215373992919922 }, { "id": "identity_20710", "image": "IconQA/images/68912.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a star. Which picture is second?\nChoices:\n(A) moon\n(B) star\n(C) rocket" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.75, "loss": 4.0417890548706055 }, { "id": "identity_209207", "image": "A-OKVQA/images/388933.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people besides the main subject are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.875, "loss": 4.822115421295166 }, { "id": "identity_316400", "image": "FigureQA/images/61387.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.46875, "loss": 4.680384159088135 }, { "id": "identity_272628", "image": "PMC-VQA/images/PMC5742149_F2_256651.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the image?\nChoices:\n(A) MRI images\n(B) CT images\n(C) X-ray images\n(D) Ultrasound images" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.203125, "loss": 4.359544277191162 }, { "id": "identity_293320", "image": "TabMWP/images/20001.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the past four days, an ice cream shop recorded the number of ice cream cones sold each day. On which day did the shop sell the highest number of ice cream cones?\nChoices:\n(A) Saturday\n(B) Sunday\n(C) Monday\n(D) Tuesday" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 38.15625, "loss": 4.038743019104004 }, { "id": "identity_336937", "image": "A-OKVQA/images/383930.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person wearing?\nChoices:\n(A) bow tie\n(B) hat\n(C) scarf\n(D) suspenders" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.3046875, "loss": 4.624262809753418 }, { "id": "identity_298728", "image": "PMC-VQA/images/PMC4730158_f4_466715.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to capture this image? \nOptions:\n(A) SEM\n(B) TEM\n(C) AFM\n(D) XRD" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.109375, "loss": 4.374272346496582 }, { "id": "identity_331718", "image": "PMC-VQA/images/PMC3128610_pone-0021749-g004_101311.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the arrowheads indicate in these panels?\nChoices:\n(A) thick bundled pilus structures\n(B) membrane blebs\n(C) higher magnification\n(D) negative staining" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.328125, "loss": 4.714018821716309 }, { "id": "identity_300542", "image": "VQA2.0/images/70363.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the weather clear?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 12.8984375, "loss": 4.118897914886475 }, { "id": "identity_120357", "image": "VizWiz/images/VizWiz_train_00000570.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the object's tip?" }, { "from": "gpt", "value": "The answer is Conical" } ], "clip_score": 21.171875, "loss": 4.1320481300354 }, { "id": "identity_265820", "image": "AI2D/images/462.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the snake population increases, what is the most likely outcome for the lizard population?\nChoices:\n(A) Increase\n(B) Hard to say\n(C) Decrease\n(D) Stay the same" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.25, "loss": 4.0648298263549805 }, { "id": "identity_30662", "image": "TabMWP/images/9519.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is the function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.875, "loss": 4.160506248474121 }, { "id": "identity_307992", "image": "FigureQA/images/12547.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the amount of Orange Red smaller than the amount of Purple?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.34375, "loss": 4.108980655670166 }, { "id": "identity_259563", "image": "TextVQA/images/0f2848616d942d96.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cars in the image have license plates from Texas, and what are the colors of these cars?" }, { "from": "gpt", "value": "The answer is Two red cars" } ], "clip_score": 29.15625, "loss": 4.536081314086914 }, { "id": "identity_264805", "image": "PMC-VQA/images/PMC8908670_Fig2_217519.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the preoperative imaging of the 22-year-old male osteosarcoma patient, where was the lesion located?\nChoices:\n(A) Distal femur\n(B) Proximal femur\n(C) Tibia\n(D) Humerus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.375, "loss": 4.320487022399902 }, { "id": "identity_329304", "image": "TQA/images/question_images/human_system_nervous_6170.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the brain is responsible for something related to movement?\nChoices:\n(A) occipital lobe\n(B) frontal lobe\n(C) parietal lobe\n(D) temporal lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.796875, "loss": 4.704388618469238 }, { "id": "identity_30480", "image": "A-OKVQA/images/470984.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most efficient way back up the hill?\nChoices:\n(A) running\n(B) walking\n(C) ski lift\n(D) drive" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.921875, "loss": 4.782375335693359 }, { "id": "identity_307038", "image": "FigureQA/images/16867.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.140625, "loss": 4.064970016479492 }, { "id": "identity_292079", "image": "TabMWP/images/3389.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.8125, "loss": 4.708054542541504 }, { "id": "identity_65299", "image": "TQA/images/question_images/parts_microscope_7202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does one adjust the focus for a sharper image on this microscope?" }, { "from": "gpt", "value": "The answer is Fine focus" } ], "clip_score": 30.734375, "loss": 4.602215766906738 }, { "id": "identity_30103", "image": "VizWiz/images/VizWiz_train_00013527.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is it?" }, { "from": "gpt", "value": "The answer is table red top" } ], "clip_score": 19.265625, "loss": 4.780313014984131 }, { "id": "identity_301545", "image": "ScienceQA/images/16793.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic do these four items share?\nChoices:\n(A) slippery\n(B) sour\n(C) soft" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.609375, "loss": 4.0549516677856445 }, { "id": "identity_111647", "image": "VizWiz/images/VizWiz_train_00016564.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many holes are there in the weight plate?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 30.546875, "loss": 4.33632230758667 }, { "id": "identity_27322", "image": "ScienceQA/images/6522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Florida?\nChoices:\n(A) Tampa\n(B) Indianapolis\n(C) Tallahassee\n(D) Concord" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.578125, "loss": 4.171500205993652 }, { "id": "identity_296821", "image": "PMC-VQA/images/PMC7756970_F0007_325379.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the consistency of the lesion on the outer edge?\nChoices:\n(A) Firm\n(B) Soft\n(C) Uneven\n(D) Absent" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.28125, "loss": 4.56782341003418 }, { "id": "identity_294375", "image": "TextVQA/images/b18c08c6778457b9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which company is the laptop branded by?" }, { "from": "gpt", "value": "The answer is dell" } ], "clip_score": 28.265625, "loss": 4.069314002990723 }, { "id": "identity_41860", "image": "FigureQA/images/16107.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined percentage of Violet Red and Dark Orange segments?" }, { "from": "gpt", "value": "The answer is an estimated percentage" } ], "clip_score": 31.28125, "loss": 4.594082355499268 }, { "id": "identity_262213", "image": "AI2D/images/3242.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, which region corresponds to the cytoplasm?\nChoices:\n(A) m\n(B) j\n(C) d\n(D) n" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.078125, "loss": 4.270112037658691 }, { "id": "identity_24494", "image": "VQA2.0/images/96877.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What do the audience members feel in this moment?" }, { "from": "gpt", "value": "The answer is happy" } ], "clip_score": 14.421875, "loss": 4.126879692077637 }, { "id": "identity_312343", "image": "FigureQA/images/52605.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gray the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.46875, "loss": 4.091065883636475 }, { "id": "identity_293720", "image": "A-OKVQA/images/160740.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which country is the photo taken? Is it from China, Japan, North Korea, or South Korea?\nChoices:\n(A) china\n(B) japan\n(C) north korea\n(D) south korea" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.671875, "loss": 4.1874799728393555 }, { "id": "identity_254082", "image": "DVQA/images/bar_train_00024517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people who prefer each object across all categories, and which object has the second highest total preference?" }, { "from": "gpt", "value": "The answer is family" } ], "clip_score": 32.625, "loss": 5.18260383605957 }, { "id": "identity_317959", "image": "FigureQA/images/80360.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Saddle Brown the one with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.203125, "loss": 4.77157735824585 }, { "id": "identity_1431", "image": "Super-CLEVR/images/superCLEVR_new_000026.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the green bicycle?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 22.515625, "loss": 4.0994462966918945 }, { "id": "identity_321297", "image": "AI2D/images/2376.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted at stage B in the diagram?\nChoices:\n(A) caterpillar\n(B) none of the above\n(C) eggs\n(D) pupa" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.359375, "loss": 4.2765913009643555 }, { "id": "identity_214218", "image": "VQA2.0/images/286292.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorbikes can be seen parked in front of the building?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 14.03125, "loss": 4.919840335845947 }, { "id": "identity_285318", "image": "TextVQA/images/438d7046bdb353bd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me the message on the red poster?" }, { "from": "gpt", "value": "The answer is noise" } ], "clip_score": 16.609375, "loss": 5.130650520324707 }, { "id": "identity_125668", "image": "A-OKVQA/images/575222.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many carriages are attached to the train?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.6875, "loss": 4.257441520690918 }, { "id": "identity_37551", "image": "PMC-VQA/images/PMC6334729_Fig2_423619.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the body is affected by disease? \nChoices:\n(A) Limbs \n(B) Lungs \n(C) Eyes \n(D) Ears " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.140625, "loss": 4.042266368865967 }, { "id": "identity_268478", "image": "VizWiz/images/VizWiz_train_00010815.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the flavor of this coffee?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 17.25, "loss": 4.085998058319092 }, { "id": "identity_328687", "image": "PMC-VQA/images/PMC4169585_Fig6_321500.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What method was used to check bone regeneration?\nChoices:\n(A) X-ray \n(B) CT scan \n(C) MRI \n(D) PET scan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.859375, "loss": 4.746039390563965 }, { "id": "identity_292071", "image": "PMC-VQA/images/PMC8063182_fig0001_423103.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the yellow arrow in the MRI scan?\nChoices:\n(A) Area of interest of affected MRI signal \n(B) Clinically significant MRI signal drop out \n(C) Loss of MRI signal around the chamber \n(D) Unremarkable MRI signal loss around the chamber." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.90625, "loss": 4.565384864807129 }, { "id": "identity_25723", "image": "A-OKVQA/images/31019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are the traffic cones positioned in the location that they are?\nChoices:\n(A) directions\n(B) road closure\n(C) art\n(D) construction" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.71875, "loss": 4.211969375610352 }, { "id": "identity_135574", "image": "DVQA/images/bar_train_00007475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset has the highest accuracy on the \"weight\" algorithm?" }, { "from": "gpt", "value": "The answer is degree" } ], "clip_score": 34.46875, "loss": 4.209760665893555 }, { "id": "identity_35399", "image": "A-OKVQA/images/337780.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What mode of transport here is the oldest?\nChoices:\n(A) motorcycle\n(B) taxi\n(C) van\n(D) elephant" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.578125, "loss": 4.069305419921875 }, { "id": "identity_24530", "image": "FigureQA/images/40529.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Firebrick greater than Crimson?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.484375, "loss": 4.098563194274902 }, { "id": "identity_288538", "image": "PMC-VQA/images/PMC6780818_nanomaterials-09-01195-f013_47195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of microscope was used to produce the image: Optical microscope, Electron microscope, Scanning probe microscope, or X-ray microscope?\nChoices:\n(A) Optical microscope \n(B) Electron microscope \n(C) Scanning probe microscope \n(D) X-ray microscope" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.625, "loss": 4.190587520599365 }, { "id": "identity_334685", "image": "PMC-VQA/images/PMC8065097_FIG1_423908.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the test used to visualize this organ?\nChoices:\n(A) CT scan \n(B) X-ray \n(C) MRI \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.03125, "loss": 4.2989912033081055 }, { "id": "identity_214995", "image": "VQA2.0/images/531912.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels of the skateboard are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.65625, "loss": 4.242131233215332 }, { "id": "identity_289266", "image": "TQA/images/abc_question_images/layers_of_atmosphere_17069.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option corresponds to the troposphere?\nChoices:\n(A) C\n(B) P\n(C) L\n(D) X" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.984375, "loss": 4.023980140686035 }, { "id": "identity_23682", "image": "PMC-VQA/images/PMC3774635_pone-0074330-g003_231686.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which cell type is represented in the image? \nChoices:\n(A) Osteoblasts \n(B) Osteoclasts \n(C) Neurons \n(D) Adipocytes " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.359375, "loss": 4.335641384124756 }, { "id": "identity_269801", "image": "VQA2.0/images/55637.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the tree surrounded by a fence?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 9.234375, "loss": 4.087700843811035 }, { "id": "identity_299362", "image": "FigureQA/images/95850.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Blue the one with the largest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.453125, "loss": 4.492266654968262 }, { "id": "identity_139392", "image": "MapQA/images/map_10594.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color code representing festivities in the range of 103678-221051?" }, { "from": "gpt", "value": "The answer is a color name" } ], "clip_score": 23.5, "loss": 5.207976341247559 }, { "id": "identity_65475", "image": "ScienceQA/images/4780.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the bird's plumage?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 26.703125, "loss": 4.549068450927734 }, { "id": "identity_296213", "image": "A-OKVQA/images/449103.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the woman in the black boots engaged in with the can? Is she:\n(A) throwing\n(B) buying\n(C) drinking\n(D) recycling" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.4375, "loss": 4.5602264404296875 }, { "id": "identity_222698", "image": "A-OKVQA/images/559483.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 15.3515625, "loss": 4.374163627624512 }, { "id": "identity_327900", "image": "PMC-VQA/images/PMC6209937_sensors-18-03475-f008_388972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the yellow line in this image?\nChoices:\n(A) to show the position of a needle tip\n(B) to show the position of a needle shaft\n(C) to indicate a boundary on the B-mode ultrasound image\n(D) to represent a nerve." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.640625, "loss": 5.389434814453125 }, { "id": "identity_304964", "image": "TQA/images/abc_question_images/human_system_respiratory_16204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents the pharynx in the diagram?\nChoices:\n(A) R\n(B) E\n(C) D\n(D) L" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.515625, "loss": 4.278851509094238 }, { "id": "identity_188299", "image": "Super-CLEVR/images/superCLEVR_new_000074.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is the purple object with wheels?" }, { "from": "gpt", "value": "The answer is Motorcycle" } ], "clip_score": 27.796875, "loss": 4.148525714874268 }, { "id": "identity_128511", "image": "A-OKVQA/images/425184.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many complete bird cages are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 28.71875, "loss": 4.550784587860107 }, { "id": "identity_338553", "image": "PMC-VQA/images/PMC7400152_diagnostics-10-00465-f001_225737.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the MRI result for this? \nChoices:\n(A) hyperintense \n(B) isointense \n(C) both hyper and hypointense \n(D) N/A" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.515625, "loss": 4.1548871994018555 }, { "id": "identity_315242", "image": "A-OKVQA/images/286376.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What sport is this equipment used for?\nChoices:\n(A) skating\n(B) skateboarding\n(C) surfing\n(D) skiing" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.90625, "loss": 4.210490703582764 }, { "id": "identity_269165", "image": "Super-CLEVR/images/superCLEVR_new_000733.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the small brown object made from the same material as the motorcycle located in front of the shiny cyan object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 22.359375, "loss": 5.308856010437012 }, { "id": "identity_14101", "image": "Super-CLEVR/images/superCLEVR_new_001338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the large minivan made of?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 24.609375, "loss": 4.227892875671387 }, { "id": "identity_47295", "image": "MapQA/images/map_788.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states fall under the lowest percentage range of suits?" }, { "from": "gpt", "value": "The answer is an integer number" } ], "clip_score": 28.671875, "loss": 4.025390625 }, { "id": "identity_37058", "image": "A-OKVQA/images/510949.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the status of the red truck?\nChoices:\n(A) parked\n(B) going\n(C) waiting\n(D) broken down" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.140625, "loss": 4.62390661239624 }, { "id": "identity_136445", "image": "DocVQA/images/nhcn0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last name of the first signatory on the list?" }, { "from": "gpt", "value": "The answer is Schosnig" } ], "clip_score": 21.765625, "loss": 4.357442855834961 }, { "id": "identity_217608", "image": "PMC-VQA/images/PMC7939567_F0003_386118.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What abnormality is indicated by the red arrow in the MRI image?" }, { "from": "gpt", "value": "The answer is Spinal lesion or mass" } ], "clip_score": 35.0625, "loss": 4.20871114730835 }, { "id": "identity_26379", "image": "A-OKVQA/images/416981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type stove is seen here?\nChoices:\n(A) electric\n(B) natural gas\n(C) wood\n(D) conductive" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.15625, "loss": 4.464409351348877 }, { "id": "identity_125321", "image": "PMC-VQA/images/PMC6712200_Fig10_29291.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiological sign indicated by the white arrow in the image?" }, { "from": "gpt", "value": "The answer is Bladder calculus" } ], "clip_score": 31.6875, "loss": 4.2715888023376465 }, { "id": "identity_6399", "image": "ScienceQA/images/7178.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) North Carolina\n(B) Pennsylvania\n(C) Connecticut\n(D) Alabama" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.875, "loss": 4.110566139221191 }, { "id": "identity_286876", "image": "A-OKVQA/images/124609.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From where is the smoke near the horses originating?\nChoices:\n(A) auto exhaust\n(B) horses kicking\n(C) sticks\n(D) mountains" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.53125, "loss": 4.759856700897217 }, { "id": "identity_265213", "image": "IconQA/images/730.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the shape of this object?\nOptions:\n(A) circle\n(B) square" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.375, "loss": 4.091503620147705 }, { "id": "identity_76585", "image": "AI2D/images/1812.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trophic levels are shown in this food chain?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.21875, "loss": 4.318358421325684 }, { "id": "identity_70060", "image": "TQA/images/question_images/seasons_1045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which Earth position, labeled A, B, C, or D, represents the summer solstice in the Northern Hemisphere?" }, { "from": "gpt", "value": "The answer is Position B" } ], "clip_score": 37.4375, "loss": 4.197831153869629 }, { "id": "identity_151481", "image": "ScienceQA/images/308.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the Victoria crowned pigeon's plumage?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 36.625, "loss": 4.178150653839111 }, { "id": "identity_326618", "image": "A-OKVQA/images/148427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country's flag is on the white vehicle?\nChoices:\n(A) italy\n(B) ireland\n(C) germany\n(D) russia" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.0390625, "loss": 4.152050018310547 }, { "id": "identity_108561", "image": "A-OKVQA/images/31923.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.625, "loss": 4.106778144836426 }, { "id": "identity_281619", "image": "PMC-VQA/images/PMC6206379_Fig4_387477.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the histological image obtained through azan staining?\nChoices:\n(A) Parallel fibrillar structure of the collagen \n(B) Swelling of the collagen \n(C) Loss of the parallel fibrillar structure of the collagen \n(D) Enhancement of the parallel fibrillar structure of the collagen" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.375, "loss": 4.128909111022949 }, { "id": "identity_168976", "image": "IconQA/images/73664.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this shape symmetrical? If so, how many lines of symmetry does it have?" }, { "from": "gpt", "value": "The answer is Yes, 8" } ], "clip_score": 29.40625, "loss": 4.095409393310547 }, { "id": "identity_279662", "image": "FigureQA/images/26813.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Forest Green the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.53125, "loss": 4.578309535980225 }, { "id": "identity_142312", "image": "DVQA/images/bar_train_00164174.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item had the highest number of units sold in the 'dealer' store?" }, { "from": "gpt", "value": "The answer is mature" } ], "clip_score": 30.515625, "loss": 4.213266849517822 }, { "id": "identity_268087", "image": "AI2D/images/3140.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label corresponds to the upper epidermis of the leaf among the following choices: N, P, B, or E?\nChoices:\n(A) N\n(B) P\n(C) B\n(D) E" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.125, "loss": 4.582939147949219 }, { "id": "identity_334383", "image": "PMC-VQA/images/PMC6567228_F0002_493862.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What method was used to detect the pelvic mass?\nChoices:\n(A) MRI\n(B) CT scan\n(C) ultrasound\n(D) X-ray" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.75, "loss": 4.024941921234131 }, { "id": "identity_323081", "image": "TabMWP/images/9967.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the given schedule, which activity starts at 6:00 P.M.?\nChoices:\n(A) theater auditions\n(B) choir rehearsal\n(C) orchestra rehearsal\n(D) swim team practice" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.609375, "loss": 4.183335304260254 }, { "id": "identity_39183", "image": "DVQA/images/bar_train_00198554.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest 'roof' value?" }, { "from": "gpt", "value": "The answer is curve" } ], "clip_score": 21.921875, "loss": 4.2596049308776855 }, { "id": "identity_328259", "image": "ScienceQA/images/6496.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic do these items share?\nChoices:\n(A) salty\n(B) translucent\n(C) transparent" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.59375, "loss": 4.167112350463867 }, { "id": "identity_90757", "image": "UniGeo/images/1258.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If BM is tangent to circle O at point B and angle MBA is 140 degrees, what is the measure of angle OAC?" }, { "from": "gpt", "value": "The answer is Error in given information" } ], "clip_score": 34.8125, "loss": 4.627702236175537 }, { "id": "identity_280249", "image": "AI2D/images/4480.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents Catkin in the given choices?\n(A) A\n(B) C\n(C) I\n(D) K" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.0703125, "loss": 4.709001064300537 }, { "id": "identity_270072", "image": "DVQA/images/bar_train_00125154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: For any dataset, which algorithm has the lowest accuracy?" }, { "from": "gpt", "value": "The answer is wheel" } ], "clip_score": 28.40625, "loss": 4.121027946472168 }, { "id": "identity_241076", "image": "TextVQA/images/9daaebb265791d28.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many books are on the table, and what is the title of the book on the poster?" }, { "from": "gpt", "value": "The answer is 16 books, Il libreto" } ], "clip_score": 18.28125, "loss": 4.203919887542725 }, { "id": "identity_265633", "image": "A-OKVQA/images/429456.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the individual using an umbrella? Is it due to the sun, rain, snow, or a costume?\nChoices:\n(A) sun\n(B) rain\n(C) snow\n(D) costume" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.171875, "loss": 4.3349995613098145 }, { "id": "identity_115983", "image": "Super-CLEVR/images/superCLEVR_new_000041.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is the largest in the image?" }, { "from": "gpt", "value": "The answer is Bus" } ], "clip_score": 21.78125, "loss": 4.893558025360107 }, { "id": "identity_330533", "image": "PMC-VQA/images/PMC6745445_ccr32271-fig-0003_37944.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of this imaging technique?\nChoices:\n(A) Radiography\n(B) Magnetic resonance imaging\n(C) Computerized tomography\n(D) Nuclear medicine" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.453125, "loss": 4.140334606170654 }, { "id": "identity_18872", "image": "FigureQA/images/78464.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Peru the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.96875, "loss": 4.83525276184082 }, { "id": "identity_227865", "image": "VQA2.0/images/28698.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of animals are present in the picture?" }, { "from": "gpt", "value": "The answer is two" } ], "clip_score": 12.015625, "loss": 4.075467586517334 }, { "id": "identity_198351", "image": "A-OKVQA/images/356394.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the building on the left side of the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 21.828125, "loss": 4.045035362243652 }, { "id": "identity_305823", "image": "ScienceQA/images/8711.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared characteristic of these three items?\nChoices:\n(A) bumpy\n(B) fragile\n(C) sweet" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.96875, "loss": 4.5490031242370605 }, { "id": "identity_292412", "image": "A-OKVQA/images/147568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is the man seated?\nChoices:\n(A) bike\n(B) couch\n(C) dugout\n(D) bed" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.125, "loss": 4.510659217834473 }, { "id": "identity_282208", "image": "PMC-VQA/images/PMC5559574_Fig2_203939.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which radiograph view is referred to in the caption?\nChoices:\n(A) True profile view\n(B) False profile view\n(C) Lateral view\n(D) Posteroanterior view" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.609375, "loss": 4.412331581115723 }, { "id": "identity_285892", "image": "DocVQA/images/pjxf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the current President of the Pennsylvania Dietetic Association?" }, { "from": "gpt", "value": "The answer is Sister Maude Behrman" } ], "clip_score": 29.015625, "loss": 4.200591087341309 }, { "id": "identity_250686", "image": "A-OKVQA/images/521879.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How do the tracks ensure that the vessel pulled by the horse goes straight, and what additional elements in the image support the smooth operation of this system?" }, { "from": "gpt", "value": "The answer is Tracks guide wheels; driver manages horse; harness controls horse" } ], "clip_score": 21.0, "loss": 4.039173126220703 }, { "id": "identity_29814", "image": "A-OKVQA/images/98493.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What age people mostly utilize this space?\nChoices:\n(A) toddlers\n(B) teens\n(C) senior citizens\n(D) adults" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.484375, "loss": 4.60532283782959 }, { "id": "identity_317241", "image": "AI2D/images/3368.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the brightly colored part of a flower immediately surrounding the reproductive organs called?\nChoices:\n(A) Anthers\n(B) Pistil\n(C) Petal\n(D) Sepal" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.09375, "loss": 4.784339427947998 }, { "id": "identity_268268", "image": "PMC-VQA/images/PMC7564512_jcm-09-03040-f002_275604.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the wrist is impacted by the condition? \nOptions:\n(A) Peripheral nerve\n(B) Tendon\n(C) Capsule\n(D) Fascia" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.59375, "loss": 4.43986701965332 }, { "id": "identity_45500", "image": "DocVQA/images/khgl0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the reason given for not mailing the letter?" }, { "from": "gpt", "value": "The answer is Uncertainty about waiting for criticism" } ], "clip_score": 24.4375, "loss": 5.066354751586914 }, { "id": "identity_10284", "image": "A-OKVQA/images/67982.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is the red coat made of?\nChoices:\n(A) pic\n(B) nylon\n(C) leather\n(D) cotton" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.15625, "loss": 4.336838722229004 }, { "id": "identity_220502", "image": "VizWiz/images/VizWiz_train_00009579.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many potted plants are visible near the entrance?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.96875, "loss": 4.433192253112793 }, { "id": "identity_16558", "image": "TQA/images/abc_question_images/parts_neuron_17227.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the part that receives nerve impulses from other cells.\nChoices:\n(A) M\n(B) A\n(C) S\n(D) U" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.0, "loss": 4.01651668548584 }, { "id": "identity_21634", "image": "PMC-VQA/images/PMC6707203_Fig2_27521.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the yellow arrowhead indicate in the image? \nChoices:\n(A) Cell nucleus \n(B) Labeled skin cell \n(C) Mitochondria \n(D) Cell membrane. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.75, "loss": 4.524130821228027 }, { "id": "identity_306976", "image": "PMC-VQA/images/PMC5341329_F4_135511.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the upper right sections of the images?\nChoices:\n(A) Calreticulin\n(B) Nuclei\n(C) Radiated cells\n(D) Isotype control" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.953125, "loss": 4.889847755432129 }, { "id": "identity_254382", "image": "A-OKVQA/images/195561.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are three visual clues in the image that indicate the season is winter, and how do they collectively confirm the season?" }, { "from": "gpt", "value": "The answer is winter" } ], "clip_score": 17.765625, "loss": 5.283862113952637 }, { "id": "identity_222525", "image": "Super-CLEVR/images/superCLEVR_new_000883.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the smaller minivan to the left or right of the larger minivan?" }, { "from": "gpt", "value": "The answer is Left" } ], "clip_score": 27.0, "loss": 4.0897536277771 }, { "id": "identity_304234", "image": "AI2D/images/4061.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this diagram, which letter indicates the chromosphere?\nChoices:\n(A) G\n(B) F\n(C) E\n(D) H" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.765625, "loss": 4.250760555267334 }, { "id": "identity_12758", "image": "AI2D/images/4297.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the given image which layer lies above the upper mantle?\nChoices:\n(A) oceanic crust\n(B) continental crust\n(C) plastic asthenosphere\n(D) lithosphere" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.09375, "loss": 4.598475933074951 }, { "id": "identity_27568", "image": "A-OKVQA/images/231631.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of pattern is the road?\nChoices:\n(A) square\n(B) flat\n(C) bumpy\n(D) black" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 6.96484375, "loss": 4.170107364654541 }, { "id": "identity_1897", "image": "ScienceQA/images/19517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of South Carolina?\nChoices:\n(A) Charleston\n(B) Columbia\n(C) Lansing\n(D) Los Angeles" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.46875, "loss": 4.276419639587402 }, { "id": "identity_336876", "image": "Super-CLEVR/images/superCLEVR_new_000414.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any large green metal objects of the same shape as the large brown metal object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 23.296875, "loss": 5.248909950256348 }, { "id": "identity_328906", "image": "A-OKVQA/images/484327.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the sign discouraging at specific times?\nChoices:\n(A) parking\n(B) loitering\n(C) eating\n(D) turns" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.234375, "loss": 4.096197605133057 }, { "id": "identity_35282", "image": "A-OKVQA/images/350552.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the horse in the town center?\nChoices:\n(A) its shopping\n(B) its exercising\n(C) its working\n(D) its eating" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.03125, "loss": 4.294962406158447 }, { "id": "identity_308094", "image": "AI2D/images/2303.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram above, which letter/s depict this stage?\nChoices:\n(A) C & D\n(B) A & C\n(C) B & D\n(D) A & B" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.5703125, "loss": 4.1711554527282715 }, { "id": "identity_321791", "image": "ScienceQA/images/13369.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Phoenix\n(B) Helena\n(C) Billings\n(D) Providence" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.46875, "loss": 4.103692054748535 }, { "id": "identity_328805", "image": "ScienceQA/images/10468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Saint Paul\n(B) Honolulu\n(C) Boise\n(D) Nashville" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.328125, "loss": 4.137078285217285 }, { "id": "identity_35158", "image": "TextVQA/images/b66acdd4b6eaceea.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what type of fight is this?" }, { "from": "gpt", "value": "The answer is usf" } ], "clip_score": 20.765625, "loss": 4.0916900634765625 }, { "id": "identity_204790", "image": "A-OKVQA/images/157093.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible in the dugout?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.515625, "loss": 4.681199550628662 }, { "id": "identity_34171", "image": "PMC-VQA/images/PMC3905103_F3_262232.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the main symptom of the condition shown in the image? \nChoices:\n(A) Greenish spots on the retina \n(B) Bilateral hyperfluorescent lesions \n(C) Miosis \n(D) Diplopia " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.828125, "loss": 4.370083808898926 }, { "id": "identity_321386", "image": "IconQA/images/79194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can you trace a circle with this? \nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.53125, "loss": 4.379860877990723 }, { "id": "identity_275818", "image": "IconQA/images/68248.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the shape from the given options: cone, cube, or sphere.\nChoices:\n(A) cone\n(B) cube\n(C) sphere" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.1875, "loss": 4.091781139373779 }, { "id": "identity_269482", "image": "FigureQA/images/44986.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Bubblegum the highest possible option?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.984375, "loss": 4.579988479614258 }, { "id": "identity_335318", "image": "A-OKVQA/images/312041.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this item used for?\nChoices:\n(A) sharpener\n(B) paperweight\n(C) flynt\n(D) candle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.890625, "loss": 4.419105529785156 }, { "id": "identity_28421", "image": "A-OKVQA/images/213117.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the logo on the sides of the shoes worn by the baseball batter?\nChoices:\n(A) black\n(B) green\n(C) red\n(D) white" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.0, "loss": 4.018307209014893 }, { "id": "identity_246887", "image": "IconQA/images/21394.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you select two marbles without looking, how likely is it that both will be black?" }, { "from": "gpt", "value": "The answer is certain" } ], "clip_score": 30.109375, "loss": 4.4518046379089355 }, { "id": "identity_107677", "image": "TextVQA/images/f96a7cbc249b7684.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the flavor of the Trix cereal as per the box?" }, { "from": "gpt", "value": "The answer is Fruitalicious" } ], "clip_score": 28.875, "loss": 4.081615924835205 }, { "id": "identity_238657", "image": "A-OKVQA/images/195645.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the liquid in the bottle held by the smallest child, and how many people in the image are holding a glass with a similar colored liquid?" }, { "from": "gpt", "value": "The answer is zero" } ], "clip_score": 17.109375, "loss": 4.927109718322754 }, { "id": "identity_322847", "image": "Super-CLEVR/images/superCLEVR_new_001081.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the large object on the left side of the big blue bike the same color as the matte vehicle?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 29.5625, "loss": 5.200959205627441 }, { "id": "identity_273144", "image": "Super-CLEVR/images/superCLEVR_new_001529.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the large object in front of the small shiny truck?" }, { "from": "gpt", "value": "The answer is gray" } ], "clip_score": 17.765625, "loss": 4.0274200439453125 }, { "id": "identity_33841", "image": "VQA2.0/images/480153.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is that crane?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 15.9765625, "loss": 4.417080879211426 }, { "id": "identity_217091", "image": "TextVQA/images/00fbe21f9e1b79de.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many red lights are visible on the back of the bus?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 26.84375, "loss": 4.255101680755615 }, { "id": "identity_294694", "image": "A-OKVQA/images/341132.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the woman with gray hair leaning towards the table?\nChoices:\n(A) hiding\n(B) resting\n(C) sitting down\n(D) blowing candles" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.71875, "loss": 4.513150215148926 }, { "id": "identity_333713", "image": "A-OKVQA/images/418346.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on this?\n\nChoices:\n(A) statues\n(B) hair clips\n(C) monkeys\n(D) egg" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.3125, "loss": 4.49559211730957 }, { "id": "identity_327809", "image": "PMC-VQA/images/PMC5733951_fig1_254274.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which artery is significantly narrowed in this image?\nChoices:\n(A) Right internal carotid artery\n(B) Left internal carotid artery\n(C) Brachiocephalic artery\n(D) Femoral artery" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.3125, "loss": 4.202568054199219 }, { "id": "identity_302600", "image": "A-OKVQA/images/250592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What’s the term for how this is parked?\nChoices:\n(A) docked\n(B) waiting\n(C) anchored\n(D) setting" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.390625, "loss": 4.654778480529785 }, { "id": "identity_190118", "image": "Super-CLEVR/images/superCLEVR_new_001030.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the tiny vehicle to the left of the purple aeroplane made of?" }, { "from": "gpt", "value": "The answer is Metal" } ], "clip_score": 28.71875, "loss": 4.762967109680176 }, { "id": "identity_280188", "image": "IconQA/images/54607.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner more likely to land on? \nOptions:\n(A) white\n(B) blue" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.875, "loss": 4.425678253173828 }, { "id": "identity_309971", "image": "ScienceQA/images/16956.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is a mammal?\nChoices:\n(A) toco toucan\n(B) green iguana\n(C) thresher shark\n(D) sea otter" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.90625, "loss": 4.572842597961426 }, { "id": "identity_272738", "image": "A-OKVQA/images/157037.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of this building type?\nChoices:\n(A) library\n(B) mall\n(C) supermarket\n(D) deli" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.25, "loss": 4.494081974029541 }, { "id": "identity_297057", "image": "A-OKVQA/images/249288.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following signs belongs to a chain burger restaurant?\nChoices:\n(A) gifts\n(B) mcdonalds\n(C) forever 21\n(D) disney" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.796875, "loss": 4.150746822357178 }, { "id": "identity_47754", "image": "DocVQA/images/hgcg0024_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main agenda item to be reviewed in the meeting?" }, { "from": "gpt", "value": "The answer is Review of Procedure Note" } ], "clip_score": 25.421875, "loss": 4.456608295440674 }, { "id": "identity_13004", "image": "A-OKVQA/images/132336.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of company owns the roofless bus?\nChoices:\n(A) travel\n(B) city bus\n(C) tourist\n(D) sightseeing" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.203125, "loss": 4.06411600112915 }, { "id": "identity_16522", "image": "FigureQA/images/56463.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Olive the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.359375, "loss": 4.557492256164551 }, { "id": "identity_37510", "image": "DVQA/images/bar_train_00069984.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of luxury smaller than faint?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 19.75, "loss": 4.104161262512207 }, { "id": "identity_325131", "image": "PMC-VQA/images/PMC4933353_ppat.1005739.g001_47959.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the red arrows in the bottom row point to?\nChoices:\n(A) Metastases in the liver.\n(B) Inflammation in the lungs.\n(C) Cancer in the heart.\n(D) Enlarged spleen." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.0625, "loss": 4.187182426452637 }, { "id": "identity_331459", "image": "DVQA/images/bar_train_00093271.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are these bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.34375, "loss": 4.076539039611816 }, { "id": "identity_14444", "image": "TQA/images/question_images/human_system_digestive_6097.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the area labelled 'B'?\nChoices:\n(A) Stomach\n(B) Liver\n(C) Mouth\n(D) Colon" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.5, "loss": 4.394855499267578 }, { "id": "identity_945", "image": "FigureQA/images/66722.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Green Yellow the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.765625, "loss": 4.364634037017822 }, { "id": "identity_66340", "image": "AI2D/images/3716.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many salivary glands are labeled in the diagram of the digestive system?" }, { "from": "gpt", "value": "The answer is Three pairs" } ], "clip_score": 35.125, "loss": 5.071643829345703 }, { "id": "identity_269306", "image": "TQA/images/question_images/em_spectrum_6820.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option on the chart has the lowest wavelength?\nChoices:\n(A) Microwave\n(B) Radio\n(C) Visible\n(D) X-Ray" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.703125, "loss": 4.753439903259277 }, { "id": "identity_124425", "image": "VizWiz/images/VizWiz_train_00023633.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 18.171875, "loss": 4.597573280334473 }, { "id": "identity_293065", "image": "AI2D/images/2599.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram depicting the sun, moon, and Earth, which type of tides are represented at point D?\nChoices:\n(A) solar\n(B) spring\n(C) lunar\n(D) summer" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 37.65625, "loss": 4.421451568603516 }, { "id": "identity_310819", "image": "FigureQA/images/20844.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.140625, "loss": 4.687734127044678 }, { "id": "identity_18582", "image": "PMC-VQA/images/PMC8142488_Fig2_447912.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the photograph show? \nChoices:\n(A) The wing of M. sharovi specimen 212/26 with tarsal segments marked \n(B) The leg of M. sharovi specimen 212/26 under ethanol \n(C) M. sharovi specimen 212/26 in its natural habitat \n(D) M. sharovi specimen 212/26 being dissected " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.765625, "loss": 4.164651393890381 }, { "id": "identity_292227", "image": "VizWiz/images/VizWiz_train_00023807.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the pen?" }, { "from": "gpt", "value": "The answer is blue black red" } ], "clip_score": 24.109375, "loss": 4.459089756011963 }, { "id": "identity_288209", "image": "TabMWP/images/37204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.421875, "loss": 4.531805992126465 }, { "id": "identity_319265", "image": "Super-CLEVR/images/superCLEVR_new_001531.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a small metallic object of the same shape as the large blue one?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 19.546875, "loss": 4.98905086517334 }, { "id": "identity_30332", "image": "ScienceQA/images/10202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Victoria amazonica have cells that have a nucleus?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.21875, "loss": 4.56791877746582 }, { "id": "identity_282146", "image": "ScienceQA/images/20562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) Connecticut\n(B) North Carolina\n(C) Maryland\n(D) Indiana" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.421875, "loss": 4.498378276824951 }, { "id": "identity_284787", "image": "A-OKVQA/images/30133.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who is responsible for the conveyance shown in the image? \nOptions:\n(A) military\n(B) toy stores\n(C) movie studios\n(D) post office" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.9921875, "loss": 4.163351535797119 }, { "id": "identity_279974", "image": "Super-CLEVR/images/superCLEVR_new_000789.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do any small trucks have the same color as the small rubber double bus?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 18.84375, "loss": 5.489323139190674 }, { "id": "identity_303514", "image": "PMC-VQA/images/PMC2709904_F2_41527.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of these images?\nChoices:\n(A) to illustrate the behaviour of a lung cancer before and during therapy\n(B) to illustrate the behaviour of a colon cancer before and during therapy\n(C) to illustrate the behaviour of a kidney cancer before and during therapy\n(D) to illustrate the behaviour of a brain cancer before and during therapy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.015625, "loss": 4.157984733581543 }, { "id": "identity_299576", "image": "A-OKVQA/images/326161.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the lamps in assisting people with? \nChoices:\n(A) sleep\n(B) see\n(C) smell\n(D) hear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.15625, "loss": 4.9759721755981445 }, { "id": "identity_293834", "image": "PMC-VQA/images/PMC6000605_Figure1_328705.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which cavity is the sublobe mass located?\nChoices:\n(A) Right thoracic cavity\n(B) Abdominal cavity\n(C) Left thoracic cavity\n(D) Pelvic cavity" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.25, "loss": 4.417669296264648 }, { "id": "identity_160084", "image": "GeoQA+/images/4522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the straight ruler is considered as a transversal line, what type of angles are ∠1 and ∠2?" }, { "from": "gpt", "value": "The answer is adjacent angles" } ], "clip_score": 34.5625, "loss": 4.144569396972656 }, { "id": "identity_289711", "image": "TextVQA/images/0166adf82f1557c5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which letter is the final one on the train?" }, { "from": "gpt", "value": "The answer is l" } ], "clip_score": 19.53125, "loss": 4.714118003845215 }, { "id": "identity_278111", "image": "FigureQA/images/91567.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Dark Periwinkle or Medium Mint?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.140625, "loss": 4.490353584289551 }, { "id": "identity_323792", "image": "A-OKVQA/images/413248.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of transportation is depicted?\nChoices:\n(A) water\n(B) air\n(C) rail\n(D) road" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.984375, "loss": 4.374906539916992 }, { "id": "identity_27946", "image": "PMC-VQA/images/PMC3356314_pone-0037520-g003_138700.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the boxes in the micrographs indicate? \nChoices:\n(A) Areas with high muscle density \n(B) Areas with low muscle density \n(C) Areas shown in other micrographs \n(D) Areas with no muscle scars " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.96875, "loss": 4.249039173126221 }, { "id": "identity_331700", "image": "VQA2.0/images/390792.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are they coming out of?" }, { "from": "gpt", "value": "The answer is water" } ], "clip_score": 8.9609375, "loss": 4.056082725524902 }, { "id": "identity_326773", "image": "A-OKVQA/images/125380.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who has ended up in the water?\nChoices:\n(A) spectator\n(B) officer\n(C) child\n(D) jockey" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.546875, "loss": 4.074090003967285 }, { "id": "identity_266862", "image": "TQA/images/question_images/parts_worm_7334.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part connects the crop to the pharynx among the following choices: (A) aortic arches, (B) esophagus, (C) gizzard, or (D) brain?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.296875, "loss": 4.395272254943848 }, { "id": "identity_305793", "image": "TextVQA/images/a00f8bc5793d21d3.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you navigate this?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 15.0, "loss": 4.328258037567139 }, { "id": "identity_8906", "image": "IconQA/images/60431.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a plane. Which picture is ninth?\nChoices:\n(A) bear\n(B) train\n(C) plane" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.53125, "loss": 4.350930690765381 }, { "id": "identity_49597", "image": "DVQA/images/bar_train_00068523.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest number of people preferring the object desk?" }, { "from": "gpt", "value": "The answer is box" } ], "clip_score": 24.984375, "loss": 4.196843147277832 }, { "id": "identity_26036", "image": "ScienceQA/images/20530.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these two objects have in common?\nChoices:\n(A) yellow\n(B) stretchy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.59375, "loss": 4.572731018066406 }, { "id": "identity_114302", "image": "Super-CLEVR/images/superCLEVR_new_000638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly behind the small white bus?" }, { "from": "gpt", "value": "The answer is The vehicle is a tram" } ], "clip_score": 25.25, "loss": 4.381183624267578 }, { "id": "identity_305648", "image": "AI2D/images/3438.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label represents the Hind leg in the diagram?\nChoices:\n(A) BF\n(B) BE\n(C) BB\n(D) BS" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.734375, "loss": 4.217637538909912 }, { "id": "identity_190809", "image": "PMC-VQA/images/PMC6108962_Fig3_358587.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many teeth are visible in the lower jaw?" }, { "from": "gpt", "value": "The answer is Sixteen teeth" } ], "clip_score": 24.984375, "loss": 4.947610378265381 }, { "id": "identity_146014", "image": "DocVQA/images/xfhb0228_16.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is moderating the panel on Registration Systems in Other Countries?" }, { "from": "gpt", "value": "The answer is Mr. George Jones" } ], "clip_score": 25.046875, "loss": 4.037045478820801 }, { "id": "identity_195386", "image": "A-OKVQA/images/229969.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are standing in the water?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 16.90625, "loss": 4.87065315246582 }, { "id": "identity_288903", "image": "TabMWP/images/13157.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In 2006, which product cost more per cwt for the Idaho farmer, apples or Timothy seed?\nChoices:\n(A) apples\n(B) Timothy seed" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.4375, "loss": 4.15429162979126 }, { "id": "identity_105949", "image": "TextVQA/images/5719794943b820c9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the common feature of the clocks in the bottom row?" }, { "from": "gpt", "value": "The answer is Parents of" } ], "clip_score": 28.609375, "loss": 5.78749942779541 }, { "id": "identity_26432", "image": "ScienceQA/images/948.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Missouri?\nChoices:\n(A) Jefferson City\n(B) Indianapolis\n(C) Bismarck\n(D) Springfield" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.03125, "loss": 4.087604999542236 }, { "id": "identity_53345", "image": "DocVQA/images/hjmd0217_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the event location type?" }, { "from": "gpt", "value": "The answer is Office" } ], "clip_score": 18.859375, "loss": 4.327674388885498 }, { "id": "identity_311286", "image": "Super-CLEVR/images/superCLEVR_new_000012.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a small brown vehicle made of the same material as the brown tandem bike?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 17.359375, "loss": 5.173916816711426 }, { "id": "identity_304507", "image": "DVQA/images/bar_train_00174539.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this contain stacked bars?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.171875, "loss": 4.129334926605225 }, { "id": "identity_221635", "image": "TextVQA/images/e12cac7f4cd927a5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are wearing a suit in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 17.578125, "loss": 5.085844993591309 }, { "id": "identity_291153", "image": "PMC-VQA/images/PMC7541359_Fig2_267731.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the consensus contours in the image? \nChoices:\n(A) Contours from individual physicians \n(B) Scans from MRI \n(C) Contours of the target volume \n(D) None of the options provided" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.65625, "loss": 4.526276111602783 }, { "id": "identity_36056", "image": "VQA-AS/images/2562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the danger in the scene?" }, { "from": "gpt", "value": "The answer is child near water" } ], "clip_score": 27.9375, "loss": 4.113202095031738 }, { "id": "identity_209559", "image": "A-OKVQA/images/151900.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the book located to the right of the iPod?" }, { "from": "gpt", "value": "The answer is The book title is \"Black & White Crosswords" } ], "clip_score": 18.4375, "loss": 4.062323570251465 }, { "id": "identity_122892", "image": "A-OKVQA/images/134339.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the setting of the event in the image?" }, { "from": "gpt", "value": "The answer is An open field gathering" } ], "clip_score": 22.171875, "loss": 4.117271423339844 }, { "id": "identity_318636", "image": "DocVQA/images/hnfw0228_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the routing of this?" }, { "from": "gpt", "value": "The answer is Overnite" } ], "clip_score": 14.96875, "loss": 4.263061046600342 }, { "id": "identity_114890", "image": "PMC-VQA/images/PMC6127925_Fig5_364669.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different colors are used to label structures in the lower panel?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 16.046875, "loss": 4.114655494689941 }, { "id": "identity_231526", "image": "Super-CLEVR/images/superCLEVR_new_001448.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image are made of rubber, and what are their colors?" }, { "from": "gpt", "value": "The answer is One object, cyan" } ], "clip_score": 22.15625, "loss": 5.071091651916504 }, { "id": "identity_57392", "image": "FigureQA/images/49543.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title\" is a placeholder" } ], "clip_score": 36.5, "loss": 4.972033500671387 }, { "id": "identity_28982", "image": "ScienceQA/images/4146.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these three objects have in common?\nChoices:\n(A) blue\n(B) sour\n(C) stretchy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.046875, "loss": 4.42104434967041 }, { "id": "identity_20009", "image": "PMC-VQA/images/PMC6766146_fig2_42891.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the condition of the patient's upper lobes? \nChoices:\n(A) Normal \n(B) Inflamed \n(C) Cancerous \n(D) Blocked " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.40625, "loss": 4.252659797668457 }, { "id": "identity_218232", "image": "A-OKVQA/images/148427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of accessory is the horse wearing on its nose?" }, { "from": "gpt", "value": "The answer is A noseband" } ], "clip_score": 21.546875, "loss": 4.197845935821533 }, { "id": "identity_32272", "image": "PMC-VQA/images/PMC4478850_F6_399424.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the tissue indicated by the solid arrow in the image? \nChoices:\n(A) Stele \n(B) Cortex tissue \n(C) Epidermis \n(D) Phloem tissue " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.140625, "loss": 4.869128704071045 }, { "id": "identity_27575", "image": "PMC-VQA/images/PMC6054369_FIG1_344233.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of lesions are shown in the images? \nChoices:\n(A) Schwannoma \n(B) Meningioma \n(C) Glioblastoma \n(D) None of the above. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.5, "loss": 4.384221076965332 }, { "id": "identity_313121", "image": "Super-CLEVR/images/superCLEVR_new_000664.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a large blue thing of the same shape as a large red object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 10.7109375, "loss": 5.178650856018066 }, { "id": "identity_325016", "image": "A-OKVQA/images/392659.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the animal positioned while riding on the bike?\nChoices:\n(A) handlebar\n(B) lap\n(C) basket\n(D) seat" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.0625, "loss": 4.201257228851318 }, { "id": "identity_29181", "image": "A-OKVQA/images/268590.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which plant does the yellow item on the plate here originate?\nChoices:\n(A) celery\n(B) tomato\n(C) cucumber\n(D) mustard" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.375, "loss": 4.13237190246582 }, { "id": "identity_322207", "image": "A-OKVQA/images/190805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the item with the highest potassium content?\nChoices:\n(A) top\n(B) right\n(C) bottom\n(D) left" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.3203125, "loss": 4.023940563201904 }, { "id": "identity_321187", "image": "ScienceQA/images/4305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared characteristic of these items?\nChoices:\n(A) breakable\n(B) soft" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.71875, "loss": 4.552390098571777 }, { "id": "identity_35327", "image": "VQA-AS/images/12810.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Could this room need a good cleaning?" }, { "from": "gpt", "value": "The answer is i cannot tell" } ], "clip_score": 29.328125, "loss": 4.1654582023620605 }, { "id": "identity_315563", "image": "TQA/images/question_images/earth_eclipses_681.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of these events?\nChoices:\n(A) Solar eclipse\n(B) Moon equinox\n(C) Moon eclipse\n(D) Sun equinox" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.578125, "loss": 4.066766738891602 }, { "id": "identity_121637", "image": "VQA2.0/images/143845.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are wearing white shirts in the background?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 12.328125, "loss": 5.474802494049072 }, { "id": "identity_323202", "image": "PMC-VQA/images/PMC9078808_fig4_274145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which frame of this image is being predicted in the algorithmic annotation?\nChoices:\n(A) 1st frame,\n(B) 5th frame,\n(C) 15th frame,\n(D) 25th frame" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.078125, "loss": 4.044369220733643 }, { "id": "identity_104407", "image": "Super-CLEVR/images/superCLEVR_new_001723.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any vehicles that are the same color as the helicopter?" }, { "from": "gpt", "value": "The answer is Yes, one vehicle" } ], "clip_score": 25.578125, "loss": 4.343465328216553 }, { "id": "identity_115659", "image": "Super-CLEVR/images/superCLEVR_new_000663.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Green and blue" } ], "clip_score": 20.015625, "loss": 4.0650315284729 }, { "id": "identity_328118", "image": "PMC-VQA/images/PMC5020733_f2_72526.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of view is depicted in all the panels except for panel h?\nChoices:\n(A) lateral view\n(B) anterior view\n(C) ventral view\n(D) dorsal view" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.609375, "loss": 4.719604015350342 }, { "id": "identity_273591", "image": "VQA2.0/images/239752.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the stripes in the legs run horizontally or vertically?" }, { "from": "gpt", "value": "The answer is both" } ], "clip_score": 22.5, "loss": 4.223714351654053 }, { "id": "identity_320731", "image": "PMC-VQA/images/PMC7305181_Fig2_195746.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of lesions are depicted in the images?\nChoices:\n(A) Vascular lesions\n(B) Infectious lesions\n(C) Traumatic lesions\n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.671875, "loss": 4.748293876647949 }, { "id": "identity_107501", "image": "VizWiz/images/VizWiz_train_00011327.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What setting is the washing machine currently on?" }, { "from": "gpt", "value": "The answer is Off" } ], "clip_score": 26.9375, "loss": 4.776126861572266 }, { "id": "identity_294401", "image": "TabMWP/images/6137.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the given schedule, at what time does the bus depart from the mall?\nChoices:\n(A) mall\n(B) playground\n(C) grocery store\n(D) library" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 33.75, "loss": 4.084296703338623 }, { "id": "identity_19660", "image": "DocVQA/images/qkxn0226_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How should the implant fixation be checked to ensure that the cup is secure in the bone?" }, { "from": "gpt", "value": "The answer is with finger pressure" } ], "clip_score": 26.875, "loss": 4.620475769042969 }, { "id": "identity_296101", "image": "A-OKVQA/images/293825.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the current situation on the road, with the following options:\n(A) protest\n(B) parade\n(C) traffic jam\n(D) car accident" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.90625, "loss": 4.500116348266602 }, { "id": "identity_334307", "image": "DVQA/images/bar_train_00150451.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are these bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.375, "loss": 4.14223575592041 }, { "id": "identity_331686", "image": "PMC-VQA/images/PMC9227253_medicina-58-00698-f003_322110.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which leaflet is involved in mitral valve prolapse?\nChoices:\n(A) Anterior leaflet\n(B) Posterior leaflet\n(C) Both leaflets\n(D) The mitral valve does not have leaflets." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.03125, "loss": 4.150794982910156 }, { "id": "identity_320220", "image": "TabMWP/images/33873.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table displays a function. Is this function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.515625, "loss": 4.1065144538879395 }, { "id": "identity_302292", "image": "Super-CLEVR/images/superCLEVR_new_000322.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the rubber bus the same size as the matte object behind the tiny blue motorbike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 28.640625, "loss": 4.952467918395996 }, { "id": "identity_195711", "image": "A-OKVQA/images/522990.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many horses are pulling the carriage?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 30.71875, "loss": 4.095959663391113 }, { "id": "identity_283813", "image": "FigureQA/images/96038.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Periwinkle the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.875, "loss": 4.234657287597656 }, { "id": "identity_321517", "image": "DocVQA/images/nsnm0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the head of the foundation?" }, { "from": "gpt", "value": "The answer is Arthur I. Sternhell" } ], "clip_score": 17.390625, "loss": 5.154531478881836 }, { "id": "identity_335749", "image": "A-OKVQA/images/381948.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of aircraft is flying above?\nChoices:\n(A) cargo\n(B) passenger\n(C) private\n(D) military" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.921875, "loss": 4.103175640106201 }, { "id": "identity_290672", "image": "PMC-VQA/images/PMC7532002_fig12_265168.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to create the image? \nOptions:\n(A) Positron Emission Tomography (PET)\n(B) Magnetic Resonance Imaging (MRI)\n(C) Computed Tomography (CT)\n(D) Electroencephalogram (EEG)" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.390625, "loss": 4.940206527709961 }, { "id": "identity_311553", "image": "Super-CLEVR/images/superCLEVR_new_001375.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a matte dirtbike that is the same size as the gray object to the left of the blue metal bicycle?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 27.109375, "loss": 5.167848587036133 }, { "id": "identity_283340", "image": "A-OKVQA/images/334775.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the maximum number of people that can sleep in this room? \n(A) six \n(B) four \n(C) two \n(D) eight" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.125, "loss": 4.004627704620361 }, { "id": "identity_334292", "image": "TextVQA/images/2633f60ac1bc0ea9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which sponsor has a yellow sign?" }, { "from": "gpt", "value": "The answer is continental " } ], "clip_score": 25.875, "loss": 4.272586822509766 }, { "id": "identity_148974", "image": "DocVQA/images/tgdd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the representative mentioned in the letter?" }, { "from": "gpt", "value": "The answer is Paul Tweeten" } ], "clip_score": 24.546875, "loss": 4.230353832244873 }, { "id": "identity_194795", "image": "A-OKVQA/images/277384.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the poles on the platform?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 17.390625, "loss": 4.108301162719727 }, { "id": "identity_220515", "image": "VQA2.0/images/277215.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many street lamps can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.171875, "loss": 4.278235912322998 }, { "id": "identity_207454", "image": "PMC-VQA/images/PMC8579540_Fig1_107443.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the heart size within normal limits?" }, { "from": "gpt", "value": "The answer is Heart size appears normal" } ], "clip_score": 21.90625, "loss": 4.245813846588135 }, { "id": "identity_8730", "image": "TQA/images/question_images/hydrocarbons_7049.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which element are the hydrogen atoms connected to?\nChoices:\n(A) HC\n(B) None\n(C) CH\n(D) C" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.890625, "loss": 4.110520362854004 }, { "id": "identity_332960", "image": "A-OKVQA/images/13465.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person in the green shirt's hands touching?\nChoices:\n(A) basketball hoop\n(B) chair\n(C) pillow\n(D) skateboard" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.015625, "loss": 4.597670555114746 }, { "id": "identity_47109", "image": "DVQA/images/bar_train_00092685.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest value for \"match\"?" }, { "from": "gpt", "value": "The answer is spread" } ], "clip_score": 21.65625, "loss": 4.9034342765808105 }, { "id": "identity_272084", "image": "VizWiz/images/VizWiz_train_00001637.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do my clothes need to be washed?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 15.359375, "loss": 4.2613654136657715 }, { "id": "identity_9992", "image": "ScienceQA/images/16956.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Select the mammal below.\nChoices:\n(A) toco toucan\n(B) green iguana\n(C) thresher shark\n(D) sea otter" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.234375, "loss": 4.753976821899414 }, { "id": "identity_219144", "image": "A-OKVQA/images/523729.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing red clothing in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 16.234375, "loss": 4.283212661743164 }, { "id": "identity_332708", "image": "VQA2.0/images/210275.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the giraffe located?" }, { "from": "gpt", "value": "The answer is zoo" } ], "clip_score": 30.546875, "loss": 4.009530067443848 }, { "id": "identity_1288", "image": "PMC-VQA/images/PMC7346301_FIG1_208846.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the mass observed in the MRI scan? \nChoices:\n(A) Left frontal lobe \n(B) Right frontal lobe \n(C) Left temporal lobe \n(D) Right temporal lobe " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.921875, "loss": 4.277954578399658 }, { "id": "identity_24228", "image": "A-OKVQA/images/3538.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person in the jacket holding?\nChoices:\n(A) basket\n(B) kittens\n(C) eggs\n(D) skis" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.65625, "loss": 4.210301399230957 }, { "id": "identity_70754", "image": "ScienceQA/images/264.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many slots does the handle of the screwdriver have?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.828125, "loss": 4.101341724395752 }, { "id": "identity_275826", "image": "DVQA/images/bar_train_00091417.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Among all the datasets, which algorithm has the lowest total accuracy?" }, { "from": "gpt", "value": "The answer is chip" } ], "clip_score": 29.171875, "loss": 4.3364081382751465 }, { "id": "identity_335501", "image": "IconQA/images/78593.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a dog. What is the tenth image?\nChoices:\n(A) dog\n(B) paw\n(C) house" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.640625, "loss": 4.625583648681641 }, { "id": "identity_207521", "image": "PMC-VQA/images/PMC6694554_Fig1_23746.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is immediately adjacent to the upper arrow?" }, { "from": "gpt", "value": "The answer is The liver" } ], "clip_score": 25.046875, "loss": 4.264408111572266 }, { "id": "identity_24142", "image": "TextVQA/images/38451ec391f20ab5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: is this an ipa?" }, { "from": "gpt", "value": "The answer is amish way" } ], "clip_score": 27.625, "loss": 6.5698676109313965 }, { "id": "identity_290941", "image": "A-OKVQA/images/254917.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color of the paint on the brick squares at the base of this building?\nChoices:\n(A) blue\n(B) red\n(C) white\n(D) tan" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.703125, "loss": 4.3191914558410645 }, { "id": "identity_304395", "image": "VQA-AS/images/13017.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the cat?" }, { "from": "gpt", "value": "The answer is tan" } ], "clip_score": 20.0, "loss": 4.61984920501709 }, { "id": "identity_189530", "image": "TextVQA/images/0995a1cba85060cd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many books have 'Ecuador' written on their spines?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 33.90625, "loss": 4.3656415939331055 }, { "id": "identity_304784", "image": "PMC-VQA/images/PMC7416230_fig3_232014.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the radiographic evidence for degeneration in the upper part of the cervical spine?\nChoices:\n(A) Upper arrow\n(B) Lower arrow\n(C) Both A and B\n(D) None of the above" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.328125, "loss": 4.402370929718018 }, { "id": "identity_211217", "image": "PMC-VQA/images/PMC8008917_f2_406371.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many black asterisks are present in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.46875, "loss": 4.581908702850342 }, { "id": "identity_146325", "image": "DocVQA/images/rxvh0227_17.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the document suggesting the center should do?" }, { "from": "gpt", "value": "The answer is Intensify intervention activities and ensure routine and frequent contacts" } ], "clip_score": 21.078125, "loss": 4.462183475494385 }, { "id": "identity_30494", "image": "A-OKVQA/images/160597.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why do safety workers wear this florescent color?\nChoices:\n(A) visibility\n(B) style\n(C) tradition\n(D) cheaper" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.8359375, "loss": 4.120186805725098 }, { "id": "identity_329711", "image": "FigureQA/images/50646.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Violet the highest median value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.546875, "loss": 4.4739603996276855 }, { "id": "identity_30606", "image": "PMC-VQA/images/PMC3407708_F5_147299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What types of images were taken of the cervical spine? \nChoices:\n(A) Sagittal and Coronal \n(B) Axial and Oblique \n(C) Transverse and MIP \n(D) Lateral and Computed Tomography " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.90625, "loss": 4.196803092956543 }, { "id": "identity_310257", "image": "FigureQA/images/97576.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this color have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.875, "loss": 4.303340911865234 }, { "id": "identity_284500", "image": "A-OKVQA/images/251723.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which video game console is positioned vertically among the following options: Playstation, Dreamcast, Xbox, or Wii?\nChoices:\n(A) playstation\n(B) dreamcast\n(C) xbox\n(D) wii" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.90625, "loss": 4.046050071716309 }, { "id": "identity_26958", "image": "A-OKVQA/images/448694.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The object they are reaching for resembles what?\nChoices:\n(A) cone\n(B) car\n(C) bucket\n(D) flying saucer" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 8.515625, "loss": 4.052577018737793 }, { "id": "identity_331606", "image": "ScienceQA/images/9466.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Annapolis\n(B) Biloxi\n(C) Albany\n(D) Baltimore" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.625, "loss": 4.721351146697998 }, { "id": "identity_275685", "image": "Super-CLEVR/images/superCLEVR_new_001340.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the shiny airplane in front of the large purple object and the metallic airplane behind the large airplane of the same size?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 28.421875, "loss": 4.930474758148193 }, { "id": "identity_300021", "image": "PMC-VQA/images/PMC6110783_Fig2_359575.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the main subject of the experiment depicted in the image?\nChoices:\n(A) Analysis of AQP4 in the brain\n(B) Investigation of the blood-brain barrier in EAE mice\n(C) Examination of GFAP expression in astrocytes\n(D) Comparison of spinal cord structure in healthy mice and EAE mice" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.1875, "loss": 4.207808971405029 }, { "id": "identity_326494", "image": "A-OKVQA/images/97379.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What missing items allow kites to be easily flown here?\nChoices:\n(A) kids\n(B) power lines\n(C) trains\n(D) parents" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.90625, "loss": 4.815475940704346 }, { "id": "identity_300963", "image": "ScienceQA/images/14827.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Frankfort\n(B) Columbia\n(C) Oklahoma City\n(D) Detroit" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.78125, "loss": 4.309498310089111 }, { "id": "identity_26452", "image": "IconQA/images/53715.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a dog. Which picture is ninth?\nChoices:\n(A) dog\n(B) house\n(C) paw" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.984375, "loss": 4.321678161621094 }, { "id": "identity_33402", "image": "ScienceQA/images/6907.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Maryland?\nChoices:\n(A) Provo\n(B) Baltimore\n(C) Annapolis\n(D) Providence" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.265625, "loss": 4.335372447967529 }, { "id": "identity_304555", "image": "DocVQA/images/rlvm0227_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the expected output mentioned in the analysis?" }, { "from": "gpt", "value": "The answer is report" } ], "clip_score": 21.765625, "loss": 4.875443935394287 }, { "id": "identity_288228", "image": "PMC-VQA/images/PMC2741099_fig1_45777.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the arrow in the image?\nChoices:\n(A) Cancer cells in the blood vessels \n(B) Cancer cells invading the extracellular matrix \n(C) Normal cells in the extracellular matrix \n(D) Blood vessels in the extracellular matrix" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.328125, "loss": 4.549975872039795 }, { "id": "identity_43151", "image": "DocVQA/images/myhw0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last name of the individual affiliated with General Mills, Inc.?" }, { "from": "gpt", "value": "The answer is Whitehill" } ], "clip_score": 19.46875, "loss": 4.8221611976623535 }, { "id": "identity_261520", "image": "TextVQA/images/629b9642469d7ec4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this location currently crowded?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 19.65625, "loss": 4.859133243560791 }, { "id": "identity_281646", "image": "PMC-VQA/images/PMC7049819_fig3_126777.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrow in the image signify?\nOptions:\n(A) Biomaterial residual\n(B) Osteocyte\n(C) Blood vessel\n(D) Bone marrow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.890625, "loss": 4.759465217590332 }, { "id": "identity_303609", "image": "A-OKVQA/images/314690.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What small creature might be living here?\nChoices:\n(A) baby\n(B) midget\n(C) mini monster\n(D) monkey" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.265625, "loss": 4.15117883682251 }, { "id": "identity_185606", "image": "TextVQA/images/a3b05e67061aecaa.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many handles are on the trophy?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.828125, "loss": 4.904426574707031 }, { "id": "identity_314593", "image": "FigureQA/images/36520.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Saddle Brown the smallest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.1875, "loss": 4.101746082305908 }, { "id": "identity_278259", "image": "DVQA/images/bar_train_00131888.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of bars with values exceeding 7?" }, { "from": "gpt", "value": "The answer is one" } ], "clip_score": 21.328125, "loss": 4.186166763305664 }, { "id": "identity_30145", "image": "ScienceQA/images/16346.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of New Jersey?\nChoices:\n(A) Trenton\n(B) Fargo\n(C) Montpelier\n(D) Newark" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.53125, "loss": 4.16325569152832 }, { "id": "identity_273861", "image": "ScienceQA/images/476.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Rhode Island?\nChoices:\n(A) Newport\n(B) Augusta\n(C) Providence\n(D) Carson City" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.71875, "loss": 4.034457683563232 }, { "id": "identity_3134", "image": "PMC-VQA/images/PMC3328163_fig5_134184.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What finding on the CT images suggests the first stroke is a hemorrhagic infarct rather than a hemorrhagic stroke? \nChoices:\n(A) Dense left middle cerebral artery sign on axial view. \n(B) Dense right middle cerebral artery sign on coronal view. \n(C) Presence of acute hydrocephalus. \n(D) Presence of mass effect. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.125, "loss": 4.075101852416992 }, { "id": "identity_15120", "image": "FigureQA/images/29896.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Peru greater than Orchid?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.71875, "loss": 4.1247029304504395 }, { "id": "identity_312723", "image": "FigureQA/images/97246.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Salmon the one with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.8671875, "loss": 4.145515441894531 }, { "id": "identity_195667", "image": "A-OKVQA/images/406949.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the signboard to the left of the traffic light?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 29.703125, "loss": 4.28863525390625 }, { "id": "identity_25497", "image": "VQA-AS/images/5763.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the woman kneeling?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 19.515625, "loss": 4.158689022064209 }, { "id": "identity_295056", "image": "PMC-VQA/images/PMC7592484_f2_284001.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In a patient with a CT pattern of UIP, which lung zones are impacted? \nChoices:\n(A) upper zones\n(B) middle zones\n(C) lower zones\n(D) all zones" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.40625, "loss": 4.002614498138428 }, { "id": "identity_88118", "image": "GeoQA+/images/100.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the path Xiao Ming walks before returning to point A?" }, { "from": "gpt", "value": "The answer is A regular decagon" } ], "clip_score": 28.828125, "loss": 4.137760639190674 }, { "id": "identity_325786", "image": "PMC-VQA/images/PMC3885112_RRT079F3_257329.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is clearly visible in this image?\nChoices:\n(A) Contour of a gross tumor volume\n(B) Muscles in a tongue\n(C) Applicator points in a tongue\n(D) Bone structure of a jaw" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.8125, "loss": 4.993101596832275 }, { "id": "identity_291795", "image": "ScienceQA/images/8962.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Iowa?\nChoices:\n(A) Des Moines\n(B) Cedar Rapids\n(C) Davenport\n(D) Frankfort" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.40625, "loss": 4.199023246765137 }, { "id": "identity_189009", "image": "VQA2.0/images/233093.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible vehicles are parked in the background?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 11.1171875, "loss": 4.803106784820557 }, { "id": "identity_17316", "image": "TextVQA/images/064068705af4ecab.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what brand of watch it this?" }, { "from": "gpt", "value": "The answer is rolec" } ], "clip_score": 25.25, "loss": 4.639298439025879 }, { "id": "identity_327487", "image": "ScienceQA/images/1125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Denver\n(B) Salem\n(C) Olympia\n(D) Charleston" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.53125, "loss": 4.315074920654297 }, { "id": "identity_1690", "image": "ScienceQA/images/14899.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) sour\n(B) salty" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.6875, "loss": 4.487249374389648 }, { "id": "identity_33202", "image": "PMC-VQA/images/PMC5014933_fig3_71028.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the superior mesenteric artery in relation to the superior mesenteric vein? \nChoices:\n(A) To its left \n(B) Below it \n(C) To its right \n(D) Above it. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.453125, "loss": 4.04114294052124 }, { "id": "identity_262055", "image": "AI2D/images/1836.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the nourishment requirement for the grasshopper?\nChoices:\n(A) grass\n(B) deer\n(C) snail\n(D) none of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.4375, "loss": 4.071853160858154 }, { "id": "identity_213656", "image": "A-OKVQA/images/199136.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color worn by the audience?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 14.5390625, "loss": 4.288140773773193 }, { "id": "identity_282031", "image": "PMC-VQA/images/PMC4937711_F19_49533.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the MRI result regarding the area of the right cavernous sinus?\nChoices:\n(A) Hyperintense soft tissue deposit \n(B) Hypointense soft tissue deposit \n(C) Isointense soft tissue deposit \n(D) No soft tissue deposit seen" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.890625, "loss": 4.348855018615723 }, { "id": "identity_328301", "image": "TextVQA/images/980c3b567809621d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is this place?" }, { "from": "gpt", "value": "The answer is publix" } ], "clip_score": 21.9375, "loss": 4.600147247314453 }, { "id": "identity_225352", "image": "GeoQA+/images/3380.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在平行四边形ABCD中,E为AD的中点,△DEF的周长为1,求△BCF的面积" }, { "from": "gpt", "value": "The answer is height" } ], "clip_score": 23.828125, "loss": 5.305912971496582 }, { "id": "identity_286179", "image": "PMC-VQA/images/PMC8238105_f1_481627.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to capture the image depicted in the caption? \nOptions:\n(A) CT scan\n(B) X-ray\n(C) MRI\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.984375, "loss": 4.091913223266602 }, { "id": "identity_318080", "image": "ScienceQA/images/3304.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which creature's feet are also adapted for sticking to smooth surfaces?\nChoices:\n(A) monitor lizard\n(B) Costa Rica brook frog" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.9375, "loss": 4.047082901000977 }, { "id": "identity_249643", "image": "PMC-VQA/images/PMC6224014_Fig4_393846.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What do the dotted lines in c and e indicate, and how does the structure in image f relate to this indication?" }, { "from": "gpt", "value": "The answer is Image f shows a PED similar to c and e" } ], "clip_score": 25.703125, "loss": 4.3271684646606445 }, { "id": "identity_28117", "image": "ScienceQA/images/5485.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these two objects have in common?\nChoices:\n(A) soft\n(B) stretchy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.421875, "loss": 4.422303676605225 }, { "id": "identity_27730", "image": "IconQA/images/79174.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of fish even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.796875, "loss": 4.0158233642578125 }, { "id": "identity_120304", "image": "Super-CLEVR/images/superCLEVR_new_000595.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object located in the middle of the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 19.234375, "loss": 4.028605937957764 }, { "id": "identity_59042", "image": "MapQA/images/map_9890.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color representing the range 61-61?" }, { "from": "gpt", "value": "The answer is Light grey" } ], "clip_score": 17.8125, "loss": 4.46065092086792 }, { "id": "identity_25965", "image": "VQA-AS/images/3122.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are their shirts the same color?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 14.84375, "loss": 4.137913703918457 }, { "id": "identity_266738", "image": "AI2D/images/3856.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the bottom part in this diagram:\n(A) hypodermis\n(B) ground tissue\n(C) epidermis\n(D) xylem" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.375, "loss": 4.1065473556518555 }, { "id": "identity_193267", "image": "A-OKVQA/images/424434.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cooking pots can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.59375, "loss": 4.184500694274902 }, { "id": "identity_293557", "image": "PMC-VQA/images/PMC2766888_F0002_48677.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the bone marrow edema located among the following choices: (A) Femoral neck, (B) Acetabulum, (C) Femoral head, or (D) Ilium?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.421875, "loss": 4.242722034454346 }, { "id": "identity_306047", "image": "FigureQA/images/91759.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Steel Blue the highest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.078125, "loss": 4.370638847351074 }, { "id": "identity_149587", "image": "DocVQA/images/xngv0228_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the individual planning to attend the meeting?" }, { "from": "gpt", "value": "The answer is Francoise PONT" } ], "clip_score": 23.109375, "loss": 4.365916728973389 }, { "id": "identity_292967", "image": "PMC-VQA/images/PMC2365560_pone-0002125-g002_21766.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique is utilized to visualize tumors among the following options: CT scan, MRI, X-ray, or PET scan?\nChoices:\n(A) CT scan \n(B) MRI \n(C) X-ray \n(D) PET scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.75, "loss": 4.388330936431885 }, { "id": "identity_310935", "image": "FigureQA/images/38538.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Red the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.546875, "loss": 4.6600141525268555 }, { "id": "identity_320839", "image": "PMC-VQA/images/PMC8085189_Fig12_431025.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the triangle-shaped area here? \nChoices:\n(A) An area of lung hemorrhage \n(B) Part of the surgical tool used during the procedure \n(C) Scar tissue from a previous medical procedure \n(D) A blood vessel in the lung tissue." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.875, "loss": 4.14709997177124 }, { "id": "identity_263957", "image": "DVQA/images/bar_train_00006038.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm in the dataset has a higher accuracy, the one labeled \"budget\" or the one labeled \"tone\"?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 33.71875, "loss": 4.131171226501465 }, { "id": "identity_192611", "image": "VQA2.0/images/381476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of event is taking place in the image?" }, { "from": "gpt", "value": "The answer is Urban skiing exhibition or competition" } ], "clip_score": 30.953125, "loss": 4.345634937286377 }, { "id": "identity_323378", "image": "ScienceQA/images/9313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this clothespin made of?\nChoices:\n(A) porcelain\n(B) metal" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.140625, "loss": 4.052355766296387 }, { "id": "identity_338201", "image": "AI2D/images/3884.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the lateral bud?\nChoices:\n(A) E\n(B) I\n(C) J\n(D) G" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.3515625, "loss": 4.091369152069092 }, { "id": "identity_119306", "image": "VQA-AS/images/525.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many clouds are visible in the sky?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.515625, "loss": 4.164328098297119 }, { "id": "identity_333829", "image": "A-OKVQA/images/395339.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of shop are people gathered near?\nChoices:\n(A) flag\n(B) coffee\n(C) resale\n(D) bike" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.734375, "loss": 4.048999309539795 }, { "id": "identity_17747", "image": "ScienceQA/images/14186.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of South Carolina?\nChoices:\n(A) Jackson\n(B) Charleston\n(C) Columbia\n(D) Baton Rouge" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.359375, "loss": 4.1857452392578125 }, { "id": "identity_71005", "image": "ScienceQA/images/14116.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries are visible in this image excluding the highlighted one?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 29.546875, "loss": 4.370959281921387 }, { "id": "identity_248313", "image": "ScienceQA/images/20530.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the common properties of these two objects, and which one of these properties is related to their color and state of matter?" }, { "from": "gpt", "value": "The answer is yellow" } ], "clip_score": 35.15625, "loss": 6.311620712280273 }, { "id": "identity_277964", "image": "VQA2.0/images/160848.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the boat named after a woman?" }, { "from": "gpt", "value": "The answer is female" } ], "clip_score": 20.4375, "loss": 5.404062747955322 }, { "id": "identity_218898", "image": "VizWiz/images/VizWiz_train_00010672.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of establishment is Rusty's according to the advertisement on the vehicle?" }, { "from": "gpt", "value": "The answer is Restoring Hotel" } ], "clip_score": 27.546875, "loss": 5.401065349578857 }, { "id": "identity_265038", "image": "FigureQA/images/97550.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Violet Red the color with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.828125, "loss": 4.006404399871826 }, { "id": "identity_197158", "image": "PMC-VQA/images/PMC4585177_F4_427482.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many areas of high activity are shown in the brain scan?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 30.515625, "loss": 4.6733856201171875 }, { "id": "identity_23604", "image": "FigureQA/images/53217.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Turquoise the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.375, "loss": 4.165704250335693 }, { "id": "identity_331661", "image": "A-OKVQA/images/463151.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term for a large group of people watching something?\nChoices:\n(A) gang\n(B) family\n(C) crowd\n(D) colony" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.8125, "loss": 4.280045032501221 }, { "id": "identity_16581", "image": "PMC-VQA/images/PMC6701896_FIG2_25982.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the solid red arrow in the image indicate? \nChoices:\n(A) Damage to the optic nerve \n(B) Enhancement of the prelaminar optic nerve \n(C) Protrusion of the intraocular optic nerve \n(D) None of the above. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.625, "loss": 4.154329299926758 }, { "id": "identity_9810", "image": "VQA-AS/images/8076.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the boy to the right playing soccer?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 23.265625, "loss": 4.047664642333984 }, { "id": "identity_38142", "image": "PMC-VQA/images/PMC7713440_Fig1_314367.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What condition is the patient depicted in images a-i suffering from? \nChoices:\n(A) Multiple sclerosis \n(B) Crohn's disease \n(C) Autoimmune retinopathy \n(D) Astigmatism " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.15625, "loss": 4.623376846313477 }, { "id": "identity_332266", "image": "PMC-VQA/images/PMC2766888_F0002_48677.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the bone marrow edema located?\nChoices:\n(A) Femoral neck\n(B) Acetabulum\n(C) Femoral head\n(D) Ilium" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.140625, "loss": 4.357921600341797 }, { "id": "identity_148205", "image": "DVQA/images/bar_train_00089131.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store has the least total sales across all item categories?" }, { "from": "gpt", "value": "The answer is teeth store" } ], "clip_score": 31.140625, "loss": 4.171748638153076 }, { "id": "identity_300833", "image": "A-OKVQA/images/365217.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of tennis swing is the person on the bottom of the court in the middle of?\nChoices:\n(A) serve\n(B) backhand\n(C) drop shot\n(D) forearm" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.015625, "loss": 4.393797874450684 }, { "id": "identity_283000", "image": "A-OKVQA/images/172690.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the intended target to prevent the ball from touching? \nChoices:\n(A) grass\n(B) sand\n(C) people\n(D) net" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.796875, "loss": 4.873419284820557 }, { "id": "identity_25702", "image": "DocVQA/images/rgvh0023_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which name, the check is issued to?" }, { "from": "gpt", "value": "The answer is NECSA" } ], "clip_score": 20.0625, "loss": 4.165153503417969 }, { "id": "identity_334371", "image": "TextVQA/images/f5fcdb109b888ee0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is this bus heading?" }, { "from": "gpt", "value": "The answer is moscou" } ], "clip_score": 32.53125, "loss": 4.206563949584961 }, { "id": "identity_37736", "image": "PMC-VQA/images/PMC5969794_FIG2_319134.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the left cerebral artery is affected? \nChoices:\n(A) M1 \n(B) M2 \n(C) M3 \n(D) M4 " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.8125, "loss": 4.025834560394287 }, { "id": "identity_210310", "image": "PMC-VQA/images/PMC7842140_f9_355122.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the overall shape of the brain section in the image?" }, { "from": "gpt", "value": "The answer is Circular with natural brain contours" } ], "clip_score": 28.015625, "loss": 4.780970573425293 }, { "id": "identity_834", "image": "ScienceQA/images/17856.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is highlighted?\nChoices:\n(A) Europe\n(B) Australia\n(C) Antarctica\n(D) South America" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.59375, "loss": 4.906271934509277 }, { "id": "identity_210722", "image": "A-OKVQA/images/204074.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the lead biker's jersey?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 21.84375, "loss": 4.186288356781006 }, { "id": "identity_31265", "image": "A-OKVQA/images/196294.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What group usually uses this mode of transport?\nChoices:\n(A) amish\n(B) paratroopers\n(C) army rangers\n(D) pilots" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.3828125, "loss": 4.255512714385986 }, { "id": "identity_287219", "image": "IconQA/images/78656.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there a square as a face in this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.265625, "loss": 4.0840630531311035 }, { "id": "identity_23125", "image": "PMC-VQA/images/PMC7285538_Fig5_189012.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the staining method used in the image? \nChoices:\n(A) IHC staining \n(B) EM staining \n(C) H&E staining \n(D) Giemsa staining " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.25, "loss": 4.379408359527588 }, { "id": "identity_258866", "image": "A-OKVQA/images/398301.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are three distinct characteristics of this event that differentiate it from a farmers market, fair, and flea market?" }, { "from": "gpt", "value": "The answer is Second-hand items, residential setting, small and informal" } ], "clip_score": 24.96875, "loss": 4.064067840576172 }, { "id": "identity_36917", "image": "A-OKVQA/images/84724.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of transportation is shown?\nChoices:\n(A) air\n(B) water\n(C) rail\n(D) road" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.3671875, "loss": 4.478636741638184 }, { "id": "identity_273474", "image": "VQA2.0/images/71608.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the backpack of the rider in red?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 22.6875, "loss": 4.004474639892578 }, { "id": "identity_22963", "image": "TQA/images/question_images/human_system_nervous_2854.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the front area of the frontal lobe called?\nChoices:\n(A) Central Sulcus\n(B) Prefrontal Lobe\n(C) Sylvian Fissure\n(D) Temporal Lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.453125, "loss": 4.252434730529785 }, { "id": "identity_310051", "image": "ScienceQA/images/8519.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Annapolis\n(B) Montpelier\n(C) Cambridge\n(D) Dover" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.546875, "loss": 4.4967780113220215 }, { "id": "identity_297677", "image": "PMC-VQA/images/PMC6298256_f1_412561.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the body is being shown in the image provided? \nOptions:\n(A) Brain\n(B) Abdomen\n(C) Chest\n(D) Foot" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.796875, "loss": 4.169421195983887 }, { "id": "identity_25112", "image": "PMC-VQA/images/PMC5775363_Fig2_266010.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique used for these images? \nChoices:\n(A) X-ray \n(B) CT scan \n(C) MRI \n(D) PET " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.75, "loss": 4.733158588409424 }, { "id": "identity_300296", "image": "IconQA/images/37479.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a paw. Which one is seventh?\nChoices:\n(A) dog\n(B) house\n(C) paw" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.390625, "loss": 4.40632963180542 }, { "id": "identity_269409", "image": "A-OKVQA/images/238086.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: To which destination is the person on the bike heading?\nChoices:\n(A) garage\n(B) candy shop\n(C) tea shop\n(D) market" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.0625, "loss": 4.028317451477051 }, { "id": "identity_7809", "image": "TabMWP/images/17660.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is the function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.984375, "loss": 4.515384674072266 }, { "id": "identity_69503", "image": "ScienceQA/images/10993.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of the beaver in this food web?" }, { "from": "gpt", "value": "The answer is Primary consumer and prey species" } ], "clip_score": 32.34375, "loss": 4.242326259613037 }, { "id": "identity_273536", "image": "TQA/images/abc_question_images/layers_of_atmosphere_17067.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What letter is used to represent the troposphere?\nChoices:\n(A) M\n(B) N\n(C) W\n(D) X" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.90625, "loss": 4.130617141723633 }, { "id": "identity_8595", "image": "TQA/images/abc_question_images/human_system_digestive_16071.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents the Colon?\nChoices:\n(A) P\n(B) R\n(C) F\n(D) Y" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.0, "loss": 4.19819974899292 }, { "id": "identity_325983", "image": "PMC-VQA/images/PMC9408430_genes-13-01431-f003_382438.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the anterior segment of this case?\nChoices:\n(A) Thick lens and aniridia\n(B) Thin lens and aniridia\n(C) Thick lens and absence of aniridia\n(D) Thin lens and presence of aniridia" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.953125, "loss": 4.303071022033691 }, { "id": "identity_316725", "image": "Super-CLEVR/images/superCLEVR_new_000482.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer small metallic things in front of a small blue thing than matte objects in front of a gray mountain bike?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 20.0625, "loss": 5.080080509185791 }, { "id": "identity_282439", "image": "PMC-VQA/images/PMC7853585_F1_358067.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of cell is depicted in the image provided? \nOptions:\n(A) HepG2\n(B) HeLa\n(C) SMMC-7721\n(D) A549" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.65625, "loss": 4.103673934936523 }, { "id": "identity_295049", "image": "TabMWP/images/37672.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: With $8 in her possession, can Cindy afford to purchase both a pair of jeans and a pink sweater?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.375, "loss": 4.198043346405029 }, { "id": "identity_34439", "image": "PMC-VQA/images/PMC8124336_ijms-22-04818-f003_441960.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is represented by the black asterisk in the image? \nChoices:\n(A) Connective tissue \n(B) M2 macrophages \n(C) Newly formed bone \n(D) Xenogeneic BSM granules. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.3125, "loss": 4.590988636016846 }, { "id": "identity_56406", "image": "DVQA/images/bar_train_00047919.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the lowest value for the 'joy' group?" }, { "from": "gpt", "value": "The answer is mad" } ], "clip_score": 25.8125, "loss": 4.573418617248535 }, { "id": "identity_34747", "image": "Super-CLEVR/images/superCLEVR_new_000010.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the blue scooter and the brown thing behind the bicycle have the same material?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 26.265625, "loss": 5.6057562828063965 }, { "id": "identity_214331", "image": "A-OKVQA/images/365271.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the seats in the stadium?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 15.546875, "loss": 4.312874794006348 }, { "id": "identity_313030", "image": "TextVQA/images/727356147414700a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written above this logo?" }, { "from": "gpt", "value": "The answer is the world famous" } ], "clip_score": 16.34375, "loss": 5.811367034912109 }, { "id": "identity_182276", "image": "A-OKVQA/images/481563.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arches does the bridge have?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.890625, "loss": 4.188559532165527 }, { "id": "identity_131432", "image": "VQA-AS/images/1593.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What object is the child holding in their left hand?" }, { "from": "gpt", "value": "The answer is A CD or DVD" } ], "clip_score": 24.90625, "loss": 4.45125150680542 }, { "id": "identity_219002", "image": "PMC-VQA/images/PMC7781868_FIG7_334373.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the red color in this image?" }, { "from": "gpt", "value": "The answer is Blood flow towards the probe" } ], "clip_score": 27.4375, "loss": 4.739016532897949 }, { "id": "identity_298957", "image": "PMC-VQA/images/PMC7399856_diagnostics-10-00450-f004_225619.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the main subject of the CT image among the following options: brain, liver, lungs, or intestines?\nChoices:\n(A) brain \n(B) liver \n(C) lungs \n(D) intestines" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.484375, "loss": 5.291903972625732 }, { "id": "identity_212469", "image": "VQA2.0/images/406710.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct types of pastries can be seen on the table?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 23.609375, "loss": 4.865885257720947 }, { "id": "identity_29067", "image": "PMC-VQA/images/PMC6945265_f1_95060.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the brain appears to be well separated in the image? \nChoices:\n(A) Thalami \n(B) Mesencephalon \n(C) Septum pellucidum \n(D) Interhemispheric fissure " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.859375, "loss": 4.483381748199463 }, { "id": "identity_250430", "image": "PMC-VQA/images/PMC3253128_pone-0029958-g015_121427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the images in the caption, and how does the orientation affect the interpretation of the structural features observed in the image?" }, { "from": "gpt", "value": "The answer is Horizontal orientation highlights side perspective of structural features" } ], "clip_score": 17.453125, "loss": 4.203307151794434 }, { "id": "identity_9432", "image": "IconQA/images/56837.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this shape flat or solid?\nChoices:\n(A) flat\n(B) solid" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.640625, "loss": 4.049893856048584 }, { "id": "identity_206491", "image": "PMC-VQA/images/PMC9406403_cancers-14-03989-f001_381174.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the lesion observed in the image?" }, { "from": "gpt", "value": "The answer is Irregular oval" } ], "clip_score": 30.75, "loss": 4.170993328094482 }, { "id": "identity_205958", "image": "VQA-AS/images/6563.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows are visible in the room?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.046875, "loss": 4.457851409912109 }, { "id": "identity_190277", "image": "PMC-VQA/images/PMC4955135_Fig2_53494.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image?" }, { "from": "gpt", "value": "The answer is Coronal" } ], "clip_score": 25.8125, "loss": 4.058676242828369 }, { "id": "identity_293613", "image": "PMC-VQA/images/PMC6947777_f1_96231.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the white line in the figure?\nChoices:\n(A) The border of the image \n(B) The area affected by a particular disease \n(C) The trajectory of the scanner \n(D) The location of a medical facility" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.609375, "loss": 4.193068027496338 }, { "id": "identity_38048", "image": "A-OKVQA/images/94610.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What form of currency allows cars to park here?\nChoices:\n(A) cards\n(B) dollars\n(C) coins\n(D) checks only" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.6796875, "loss": 4.330967903137207 }, { "id": "identity_324549", "image": "Super-CLEVR/images/superCLEVR_new_001096.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the tiny matte object that matches the color of the tiny metallic biplane?" }, { "from": "gpt", "value": "The answer is school bus" } ], "clip_score": 27.265625, "loss": 4.274105548858643 }, { "id": "identity_290818", "image": "A-OKVQA/images/121461.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the usual diet of these cows among the following options: grass, bugs, trees, or small animals?\nChoices:\n(A) grass\n(B) bugs\n(C) trees\n(D) small animals" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.0, "loss": 4.099595546722412 }, { "id": "identity_314259", "image": "ScienceQA/images/20631.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following was a variable in this experiment?\nChoices:\n(A) the number of days until a seed germinated\n(B) the temperature of the heating pad" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.53125, "loss": 4.746008396148682 }, { "id": "identity_291769", "image": "A-OKVQA/images/338960.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which country does the Christmas tree originate?\nChoices:\n(A) Germany\n(B) England\n(C) Switzerland\n(D) Russia" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.078125, "loss": 4.18900203704834 }, { "id": "identity_208941", "image": "A-OKVQA/images/405060.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many green cylindrical objects are there in the top right corner of the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 10.546875, "loss": 4.788678169250488 }, { "id": "identity_292223", "image": "A-OKVQA/images/147879.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be found on the ceiling among the following options: balloons, lights, bats, or squid?\nChoices:\n(A) balloons\n(B) lights\n(C) bats\n(D) squid" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.390625, "loss": 4.087612152099609 }, { "id": "identity_2374", "image": "DVQA/images/bar_train_00065280.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the chart contain stacked bars?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 24.421875, "loss": 4.190840721130371 }, { "id": "identity_27461", "image": "DVQA/images/bar_train_00111743.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the chart contain any negative values?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 29.21875, "loss": 4.148787498474121 }, { "id": "identity_311356", "image": "PlotQA/images/79565.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How are the labels in the legend arranged?" }, { "from": "gpt", "value": "The answer is vertical" } ], "clip_score": 17.84375, "loss": 4.716965675354004 }, { "id": "identity_258247", "image": "A-OKVQA/images/298784.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are visible in the image, and what is the large silver object in the middle of the group?" }, { "from": "gpt", "value": "The answer is Six bicycles, boat" } ], "clip_score": 23.390625, "loss": 4.046056747436523 }, { "id": "identity_289914", "image": "A-OKVQA/images/73724.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color of the speaker cloth coverings among the following choices: green, red, black, or gray?\nChoices:\n(A) green\n(B) red\n(C) black\n(D) gray" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.4609375, "loss": 4.055634498596191 }, { "id": "identity_309048", "image": "VQA-AS/images/7863.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sun behind?" }, { "from": "gpt", "value": "The answer is tree" } ], "clip_score": 18.640625, "loss": 4.298677921295166 }, { "id": "identity_282598", "image": "TabMWP/images/12091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Stanley have sufficient funds to purchase both a basketball and a volleyball with his $34.00?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.40625, "loss": 4.038125038146973 }, { "id": "identity_215797", "image": "Super-CLEVR/images/superCLEVR_new_000942.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the car that is the same material as the motorcycle?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 26.40625, "loss": 4.4598259925842285 }, { "id": "identity_333858", "image": "IconQA/images/27960.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, how likely is it that you will get a black one?\nChoices:\n(A) probable\n(B) certain\n(C) unlikely\n(D) impossible" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.03125, "loss": 4.166130065917969 }, { "id": "identity_320702", "image": "A-OKVQA/images/552054.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of zone is this place likely to be?\nChoices:\n(A) business\n(B) tourist\n(C) residential\n(D) shopping" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.1015625, "loss": 4.355963230133057 }, { "id": "identity_271983", "image": "A-OKVQA/images/183620.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the target of the woman's attempt to hit? Is it:\n(A) boxes\n(B) speakers\n(C) containers\n(D) televisions" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.7734375, "loss": 4.630135536193848 }, { "id": "identity_285553", "image": "PMC-VQA/images/PMC6414137_acm212546-fig-0003_449313.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of reconstruction is depicted on the right side?\nChoices:\n(A) Monoenergetic reconstruction\n(B) CatPhan reconstruction\n(C) DD 140 kV reconstruction\n(D) Reconstruction on the left" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.09375, "loss": 4.098474979400635 }, { "id": "identity_305138", "image": "FigureQA/images/91493.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the area under the curve the largest for Chocolate?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.265625, "loss": 4.0210280418396 }, { "id": "identity_298496", "image": "A-OKVQA/images/53825.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is the most probable target for the baseball to hit next? \n(A) pitcher\n(B) wall\n(C) audience\n(D) baseball bat" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.0625, "loss": 4.712746620178223 }, { "id": "identity_31670", "image": "DVQA/images/bar_train_00071123.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bars have values smaller than 1000000000?" }, { "from": "gpt", "value": "The answer is five" } ], "clip_score": 22.234375, "loss": 4.218298435211182 }, { "id": "identity_278884", "image": "TextVQA/images/8eb071a39f203845.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the phrase \"children's physicare\" located above the number 8 on the blue shirt?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 16.890625, "loss": 4.118322372436523 }, { "id": "identity_290136", "image": "TQA/images/question_images/cycle_water_1502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which term refers to rain among the following options: (A) Precipitation, (B) Sunshine, (C) Collection, or (D) Evaporation?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.4375, "loss": 4.672494411468506 }, { "id": "identity_294452", "image": "A-OKVQA/images/540762.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color of the circular dais located in the center of the ancient tower?\nOptions:\n(A) blue\n(B) white\n(C) gray\n(D) red" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.390625, "loss": 4.246512413024902 }, { "id": "identity_284642", "image": "A-OKVQA/images/136542.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which object is chopped near the baby elephant? \n(A) food\n(B) tree trunk\n(C) potatoes\n(D) car tires" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.671875, "loss": 4.075587272644043 }, { "id": "identity_128136", "image": "PMC-VQA/images/PMC5309216_F1_127713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can we determine if the lesion is a result of an ischemic stroke based on this image alone?" }, { "from": "gpt", "value": "The answer is No, additional information needed" } ], "clip_score": 33.71875, "loss": 4.226884841918945 }, { "id": "identity_324459", "image": "PMC-VQA/images/PMC5867574_jcm-07-00048-f003_293052.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique was used to capture this view?\nChoices:\n(A) MRI.\n(B) CT.\n(C) X-ray.\n(D) Ultrasound." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.40625, "loss": 4.311098575592041 }, { "id": "identity_235399", "image": "A-OKVQA/images/181453.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the steps involved in preparing the bike for transport, and why is it necessary to disassemble it?" }, { "from": "gpt", "value": "The answer is To fit into limited space and prevent damage during transport" } ], "clip_score": 30.015625, "loss": 4.132726669311523 }, { "id": "identity_288277", "image": "FigureQA/images/17510.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Brown the highest possible option?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.1171875, "loss": 4.747461795806885 }, { "id": "identity_13122", "image": "FigureQA/images/85092.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Saddle Brown have the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.875, "loss": 4.496134281158447 }, { "id": "identity_273364", "image": "AI2D/images/3724.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the tongue located in the mouth?\nChoices:\n(A) False it is in the stomach\n(B) True\n(C) False it is in the liver\n(D) False" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.296875, "loss": 4.07876443862915 }, { "id": "identity_311098", "image": "PMC-VQA/images/PMC9044620_Fig1_266346.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is observed in the right lower lobe?\nChoices:\n(A) Fluid-filled cysts \n(B) Semi-solid ground-glass attenuation \n(C) Calcified nodules \n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.28125, "loss": 4.487159252166748 }, { "id": "identity_295894", "image": "TabMWP/images/21072.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which destination on the hiking trails near Manuel's campground has a higher elevation from sea level, the underground lake in the small cave or the mountain peak?\nChoices:\n(A) underground lake\n(B) mountain peak" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 36.75, "loss": 4.338350772857666 }, { "id": "identity_17866", "image": "A-OKVQA/images/146294.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does Fly Emirates provide to the game?\nChoices:\n(A) drink\n(B) food\n(C) sponsor\n(D) transportation" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.5, "loss": 4.078383922576904 }, { "id": "identity_324019", "image": "A-OKVQA/images/187852.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What job do many of the individuals shown here share?\nChoices:\n(A) fire fighters\n(B) journalism\n(C) baby photographers\n(D) politicians" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.890625, "loss": 4.330721378326416 }, { "id": "identity_114870", "image": "Super-CLEVR/images/superCLEVR_new_001111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more airplanes or buses in the image?" }, { "from": "gpt", "value": "The answer is Equal number" } ], "clip_score": 21.234375, "loss": 4.030156135559082 }, { "id": "identity_304915", "image": "PMC-VQA/images/PMC6127925_Fig5_364669.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the lower part of the image?\nChoices:\n(A) Single array section\n(B) Carbon nanotube tape substrate\n(C) Magnified region\n(D) Multibeam SEM imaging" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.828125, "loss": 4.664747714996338 }, { "id": "identity_294499", "image": "PMC-VQA/images/PMC3314671_pone-0033745-g002_131826.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the highlighting of the yellow circle in the image?\nChoices:\n(A) Because it is associated with visual processing \n(B) Because it is related to language production \n(C) Because it is implicated in emotional regulation \n(D) Because it is part of the auditory cortex" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.734375, "loss": 4.630445957183838 }, { "id": "identity_316386", "image": "A-OKVQA/images/370337.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where have the vessels stopped?\nChoices:\n(A) near barge\n(B) at lighthouse\n(C) at dock\n(D) on island" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.484375, "loss": 4.013062477111816 }, { "id": "identity_316753", "image": "A-OKVQA/images/498436.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most probable drink in the filled cups on the table?\nChoices:\n(A) fruit drink\n(B) coffee\n(C) water\n(D) soda" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.7734375, "loss": 4.511898994445801 }, { "id": "identity_187592", "image": "TextVQA/images/c5227a830da85f77.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many drawers are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.375, "loss": 4.178916931152344 }, { "id": "identity_258277", "image": "A-OKVQA/images/35711.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: how many people in wetsuits are standing before the crashing wave, and how many are in the water but not standing" }, { "from": "gpt", "value": "The answer is one" } ], "clip_score": 32.53125, "loss": 4.249772548675537 }, { "id": "identity_198318", "image": "PMC-VQA/images/PMC4600094_Fig4_431725.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the red marking indicating in the image?" }, { "from": "gpt", "value": "The answer is Intimal flap" } ], "clip_score": 25.890625, "loss": 4.843188762664795 }, { "id": "identity_336568", "image": "PMC-VQA/images/PMC2640480_F4_34350.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible in Fig. 4a that aids in identifying the flap?\nChoices:\n(A) hair follicles,\n(B) blood vessels,\n(C) sweat glands,\n(D) muscles" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.546875, "loss": 4.800512790679932 }, { "id": "identity_13437", "image": "AI2D/images/3101.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does E represent?\nChoices:\n(A) Vacuole\n(B) Plastide\n(C) Centrosoom\n(D) Chromosoom" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.40625, "loss": 4.309879779815674 }, { "id": "identity_122385", "image": "VQA2.0/images/30619.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand name on the banner in the background?" }, { "from": "gpt", "value": "The answer is The brand name is \"northwave" } ], "clip_score": 19.65625, "loss": 4.41208553314209 }, { "id": "identity_282513", "image": "ScienceQA/images/19362.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which state is being indicated by the highlight?\nOptions:\n(A) Rhode Island\n(B) Delaware\n(C) Massachusetts\n(D) New Jersey" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.28125, "loss": 4.041539192199707 }, { "id": "identity_325740", "image": "ScienceQA/images/13775.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Tucson\n(B) Phoenix\n(C) Providence\n(D) Anchorage" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.375, "loss": 4.298618316650391 }, { "id": "identity_265431", "image": "DVQA/images/bar_train_00133890.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object is the least favored?" }, { "from": "gpt", "value": "The answer is charge" } ], "clip_score": 22.34375, "loss": 4.134845733642578 }, { "id": "identity_138881", "image": "FigureQA/images/963.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color bar has the second highest value?" }, { "from": "gpt", "value": "The answer is Tomato" } ], "clip_score": 20.8125, "loss": 4.036194324493408 }, { "id": "identity_23401", "image": "PMC-VQA/images/PMC7983206_Fig1_397040.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the white arrows? \nChoices:\n(A) external boundary of the ovary \n(B) follicles \n(C) corpora lutea \n(D) none of the above. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.3125, "loss": 4.832884788513184 }, { "id": "identity_123413", "image": "VQA-RAD/images/synpic26413.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which side of the body is shown in the medical image?" }, { "from": "gpt", "value": "The answer is Left" } ], "clip_score": 26.359375, "loss": 4.475411415100098 }, { "id": "identity_73527", "image": "ScienceQA/images/9830.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats can be seen in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 14.3671875, "loss": 4.349639415740967 }, { "id": "identity_291285", "image": "PMC-VQA/images/PMC4222972_pone-0112103-g001_333731.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What anatomical structure is indicated by the color green in the tissue segmentation?\nChoices:\n(A) cerebral-spinal fluid\n(B) white matter\n(C) gray matter\n(D) air" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.09375, "loss": 4.49849796295166 }, { "id": "identity_10051", "image": "FigureQA/images/91596.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Violet Red have the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.609375, "loss": 4.545999050140381 }, { "id": "identity_11142", "image": "PMC-VQA/images/PMC7546424_F7_269083.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of algae is being studied in the confocal images? \nChoices:\n(A) Chlorella sp. \n(B) Spirulina sp. \n(C) Halothece sp. \n(D) Dunaliella sp. ' " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.140625, "loss": 4.094436168670654 }, { "id": "identity_313357", "image": "AI2D/images/778.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter indicates midnight?\nChoices:\n(A) D\n(B) B\n(C) A\n(D) E" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.328125, "loss": 4.767699241638184 }, { "id": "identity_274814", "image": "ScienceQA/images/7687.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) Delaware\n(B) New Hampshire\n(C) Illinois\n(D) Pennsylvania" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.453125, "loss": 4.224218368530273 }, { "id": "identity_189366", "image": "Super-CLEVR/images/superCLEVR_new_000257.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly behind the green bus?" }, { "from": "gpt", "value": "The answer is A yellow bus" } ], "clip_score": 25.171875, "loss": 4.20094633102417 }, { "id": "identity_70453", "image": "ScienceQA/images/10768.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries are visible in this image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.078125, "loss": 4.55549955368042 }, { "id": "identity_112502", "image": "Super-CLEVR/images/superCLEVR_new_001425.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the train at the bottom of the image?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 21.59375, "loss": 4.515575408935547 }, { "id": "identity_1684", "image": "FigureQA/images/20186.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Aqua greater than Khaki?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.40625, "loss": 4.036404132843018 }, { "id": "identity_315783", "image": "VizWiz/images/VizWiz_train_00013290.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of plant is this?" }, { "from": "gpt", "value": "The answer is green flowers" } ], "clip_score": 24.46875, "loss": 4.05312442779541 }, { "id": "identity_1037", "image": "Super-CLEVR/images/superCLEVR_new_001740.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer big brown metal objects than yellow aeroplanes?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 18.09375, "loss": 5.1346893310546875 }, { "id": "identity_287899", "image": "PMC-VQA/images/PMC4944022_fig2_51549.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the observation made in the image of the right vertebral artery injection?\nChoices:\n(A) hypoplasia of the right Posterior inferior cerebellar arteries (PICA) (arrowhead) \n(B) hypoplasia of the left anterior inferior cerebellar arteries (AICA) (arrow) \n(C) abnormal dilatation of the right AICA (arrowhead) \n(D) the imaging is not clear to show any finding." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.109375, "loss": 4.007355690002441 }, { "id": "identity_205429", "image": "A-OKVQA/images/304158.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the rocks in the enclosure?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 15.203125, "loss": 4.509393692016602 }, { "id": "identity_270015", "image": "A-OKVQA/images/402228.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the type of TV from the following options: crt, projector, lcd, toy.\nChoices:\n(A) crt\n(B) projector\n(C) lcd\n(D) toy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.140625, "loss": 4.141567707061768 }, { "id": "identity_289485", "image": "PMC-VQA/images/PMC7399011_Fig1_225332.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of cancer is depicted in the image provided? \nOptions:\n(A) Rectal cancer\n(B) Transverse colon cancer\n(C) Sigmoid colon cancer\n(D) Descending colon cancer" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.265625, "loss": 4.523525714874268 }, { "id": "identity_24763", "image": "DocVQA/images/sllg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title?" }, { "from": "gpt", "value": "The answer is research asst" } ], "clip_score": 18.609375, "loss": 5.123560428619385 }, { "id": "identity_287199", "image": "VizWiz/images/VizWiz_train_00023929.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the color of this object?" }, { "from": "gpt", "value": "The answer is blood orange red" } ], "clip_score": 20.015625, "loss": 5.0268473625183105 }, { "id": "identity_285259", "image": "PMC-VQA/images/PMC5029079_Fig1_74770.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is the correct name for the thoracic window shown in the images: \n(A) Mediastinal Window \n(B) Pulmonary Window \n(C) Abdominothoracic Window \n(D) Cardiovascular Window" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.03125, "loss": 4.606009006500244 }, { "id": "identity_16037", "image": "ScienceQA/images/10168.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which air temperature was measured within the outlined area shown?\nChoices:\n(A) 5°C\n(B) -10°C\n(C) 0°C" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.28125, "loss": 4.005898475646973 }, { "id": "identity_271213", "image": "A-OKVQA/images/191061.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color does the second vehicle have? Is it:\n(A) brown\n(B) pink\n(C) red\n(D) white" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 11.546875, "loss": 4.181909084320068 }, { "id": "identity_268950", "image": "TQA/images/abc_question_images/human_system_nervous_16177.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label that corresponds to the temporal lobe:\n(A) D\n(B) C\n(C) R\n(D) N" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.5, "loss": 4.049330711364746 }, { "id": "identity_28903", "image": "PMC-VQA/images/PMC4533796_Fig6_413971.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the specimen shown in the image? \nChoices:\n(A) mice tumors \n(B) human tumors \n(C) mice brain tissue \n(D) human brain tissue " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.53125, "loss": 4.830618858337402 }, { "id": "identity_20361", "image": "IconQA/images/72958.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) slide\n(B) flip\n(C) turn" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.328125, "loss": 4.317978858947754 }, { "id": "identity_47740", "image": "MapQA/images/map_11051.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different value ranges are represented in the legend?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 15.7890625, "loss": 4.155001163482666 }, { "id": "identity_32125", "image": "AI2D/images/158.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The sun is responsible for:\nChoices:\n(A) Neap tide\n(B) Phases of the moon\n(C) Earth\n(D) Spring tide" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.25, "loss": 4.16201639175415 }, { "id": "identity_200729", "image": "PMC-VQA/images/PMC7926624_children-08-00163-f001_382775.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical region is primarily shown in the right panel of the image?" }, { "from": "gpt", "value": "The answer is Torso" } ], "clip_score": 23.453125, "loss": 4.005126476287842 }, { "id": "identity_138754", "image": "DocVQA/images/qkjk0079_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who does the individual with this performance plan report to?" }, { "from": "gpt", "value": "The answer is G. Novak" } ], "clip_score": 23.21875, "loss": 4.073627948760986 }, { "id": "identity_302362", "image": "A-OKVQA/images/493797.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of water area is depicted here?\nChoices:\n(A) harbor\n(B) river\n(C) open sea\n(D) stream" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.296875, "loss": 4.3741044998168945 }, { "id": "identity_5972", "image": "AI2D/images/4899.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the volcano is choice A?\nChoices:\n(A) Magma\n(B) Main Vent\n(C) Crater\n(D) Magma Chamber" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.4375, "loss": 4.436415672302246 }, { "id": "identity_327766", "image": "Super-CLEVR/images/superCLEVR_new_001753.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a cyan rubber object of the same size as this?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 17.71875, "loss": 4.8058319091796875 }, { "id": "identity_289183", "image": "PMC-VQA/images/PMC6209937_sensors-18-03475-f008_388972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the function of the yellow line shown in the image?\nChoices:\n(A) to show the position of the needle tip \n(B) to show the position of the needle shaft \n(C) to indicate a boundary on the B-mode ultrasound image \n(D) to represent a nerve." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.390625, "loss": 5.318756103515625 }, { "id": "identity_15930", "image": "TQA/images/question_images/biomes_6568.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the biome in Central Australia?\nChoices:\n(A) Taiga\n(B) Desert\n(C) Tropical Seasonal\n(D) Tropical Rainforest" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.828125, "loss": 4.20551061630249 }, { "id": "identity_255046", "image": "TQA/images/abc_question_images/human_system_nervous_16174.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the letter representing the brain and the spinal cord in the diagram, and then determine the letter representing the peripheral nervous system" }, { "from": "gpt", "value": "The answer is T" } ], "clip_score": 34.34375, "loss": 4.061455726623535 }, { "id": "identity_22926", "image": "PMC-VQA/images/PMC8876035_jcm-11-01002-f001_205890.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is observed in the midbrain on axial FLAIR images? \nChoices:\n(A) Decreased signals \n(B) No change in signals \n(C) Increased signals \n(D) Irregular signals " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.1875, "loss": 4.1168212890625 }, { "id": "identity_116328", "image": "VQA2.0/images/477259.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible people are wearing glasses?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 14.453125, "loss": 4.562211513519287 }, { "id": "identity_220282", "image": "VQA-AS/images/5091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many plants are visible in the room?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.390625, "loss": 4.611902713775635 }, { "id": "identity_307163", "image": "Super-CLEVR/images/superCLEVR_new_000338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a large yellow motorbike to the right of the chopper behind the green bicycle?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 25.765625, "loss": 5.347111225128174 }, { "id": "identity_333987", "image": "IconQA/images/10968.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this shape?\nChoices:\n(A) slide\n(B) flip\n(C) turn" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.71875, "loss": 4.021986484527588 }, { "id": "identity_262545", "image": "A-OKVQA/images/11737.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is this player positioned? \nChoices:\n(A) stands\n(B) dugout\n(C) first base\n(D) pitcher's mound" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.078125, "loss": 4.3346357345581055 }, { "id": "identity_23404", "image": "A-OKVQA/images/155167.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who are the street signs for?\nChoices:\n(A) drivers\n(B) directions\n(C) downtown\n(D) pedestrians" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.828125, "loss": 4.219170570373535 }, { "id": "identity_15501", "image": "PMC-VQA/images/PMC7830087_diagnostics-11-00129-f001_351374.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which body part is shown in the maximum intensity projection image (B)?\nChoices:\n(A) Abdomen\n(B) Brain\n(C) Chest\n(D) Pelvis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.1875, "loss": 4.146589279174805 }, { "id": "identity_286532", "image": "DocVQA/images/nzxv0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who expressed their desire to submit the story for publication by October?" }, { "from": "gpt", "value": "The answer is Wayne" } ], "clip_score": 21.5, "loss": 4.769906044006348 }, { "id": "identity_47156", "image": "DocVQA/images/krlf0023_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of outlet is selected for this agreement?" }, { "from": "gpt", "value": "The answer is CTN" } ], "clip_score": 20.390625, "loss": 4.263772487640381 }, { "id": "identity_69292", "image": "AI2D/images/1318.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How is the light energy depicted in the diagram?" }, { "from": "gpt", "value": "The answer is As yellow arrows from the Sun" } ], "clip_score": 31.03125, "loss": 4.310839653015137 }, { "id": "identity_221203", "image": "TextVQA/images/34df2fed3d2c0aa3.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wine bottles are displayed on the shelf?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.5, "loss": 4.064602851867676 }, { "id": "identity_28065", "image": "A-OKVQA/images/137134.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What sport requires this man to lay almost flat to his boards?\nChoices:\n(A) snowboarding\n(B) ski jumping\n(C) ski climbing\n(D) ski crossing" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.8125, "loss": 4.104198455810547 }, { "id": "identity_12139", "image": "A-OKVQA/images/58225.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who bit this hot dog?\nChoices:\n(A) rat\n(B) photographer\n(C) dog\n(D) child" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.34375, "loss": 4.350323677062988 }, { "id": "identity_219470", "image": "A-OKVQA/images/226424.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shorts worn by the person crouching near the water?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 17.5, "loss": 4.900648593902588 }, { "id": "identity_11889", "image": "A-OKVQA/images/108303.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the fork next to?\nChoices:\n(A) chili\n(B) cherry pie\n(C) cake\n(D) steak" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.6875, "loss": 4.320567607879639 }, { "id": "identity_223946", "image": "PlotQA/images/142032.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of borrowers involved with commercial banks in Kuwait from 2009 to 2013, and how does this compare to the average number of borrowers involved with commercial banks in Lebanon over the same period?" }, { "from": "gpt", "value": "The answer is -80" } ], "clip_score": 29.40625, "loss": 4.381809234619141 }, { "id": "identity_220687", "image": "VQA2.0/images/361601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trains can be seen in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 27.546875, "loss": 4.43685245513916 }, { "id": "identity_204362", "image": "A-OKVQA/images/420236.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many kites are visible in the sky?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 18.28125, "loss": 4.163063049316406 }, { "id": "identity_263924", "image": "ScienceQA/images/20989.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of West Virginia? \n(A) Louisville\n(B) Charleston\n(C) Baton Rouge\n(D) Huntington" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.5, "loss": 4.129039764404297 }, { "id": "identity_289666", "image": "A-OKVQA/images/143569.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the national museum located nearby?\nChoices:\n(A) bar\n(B) temple\n(C) print\n(D) grafton" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.1640625, "loss": 4.134113311767578 }, { "id": "identity_333675", "image": "PMC-VQA/images/PMC7921722_F1_380739.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is defined in the outlined areas?\nChoices:\n(A) DNA structure\n(B) Combinations of sub-divisions\n(C) Mathematical formulas\n(D) Genealogy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.796875, "loss": 4.440596580505371 }, { "id": "identity_312508", "image": "DocVQA/images/xfmd0217_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the position of this person?" }, { "from": "gpt", "value": "The answer is senior director" } ], "clip_score": 22.265625, "loss": 4.6995697021484375 }, { "id": "identity_190971", "image": "TextVQA/images/460b1dcaf38fdf9f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible on the field?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 18.6875, "loss": 4.371096611022949 }, { "id": "identity_313310", "image": "A-OKVQA/images/386908.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of bike is shown here?\nChoices:\n(A) chopper\n(B) tandem\n(C) mountain\n(D) electric" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.125, "loss": 4.320206165313721 }, { "id": "identity_128105", "image": "TextVQA/images/73f973cdc6b2c43f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is shown in the image?" }, { "from": "gpt", "value": "The answer is Van" } ], "clip_score": 25.421875, "loss": 4.260411262512207 }, { "id": "identity_22545", "image": "A-OKVQA/images/530823.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the man on the right doing with the object in his hands?\nChoices:\n(A) sweeping\n(B) putting\n(C) steering\n(D) passing" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.28125, "loss": 4.646303176879883 }, { "id": "identity_334169", "image": "FigureQA/images/60376.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this color the one with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.015625, "loss": 4.048155307769775 }, { "id": "identity_15468", "image": "FigureQA/images/55986.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Peru the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.515625, "loss": 4.592759609222412 }, { "id": "identity_295453", "image": "ScienceQA/images/12568.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Iowa?\nChoices:\n(A) Des Moines\n(B) Davenport\n(C) Cedar Rapids\n(D) Springfield" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.421875, "loss": 4.184761047363281 }, { "id": "identity_15491", "image": "AI2D/images/3917.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part is the convolution of parietal lobe that is bounded in front by the central sulcus?\nChoices:\n(A) precentral gyrus\n(B) postcentral gyrus\n(C) central sulcus\n(D) lateral sulcus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.046875, "loss": 4.319302082061768 }, { "id": "identity_224882", "image": "PMC-VQA/images/PMC4236490_Fig2_337300.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the image, describe the characteristics and location of the tumor, and explain how these features indicate the growth of the tumor" }, { "from": "gpt", "value": "The answer is Irregular shape, increased size, central brain region" } ], "clip_score": 31.453125, "loss": 4.582729339599609 }, { "id": "identity_329994", "image": "PMC-VQA/images/PMC7919781_materials-14-00928-f002_379848.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible in the images?\nChoices:\n(A) Unmelted powder\n(B) Melted powder\n(C) Cracks\n(D) Smooth texture" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.8125, "loss": 4.184101104736328 }, { "id": "identity_26019", "image": "A-OKVQA/images/268523.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: These boats are most likely in what kind of place?\nChoices:\n(A) marina\n(B) ocean\n(C) lake\n(D) river" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.78125, "loss": 4.034409046173096 }, { "id": "identity_312865", "image": "PMC-VQA/images/PMC8137781_Fig9_446471.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the abnormality specifically seen in this area on the lateral radiograph?\nChoices:\n(A) Joint Dislocation\n(B) Osseous Sclerosis\n(C) Bone Fragmentation\n(D) Tendon Tear" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.53125, "loss": 4.9349260330200195 }, { "id": "identity_268761", "image": "TabMWP/images/9725.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.578125, "loss": 4.198158264160156 }, { "id": "identity_190266", "image": "VQA2.0/images/445761.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visibly holding a camera?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 17.859375, "loss": 4.675020694732666 }, { "id": "identity_7483", "image": "DVQA/images/bar_train_00191607.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What element does the lightskyblue color represent?" }, { "from": "gpt", "value": "The answer is waste" } ], "clip_score": 22.171875, "loss": 4.082423210144043 }, { "id": "identity_299968", "image": "A-OKVQA/images/360673.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being referred to in the following options: India, United States, China, or Italy?\nChoices:\n(A) india\n(B) united states\n(C) china\n(D) italy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.8984375, "loss": 4.30361270904541 }, { "id": "identity_29187", "image": "ScienceQA/images/14071.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these two objects have in common?\nChoices:\n(A) rough\n(B) slippery" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.171875, "loss": 4.328280448913574 }, { "id": "identity_25278", "image": "DocVQA/images/nzxv0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: who says that they'd like to get the story to the publication by the October?" }, { "from": "gpt", "value": "The answer is Wayne" } ], "clip_score": 21.328125, "loss": 4.869081020355225 }, { "id": "identity_29577", "image": "ScienceQA/images/3214.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Connecticut?\nChoices:\n(A) New Haven\n(B) Hartford\n(C) Philadelphia\n(D) Montpelier" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.734375, "loss": 4.266254901885986 }, { "id": "identity_203460", "image": "VQA2.0/images/243558.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of citrus fruits can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.03125, "loss": 4.318695545196533 }, { "id": "identity_23920", "image": "PMC-VQA/images/PMC5438315_Fig9_166518.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most common condition associated with the described CT finding? \nChoices:\n(A) Hemangioma \n(B) Varices \n(C) Hematoma \n(D) Phlebolith " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.625, "loss": 4.485922336578369 }, { "id": "identity_268508", "image": "AI2D/images/333.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What letter represents redwall limestone among the following choices: S, W, Y, or V?\nChoices:\n(A) S\n(B) W\n(C) Y\n(D) V" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.359375, "loss": 4.755093097686768 }, { "id": "identity_131415", "image": "VQA2.0/images/536781.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lanes are visible in the picture?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.0625, "loss": 4.14537239074707 }, { "id": "identity_223650", "image": "A-OKVQA/images/136539.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of milk can be seen in the refrigerator?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.3125, "loss": 4.303975582122803 }, { "id": "identity_5715", "image": "TextVQA/images/7b6df76509a1983b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what does the sign with the shoe prints say?" }, { "from": "gpt", "value": "The answer is zaptro" } ], "clip_score": 15.0546875, "loss": 4.087790012359619 }, { "id": "identity_312543", "image": "DVQA/images/bar_train_00005434.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first group of bars?" }, { "from": "gpt", "value": "The answer is fun" } ], "clip_score": 16.4375, "loss": 4.281770706176758 }, { "id": "identity_210369", "image": "VQA2.0/images/330907.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lanterns are visible in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 20.84375, "loss": 4.891289710998535 }, { "id": "identity_270553", "image": "AI2D/images/2650.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What concept is represented by the letter K?\nChoices:\n(A) Tropic of Cancer\n(B) Tropic of Capricorn\n(C) Celestial Equator\n(D) Right Angle to Orbit" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.59375, "loss": 4.795290470123291 }, { "id": "identity_297837", "image": "A-OKVQA/images/158861.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is associated with the receptacle used for disposing of garbage? \n(A) teal\n(B) pink\n(C) yellow\n(D) green" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.1328125, "loss": 4.10239315032959 }, { "id": "identity_323496", "image": "PMC-VQA/images/PMC6441667_Fig7_459427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of radiograph was taken after the right mandibular resection?\nChoices:\n(A) Periapical radiograph\n(B) Panoramic radiograph\n(C) Bite-wing radiograph\n(D) Occlusal radiograph" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.546875, "loss": 4.086679935455322 }, { "id": "identity_191930", "image": "A-OKVQA/images/529545.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the establishment shown in the top image?" }, { "from": "gpt", "value": "The answer is Wilhelm & Grahn" } ], "clip_score": 21.65625, "loss": 5.811772346496582 }, { "id": "identity_22599", "image": "PMC-VQA/images/PMC6789772_pathogens-08-00116-f005_50241.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the arrows indicate in the image? \nChoices:\n(A) the outer host cell surface membrane \n(B) the location of developing meronts \n(C) the size of the PVs \n(D) the type of host cell " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.1875, "loss": 4.683055400848389 }, { "id": "identity_296351", "image": "IconQA/images/20353.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the time shown by filling in the blank: It is (_) past seven. Answer using a time word, not a number." }, { "from": "gpt", "value": "The answer is half" } ], "clip_score": 30.46875, "loss": 4.599467754364014 }, { "id": "identity_7272", "image": "FigureQA/images/79964.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Coral have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.890625, "loss": 4.036825656890869 }, { "id": "identity_303701", "image": "IconQA/images/49545.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, how likely is it that you will get a black one?\nChoices:\n(A) impossible\n(B) certain\n(C) probable\n(D) unlikely" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.09375, "loss": 4.365946292877197 }, { "id": "identity_187882", "image": "VizWiz/images/VizWiz_train_00009791.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the onesie?" }, { "from": "gpt", "value": "The answer is Light blue" } ], "clip_score": 27.984375, "loss": 4.807353496551514 }, { "id": "identity_143345", "image": "DocVQA/images/fkkx0021_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the contingency location for the event in case of bad weather?" }, { "from": "gpt", "value": "The answer is Inside" } ], "clip_score": 19.59375, "loss": 4.278382301330566 }, { "id": "identity_311989", "image": "Super-CLEVR/images/superCLEVR_new_001777.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more large blue things that are to the right of a red object than large metallic aeroplanes?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 19.921875, "loss": 5.1992106437683105 }, { "id": "identity_151059", "image": "AI2D/images/1766.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tertiary consumers are depicted in this food web?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 33.375, "loss": 4.553513526916504 }, { "id": "identity_314999", "image": "A-OKVQA/images/136770.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of footwear is the individual in the white shirt wearing?\nChoices:\n(A) adidas\n(B) nike\n(C) skechers\n(D) new balance" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.921875, "loss": 4.282560348510742 }, { "id": "identity_193783", "image": "Super-CLEVR/images/superCLEVR_new_000078.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the motorcycle likely made of?" }, { "from": "gpt", "value": "The answer is Metal" } ], "clip_score": 16.21875, "loss": 4.00191593170166 }, { "id": "identity_272224", "image": "PMC-VQA/images/PMC3584718_F1_188918.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the images shown?\nChoices:\n(A) Neurons\n(B) Glia cells\n(C) Blood vessels\n(D) Muscle cells" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.5, "loss": 4.930319786071777 }, { "id": "identity_68472", "image": "ScienceQA/images/1799.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many gill slits are visible on the fish in the image?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 21.84375, "loss": 4.053021430969238 }, { "id": "identity_267209", "image": "AI2D/images/3534.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the cell is the nucleolus located?\nChoices:\n(A) Nucleolus\n(B) Cytoplasm\n(C) Chloroplast\n(D) Golgi apparatus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.484375, "loss": 4.220389366149902 }, { "id": "identity_336166", "image": "FigureQA/images/34903.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.6015625, "loss": 4.038233757019043 }, { "id": "identity_837", "image": "FigureQA/images/83308.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Magenta have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.15625, "loss": 4.303629398345947 }, { "id": "identity_302051", "image": "PMC-VQA/images/PMC7670585_fig4_299612.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the feature in the left upper lobe of the image?\nChoices:\n(A) typical halo sign\n(B) nontypical halo sign\n(C) ground-glass opacities\n(D) consolidation" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.03125, "loss": 4.318638801574707 }, { "id": "identity_332215", "image": "A-OKVQA/images/242592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these colorful machines called?\nChoices:\n(A) garbage bins\n(B) mailboxes\n(C) vending machines\n(D) storage lockers" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.828125, "loss": 4.333654403686523 }, { "id": "identity_312781", "image": "AI2D/images/556.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is A in this?\nChoices:\n(A) guard cells\n(B) nucleus\n(C) cell wall\n(D) membrane" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.953125, "loss": 4.301218509674072 }, { "id": "identity_121771", "image": "A-OKVQA/images/84341.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What trick is the skateboarder performing?" }, { "from": "gpt", "value": "The answer is Nose grab" } ], "clip_score": 27.453125, "loss": 4.0206732749938965 }, { "id": "identity_222", "image": "PMC-VQA/images/PMC8537352_insects-12-00850-f002_92118.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible inside the gall? \nChoices:\n(A) Unknown spore and veins \n(B) Radial files of epidermal cells and silk thread \n(C) Stomata-like opening and unknown spore \n(D) Unknown spore and caterpillar " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.15625, "loss": 4.155924320220947 }, { "id": "identity_313303", "image": "DocVQA/images/fkkd0079_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the representative for the project client?" }, { "from": "gpt", "value": "The answer is Doug Dillman" } ], "clip_score": 20.296875, "loss": 4.033334732055664 }, { "id": "identity_4385", "image": "ScienceQA/images/7175.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which i in row A?\nChoices:\n(A) the fast-food restaurant\n(B) the theater\n(C) the pond\n(D) the fire department" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.8125, "loss": 4.270816326141357 }, { "id": "identity_311936", "image": "PMC-VQA/images/PMC4661983_Fig6_448443.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of imaging was done after 12 months?\nChoices:\n(A) X-ray \n(B) MRI \n(C) CT scan \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.0625, "loss": 4.281308650970459 }, { "id": "identity_199578", "image": "PMC-VQA/images/PMC6842549_Fig3_65317.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any text present on the image, and if so, what does it say?" }, { "from": "gpt", "value": "The answer is Yes, \"A lung pathology only 3mm becomes volume" } ], "clip_score": 24.484375, "loss": 5.195667266845703 }, { "id": "identity_221165", "image": "VQA2.0/images/44332.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What animal is the small figurine in front of the vase?" }, { "from": "gpt", "value": "The answer is A ram" } ], "clip_score": 27.078125, "loss": 5.079408168792725 }, { "id": "identity_293034", "image": "PMC-VQA/images/PMC7215337_ijms-21-02817-f005_166971.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be seen more clearly in the magnified inset?\nChoices:\n(A) Rudimentary alveolar wall \n(B) Chlamydial inclusions \n(C) Interlobular septum \n(D) Plasma cells" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.546875, "loss": 4.817038536071777 }, { "id": "identity_56900", "image": "DocVQA/images/gmvj0223_23.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total aggregate number of shares proposed to be issued?" }, { "from": "gpt", "value": "The answer is shares] 0" } ], "clip_score": 24.15625, "loss": 4.852929592132568 }, { "id": "identity_282886", "image": "A-OKVQA/images/574946.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which mythical creature is favored by the owner of the pink boat?\nChoices:\n(A) peter pan\n(B) mermaids\n(C) tinkerbell\n(D) pinnocchio" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.96875, "loss": 4.1416754722595215 }, { "id": "identity_311785", "image": "TQA/images/question_images/earth_eclipses_1668.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What happens during a solar eclipse?\nChoices:\n(A) The sun is between the moon and the Earth\n(B) The moon is next to the sun next to the Earth\n(C) The moon stays in the middle between the sun and the Earth\n(D) The Earth is between the moon and the sun" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.265625, "loss": 4.573644638061523 }, { "id": "identity_304496", "image": "ScienceQA/images/18967.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Observe the image. Which word best describes the texture of these candies?\nChoices:\n(A) hard\n(B) fluffy\n(C) greasy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.671875, "loss": 4.262320041656494 }, { "id": "identity_19547", "image": "FigureQA/images/84462.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Red have the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.8125, "loss": 4.508627414703369 }, { "id": "identity_298448", "image": "IconQA/images/102729.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture is the fourth one in the sequence, given that the first picture is a house?\nChoices:\n(A) paw\n(B) dog\n(C) house" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.75, "loss": 4.419157981872559 }, { "id": "identity_312729", "image": "A-OKVQA/images/44504.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this?\n\nChoices:\n(A) production wheel\n(B) artwork\n(C) retail display\n(D) dentist wheel" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.25, "loss": 4.62087345123291 }, { "id": "identity_39601", "image": "DVQA/images/bar_train_00198065.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the sweat dataset in the legend?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 20.453125, "loss": 4.097007751464844 }, { "id": "identity_291984", "image": "PMC-VQA/images/PMC3098834_pone-0019886-g001_96439.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the finding of the brain CT scan?\nChoices:\n(A) Normal brain \n(B) Left hemisphere stroke \n(C) Massive hematoma of the right cerebral cortex \n(D) Herniation of the cerebellum" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.625, "loss": 4.821924686431885 }, { "id": "identity_328080", "image": "VQA2.0/images/56800.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is this?" }, { "from": "gpt", "value": "The answer is black" } ], "clip_score": 11.515625, "loss": 4.2292799949646 }, { "id": "identity_16925", "image": "Super-CLEVR/images/superCLEVR_new_001598.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the number of rubber things right of the blue metallic bicycle less than the number of large metallic bicycles?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 26.15625, "loss": 4.9428181648254395 }, { "id": "identity_10852", "image": "DVQA/images/bar_train_00038081.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many algorithms have accuracies lower than 6?" }, { "from": "gpt", "value": "The answer is one" } ], "clip_score": 31.734375, "loss": 4.069899559020996 }, { "id": "identity_309705", "image": "FigureQA/images/80452.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the area under the curve for Navy Blue the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.609375, "loss": 4.300229549407959 }, { "id": "identity_292621", "image": "FigureQA/images/97235.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Dark Orchid or Light Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.734375, "loss": 4.535236358642578 }, { "id": "identity_34780", "image": "AI2D/images/303.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the food web, what does the killer whale represent?\nChoices:\n(A) Prey\n(B) Producer\n(C) Predator\n(D) Parasite" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.25, "loss": 4.145872116088867 }, { "id": "identity_280546", "image": "PMC-VQA/images/PMC7285026_microorganisms-08-00634-f001_188734.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of image was captured of the cells?\nOptions:\n(A) SEM image\n(B) TEM image\n(C) Confocal image\n(D) Bright-field image" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.46875, "loss": 4.688349723815918 }, { "id": "identity_44973", "image": "DocVQA/images/jygp0000_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the result of the transmission?" }, { "from": "gpt", "value": "The answer is OK" } ], "clip_score": 21.515625, "loss": 4.273797988891602 }, { "id": "identity_330193", "image": "DocVQA/images/mzfm0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the code for Otology?" }, { "from": "gpt", "value": "The answer is ot" } ], "clip_score": 18.421875, "loss": 4.451993942260742 }, { "id": "identity_309728", "image": "A-OKVQA/images/44471.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What should people do in this situation?\nChoices:\n(A) wait\n(B) go\n(C) say hi\n(D) slow down" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.9921875, "loss": 4.124194145202637 }, { "id": "identity_325664", "image": "DVQA/images/bar_train_00088904.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of this group of bars?" }, { "from": "gpt", "value": "The answer is auto" } ], "clip_score": 18.71875, "loss": 4.494360446929932 }, { "id": "identity_38061", "image": "TabMWP/images/31104.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Jake tracked the attendance at the school play. On which day did the fewest people attend the play?\nChoices:\n(A) Wednesday\n(B) Thursday\n(C) Friday\n(D) Saturday" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.0, "loss": 4.0516862869262695 }, { "id": "identity_21417", "image": "A-OKVQA/images/261426.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is in the sandwich that is highest in the air?\nChoices:\n(A) black olives\n(B) toothpick\n(C) giant eggs\n(D) carrots" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.328125, "loss": 4.2050065994262695 }, { "id": "identity_283175", "image": "A-OKVQA/images/75581.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is he currently engaged in?\nChoices:\n(A) showing off\n(B) taking shortcut\n(C) tricks\n(D) falling" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.4296875, "loss": 4.472317695617676 }, { "id": "identity_274733", "image": "PMC-VQA/images/PMC6639972_Fig3_11423.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On the left side of the image, which hemisphere of the brain is depicted?\nChoices:\n(A) Left hemisphere\n(B) Right hemisphere\n(C) Both hemispheres\n(D) Cannot be determined" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.0, "loss": 4.025128364562988 }, { "id": "identity_315309", "image": "ScienceQA/images/15607.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is in row A?\nChoices:\n(A) the fast-food restaurant\n(B) the fire department\n(C) the grocery store\n(D) the theater" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.984375, "loss": 4.410351753234863 }, { "id": "identity_310148", "image": "ScienceQA/images/2701.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which one is the fish?\nChoices:\n(A) great white shark\n(B) giraffe" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.8125, "loss": 4.948512077331543 }, { "id": "identity_126920", "image": "PMC-VQA/images/PMC4349652_pone.0118849.g003_364003.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the likely function of the tissue section based on its histological features?" }, { "from": "gpt", "value": "The answer is Protection and secretion" } ], "clip_score": 27.03125, "loss": 4.2739644050598145 }, { "id": "identity_208639", "image": "A-OKVQA/images/151371.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the parking meter next to the truck?" }, { "from": "gpt", "value": "The answer is HILO WILD F20" } ], "clip_score": 24.703125, "loss": 4.130053997039795 }, { "id": "identity_242038", "image": "FigureQA/images/62707.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the highest value at x = 50, and what is the difference between this value and the value of Seafoam at x = 50?" }, { "from": "gpt", "value": "The answer is 13" } ], "clip_score": 23.375, "loss": 4.124916076660156 }, { "id": "identity_123776", "image": "TextVQA/images/a83b37081e0d79cf.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many handles does the container have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 13.3125, "loss": 4.741059303283691 }, { "id": "identity_313848", "image": "AI2D/images/1042.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, which label indicates summer?\nChoices:\n(A) C\n(B) B\n(C) E\n(D) D" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.359375, "loss": 4.208103656768799 }, { "id": "identity_60921", "image": "DocVQA/images/qzlf0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the sender of this facsimile?" }, { "from": "gpt", "value": "The answer is John Martin" } ], "clip_score": 24.609375, "loss": 4.785996437072754 }, { "id": "identity_327485", "image": "PMC-VQA/images/PMC5851596_pone.0193386.g004_287805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the red arrows indicate in this image?\nChoices:\n(A) presence of apoptotic cell death\n(B) presence of mitotic cells\n(C) presence of necrotic cells\n(D) presence of dividing cells" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.84375, "loss": 4.241644859313965 }, { "id": "identity_187967", "image": "A-OKVQA/images/304150.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many laptops are visible on the table in the first row?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.96875, "loss": 4.41216516494751 }, { "id": "identity_325418", "image": "PMC-VQA/images/PMC4171482_pone-0107681-g005_321796.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which structures exhibit reduced density?\nChoices:\n(A) Otic capsule\n(B) Vertebral column\n(C) Pelvic girdle\n(D) Skull base" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.125, "loss": 4.032099723815918 }, { "id": "identity_216557", "image": "VizWiz/images/VizWiz_train_00001886.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the wall behind the table?" }, { "from": "gpt", "value": "The answer is Light green or gray" } ], "clip_score": 26.84375, "loss": 4.894831657409668 }, { "id": "identity_299763", "image": "IconQA/images/68509.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the shape from the following options: cone, cube, sphere, cylinder.\nChoices:\n(A) cone\n(B) cube\n(C) sphere\n(D) cylinder" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.734375, "loss": 4.102307319641113 }, { "id": "identity_25151", "image": "Super-CLEVR/images/superCLEVR_new_001698.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the small object that is in front of the big green metallic bus have the same shape as the large metallic thing that is in front of the tiny purple biplane?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 34.25, "loss": 5.136713981628418 }, { "id": "identity_215772", "image": "Super-CLEVR/images/superCLEVR_new_000032.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the large object that is in front of the small purple metallic thing and behind the big matte airplane?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 34.125, "loss": 4.348853588104248 }, { "id": "identity_15020", "image": "PMC-VQA/images/PMC7797455_FIG3_339777.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which lobes are more extensively affected on the CT images? \nChoices:\n(A) Upper lobes \n(B) Middle lobes \n(C) Lower lobes \n(D) All lobes are equally affected" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.921875, "loss": 4.579625129699707 }, { "id": "identity_334714", "image": "PMC-VQA/images/PMC6894893_FIG1_79450.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the shape of the gestational sac in the ultrasound image?\nChoices:\n(A) Round\n(B) Oval\n(C) Elongated\n(D) Irregular" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.5625, "loss": 4.439953327178955 }, { "id": "identity_121882", "image": "PMC-VQA/images/PMC6598368_Fig1_1001.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the general condition of the ventricles in this image?" }, { "from": "gpt", "value": "The answer is Normal" } ], "clip_score": 31.265625, "loss": 4.941000938415527 }, { "id": "identity_329985", "image": "A-OKVQA/images/383212.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of number is displayed on the bike?\nChoices:\n(A) composite\n(B) odd\n(C) even\n(D) negative" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.6875, "loss": 4.3711652755737305 }, { "id": "identity_288769", "image": "ScienceQA/images/1125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Colorado?\nChoices:\n(A) Denver\n(B) Salem\n(C) Olympia\n(D) Charleston" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.640625, "loss": 4.137630462646484 }, { "id": "identity_5761", "image": "DVQA/images/bar_train_00156206.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What dataset does the palegoldenrod color represent?" }, { "from": "gpt", "value": "The answer is clock" } ], "clip_score": 18.515625, "loss": 4.268755912780762 }, { "id": "identity_280906", "image": "IconQA/images/97910.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine if the number of diamonds is even or odd.\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.15625, "loss": 4.057857513427734 }, { "id": "identity_306723", "image": "ScienceQA/images/9684.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared characteristic of these four items?\nChoices:\n(A) stretchy\n(B) opaque\n(C) sour" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.578125, "loss": 4.387997150421143 }, { "id": "identity_268193", "image": "DVQA/images/bar_train_00071820.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the entire chart, which group of bars has the highest value for an individual bar?" }, { "from": "gpt", "value": "The answer is well" } ], "clip_score": 27.828125, "loss": 4.263354301452637 }, { "id": "identity_194934", "image": "PMC-VQA/images/PMC6339402_Fig2_425130.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label at the bottom of the image indicating?" }, { "from": "gpt", "value": "The answer is Imaging details and contrast agent used" } ], "clip_score": 24.90625, "loss": 4.733150482177734 }, { "id": "identity_2246", "image": "VQA2.0/images/265279.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which person is about to hit the ball?" }, { "from": "gpt", "value": "The answer is serving man" } ], "clip_score": 21.015625, "loss": 4.578688621520996 }, { "id": "identity_292578", "image": "PMC-VQA/images/PMC5613789_F7_219958.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, what is the position of the sagittal slice displayed?\nChoices:\n(A) 30 mm to the left of the AC\n(B) 56.5 mm to the right of the AC\n(C) 30 mm to the right of the AC\n(D) 56.5 mm to the left of the AC" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.609375, "loss": 4.1226091384887695 }, { "id": "identity_280388", "image": "AI2D/images/194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which land animal considers a mouse as its prey?\nChoices:\n(A) rabbit\n(B) hawk\n(C) snake\n(D) NA" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.203125, "loss": 4.406702041625977 }, { "id": "identity_33754", "image": "IconQA/images/94951.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the dotted line a line of symmetry?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.546875, "loss": 4.261051177978516 }, { "id": "identity_246045", "image": "TabMWP/images/18271.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of votes for each popsicle flavor, and which flavor received the highest total number of votes?" }, { "from": "gpt", "value": "The answer is orange" } ], "clip_score": 38.25, "loss": 4.948732376098633 }, { "id": "identity_204240", "image": "Super-CLEVR/images/superCLEVR_new_001706.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the car that is to the left of the tiny rubber bus and in front of the shiny motorcycle?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 27.875, "loss": 4.74228048324585 }, { "id": "identity_337294", "image": "PMC-VQA/images/PMC7934521_Fig2_384858.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the pulmonary system was injected with something?\nChoices:\n(A) Bronchus\n(B) Alveoli\n(C) Pulmonary vein\n(D) Feeding artery" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.375, "loss": 4.058732509613037 }, { "id": "identity_303374", "image": "Super-CLEVR/images/superCLEVR_new_001929.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the color of the rubber object on the right side of the red shiny thing the same as the small metal scooter?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 20.578125, "loss": 5.2259440422058105 }, { "id": "identity_312212", "image": "A-OKVQA/images/114540.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the structure where these people are riding?\nChoices:\n(A) highway\n(B) gangplank\n(C) quay\n(D) overpass" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.0625, "loss": 4.212599277496338 }, { "id": "identity_294985", "image": "PMC-VQA/images/PMC6108968_Fig4_358750.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which medical procedure was conducted on the patient?\nOptions:\n(A) MRI scan\n(B) CT scan\n(C) PTA and stenting\n(D) Angiogram scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.40625, "loss": 4.795413970947266 }, { "id": "identity_312520", "image": "Super-CLEVR/images/superCLEVR_new_001248.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a large purple rubber thing behind the rubber thing to the left of the sedan?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 22.703125, "loss": 5.245896339416504 }, { "id": "identity_271122", "image": "TQA/images/abc_question_images/optics_lense_types_19161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the label Y in the given choices? Is it:\n(A) BLIND SPOT\n(B) VANISHING POINT\n(C) DISTANCE\n(D) FOCUS" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.515625, "loss": 4.620205879211426 }, { "id": "identity_27147", "image": "PMC-VQA/images/PMC4638099_Fig1_442502.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the appearance of the white matter on T2-weighted axial imaging? \nChoices:\n(A) Homogeneous \n(B) Hypointense \n(C) Heterogeneous \n(D) Hyperintense " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.953125, "loss": 4.377298831939697 }, { "id": "identity_38020", "image": "IconQA/images/99096.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of fish even or odd?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.046875, "loss": 4.036818504333496 }, { "id": "identity_102475", "image": "IconQA/images/7052.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the flower's center?" }, { "from": "gpt", "value": "The answer is Circle" } ], "clip_score": 28.546875, "loss": 4.224946022033691 }, { "id": "identity_274618", "image": "FigureQA/images/38466.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Periwinkle not the minimum?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.2421875, "loss": 4.220484256744385 }, { "id": "identity_67735", "image": "TQA/images/question_images/earth_poles_117.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many continents can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 31.09375, "loss": 5.1485276222229 }, { "id": "identity_279463", "image": "PMC-VQA/images/PMC7299599_f4_194033.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the red circle represent in the image provided?\nChoices:\n(A) True lumen of left CCA \n(B) A false lumen of left CCA \n(C) A thrombus in the true lumen of left CCA \n(D) An atherosclerotic plaque in true lumen of left CCA" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.3125, "loss": 4.660052299499512 }, { "id": "identity_222994", "image": "TextVQA/images/951d338fd366d398.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many basketballs can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 13.9296875, "loss": 4.496579170227051 }, { "id": "identity_243744", "image": "TabMWP/images/7843.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of livestock on all farms combined, and which farm has the highest percentage of the total livestock?" }, { "from": "gpt", "value": "The answer is Middle Farm" } ], "clip_score": 37.65625, "loss": 4.484231472015381 }, { "id": "identity_106188", "image": "VQA-AS/images/16713.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What item is placed next to the door?" }, { "from": "gpt", "value": "The answer is A coat stand" } ], "clip_score": 21.8125, "loss": 4.350275039672852 }, { "id": "identity_275836", "image": "A-OKVQA/images/484619.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What food is being prepared in this scenario?\nChoices:\n(A) fish\n(B) waffles\n(C) chicken\n(D) cookies" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.546875, "loss": 4.077536106109619 }, { "id": "identity_114399", "image": "TextVQA/images/005b92c904f041ae.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the table in the center of the image?" }, { "from": "gpt", "value": "The answer is Round" } ], "clip_score": 13.4765625, "loss": 4.172491073608398 }, { "id": "identity_219923", "image": "A-OKVQA/images/148395.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the vehicle directly behind the green car?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 22.1875, "loss": 4.003987789154053 }, { "id": "identity_211574", "image": "PMC-VQA/images/PMC6000706_Figure5_328815.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible abnormalities in this image?" }, { "from": "gpt", "value": "The answer is Yes, areas of increased density" } ], "clip_score": 24.40625, "loss": 4.475471496582031 }, { "id": "identity_267576", "image": "VizWiz/images/VizWiz_train_00009158.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this object is?" }, { "from": "gpt", "value": "The answer is this cup" } ], "clip_score": 31.4375, "loss": 4.81709623336792 }, { "id": "identity_34126", "image": "ScienceQA/images/13405.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Florida\n(B) Maine\n(C) New Hampshire\n(D) Connecticut" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.890625, "loss": 4.413126468658447 }, { "id": "identity_264115", "image": "A-OKVQA/images/441168.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the number of players participating in the multi-screen game based on the player count indicated? \n(A) two\n(B) three\n(C) one\n(D) four" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.96875, "loss": 4.006654262542725 }, { "id": "identity_183447", "image": "A-OKVQA/images/334775.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pillows are visible on the beds?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.25, "loss": 4.4065985679626465 }, { "id": "identity_75917", "image": "TQA/images/question_images/dna_6769.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the section that contains a cytosine (C) base not paired with a guanine (G) base" }, { "from": "gpt", "value": "The answer is None" } ], "clip_score": 26.875, "loss": 4.1720476150512695 }, { "id": "identity_194561", "image": "A-OKVQA/images/155997.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bicycle closest to the dog?" }, { "from": "gpt", "value": "The answer is Silver or grey" } ], "clip_score": 26.65625, "loss": 4.0968451499938965 }, { "id": "identity_6994", "image": "PMC-VQA/images/PMC5341329_F4_135511.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is displayed in the upper right panels of the images? \nChoices:\n(A) Calreticulin \n(B) Nuclei \n(C) Radiated cells \n(D) Isotype control. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.671875, "loss": 4.660785675048828 }, { "id": "identity_289048", "image": "Super-CLEVR/images/superCLEVR_new_001753.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does a cyan rubber object exist that is the same size as the green sedan?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 26.109375, "loss": 5.076784133911133 }, { "id": "identity_271857", "image": "PMC-VQA/images/PMC3934392_fig2_269369.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, what do the letters P, R, and * represent?\nChoices:\n(A) Different pathologies \n(B) Different imaging techniques \n(C) Different body organs \n(D) Different spaces in the body." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.1875, "loss": 4.317069053649902 }, { "id": "identity_306758", "image": "PMC-VQA/images/PMC3919884_f1-ol-07-03-0894_265919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the condition of the left medial ilium in this image?\nChoices:\n(A) Destroyed \n(B) Enlarged \n(C) Normal \n(D) Inflamed" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.265625, "loss": 4.534532070159912 }, { "id": "identity_297697", "image": "ScienceQA/images/5334.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is used to make this flower pot?\nChoices:\n(A) cotton\n(B) ceramic" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.6875, "loss": 4.210815906524658 }, { "id": "identity_267001", "image": "AI2D/images/4844.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of eruption does not result in the formation of a cloud?\nChoices:\n(A) Strombolian\n(B) Pelean\n(C) Hawaiian\n(D) Plinian" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.1875, "loss": 4.144120693206787 }, { "id": "identity_320350", "image": "PMC-VQA/images/PMC7878549_F2_366617.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the condition indicated by the white arrows in this image?\nChoices:\n(A) Perilesional edema \n(B) Infarcted tissue \n(C) Brain metastasis \n(D) None of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.921875, "loss": 4.8616437911987305 }, { "id": "identity_191980", "image": "VQA2.0/images/308892.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of establishment is mentioned on the box attached to the bike?" }, { "from": "gpt", "value": "The answer is Resort" } ], "clip_score": 26.5, "loss": 4.4415602684021 }, { "id": "identity_305471", "image": "PMC-VQA/images/PMC4936326_Fig4_48807.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where does the involvement of the tibialis begin in this scenario?\nChoices:\n(A) In the middle of the muscle.\n(B) From the periphery of the muscle.\n(C) In the proximal part of the muscle.\n(D) In a different muscle." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.3125, "loss": 4.164647102355957 }, { "id": "identity_26908", "image": "AI2D/images/3589.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is G in the diagram?\nChoices:\n(A) prostate gland\n(B) pubic bone\n(C) prepuce\n(D) urinary bladder" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.921875, "loss": 4.142199516296387 }, { "id": "identity_119399", "image": "VQA2.0/images/517276.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many items are placed on the white surface?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.140625, "loss": 4.184454441070557 }, { "id": "identity_8065", "image": "TQA/images/abc_question_images/human_system_circulatory_13625.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label refers to the kidneys?\nChoices:\n(A) C\n(B) N\n(C) S\n(D) U" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.90625, "loss": 4.745777130126953 }, { "id": "identity_291761", "image": "A-OKVQA/images/72011.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which season will follow the current season? \nChoices:\n(A) summer\n(B) winter\n(C) spring\n(D) autumn" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.796875, "loss": 4.238492012023926 }, { "id": "identity_217077", "image": "TextVQA/images/629a91565455ec26.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many smaller hanging CH advertisements are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.09375, "loss": 4.672184467315674 }, { "id": "identity_13436", "image": "ScienceQA/images/12144.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this passage describe the weather or the climate?\nChoices:\n(A) weather\n(B) climate" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.265625, "loss": 4.0829620361328125 }, { "id": "identity_7379", "image": "DocVQA/images/jgph0227_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the factory name ?" }, { "from": "gpt", "value": "The answer is Longmont" } ], "clip_score": 19.921875, "loss": 5.119377613067627 }, { "id": "identity_216331", "image": "PMC-VQA/images/PMC2519030_f5_26894.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cells exhibit green fluorescence?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 25.0, "loss": 4.7164788246154785 }, { "id": "identity_29692", "image": "DVQA/images/bar_train_00033937.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many groups of bars are there?" }, { "from": "gpt", "value": "The answer is eight" } ], "clip_score": 14.40625, "loss": 4.261547565460205 }, { "id": "identity_138568", "image": "FigureQA/images/12596.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many colors have a larger slice than Medium Blue?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 29.59375, "loss": 4.776007175445557 }, { "id": "identity_315689", "image": "PMC-VQA/images/PMC7519446_fig3_261632.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the focus of the study?\nChoices:\n(A) Rats \n(B) Mice \n(C) Rabbits \n(D) Pigs" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.0703125, "loss": 5.158713340759277 }, { "id": "identity_14233", "image": "IconQA/images/13247.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of fish even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.578125, "loss": 4.128465175628662 }, { "id": "identity_307730", "image": "DVQA/images/bar_train_00067929.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the smallest accuracy summed across these datasets?" }, { "from": "gpt", "value": "The answer is pair" } ], "clip_score": 33.84375, "loss": 4.326054573059082 }, { "id": "identity_327323", "image": "A-OKVQA/images/490629.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the person wearing a triangular hat?\nChoices:\n(A) dress code\n(B) visibility\n(C) protection\n(D) camouflage" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.8984375, "loss": 4.304068565368652 }, { "id": "identity_334617", "image": "PMC-VQA/images/PMC8638205_Fig3_129271.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the color bar represent in this image?\nChoices:\n(A) P-values\n(B) T-values\n(C) F-values\n(D) Z-values" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.4375, "loss": 4.971274375915527 }, { "id": "identity_274164", "image": "A-OKVQA/images/423025.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of enthusiasts are coming together at this gathering? Are they:\n(A) gamers\n(B) democrats\n(C) bikers\n(D) liberals" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.0, "loss": 4.449420928955078 }, { "id": "identity_302215", "image": "PMC-VQA/images/PMC1803030_pone-0000276-g002_9682.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the values to the left of these plots represent?\nChoices:\n(A) Age range\n(B) Gender distribution\n(C) Power distribution\n(D) Electrode position" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.40625, "loss": 4.857274055480957 }, { "id": "identity_308618", "image": "TQA/images/question_images/parts_microscope_7197.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What connects the body tube and the mechanical stage in a microscope?\nChoices:\n(A) Mirror\n(B) Arm\n(C) Base\n(D) Rack and Pinion substage" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.828125, "loss": 4.788231372833252 }, { "id": "identity_265938", "image": "IconQA/images/62205.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter?\nChoices:\n(A) slide\n(B) flip\n(C) turn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.84375, "loss": 4.776045799255371 }, { "id": "identity_262590", "image": "AI2D/images/3876.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label that corresponds to the second order branch:\n(A) B\n(B) E\n(C) C\n(D) D" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.03125, "loss": 4.141758441925049 }, { "id": "identity_296249", "image": "PMC-VQA/images/PMC8607350_FIG3_116842.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What structures are visible within the fat in bilateral feet?\nChoices:\n(A) Blood vessels \n(B) Fibrous strands \n(C) Cartilage \n(D) Muscles" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.84375, "loss": 4.808454990386963 }, { "id": "identity_33749", "image": "A-OKVQA/images/418346.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on the fence?\nChoices:\n(A) statues\n(B) hair clips\n(C) monkeys\n(D) egg" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.890625, "loss": 4.168725490570068 }, { "id": "identity_268675", "image": "PMC-VQA/images/PMC5980103_Fig3_322321.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrowhead in the image represent?\nChoices:\n(A) dental pulp \n(B) amelogenin \n(C) enamel \n(D) epithelial pearls" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.359375, "loss": 4.40016508102417 }, { "id": "identity_124725", "image": "VQA-AS/images/19745.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows are there in the room?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.421875, "loss": 4.37985897064209 }, { "id": "identity_221973", "image": "A-OKVQA/images/300578.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of leafy greens are in the bin closest to the foreground on the right?" }, { "from": "gpt", "value": "The answer is Radishes with leaves" } ], "clip_score": 25.8125, "loss": 4.441774845123291 }, { "id": "identity_228770", "image": "VQA2.0/images/86210.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are three visual indicators in the image that suggest it is winter, and how do they collectively confirm the season?" }, { "from": "gpt", "value": "The answer is Snow, bare trees, heavy clothing" } ], "clip_score": 20.421875, "loss": 4.493107795715332 }, { "id": "identity_8227", "image": "FigureQA/images/19377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Blue the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.59375, "loss": 4.184943199157715 }, { "id": "identity_330540", "image": "A-OKVQA/images/136539.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are these items in this enclosed case?\nChoices:\n(A) anti-theft\n(B) disinfect\n(C) defrost\n(D) keep cold" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.09375, "loss": 4.022552490234375 }, { "id": "identity_287089", "image": "PMC-VQA/images/PMC4240575_pone-0110727-g002_338395.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the strength of the MRI used in the image?\nOptions:\n(A) 1 Tesla\n(B) 2 Tesla\n(C) 3 Tesla\n(D) 4 Tesla" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.34375, "loss": 4.014449119567871 }, { "id": "identity_280996", "image": "VQA2.0/images/448607.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you list the colors of the umbrellas?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 21.359375, "loss": 4.01292085647583 }, { "id": "identity_311708", "image": "FigureQA/images/6635.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.71875, "loss": 4.586700439453125 }, { "id": "identity_207974", "image": "A-OKVQA/images/458123.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many jet bridges are connected to airplanes?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.671875, "loss": 4.652527332305908 }, { "id": "identity_186347", "image": "TextVQA/images/594b73dfed3eb809.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the track field?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 18.546875, "loss": 4.326140880584717 }, { "id": "identity_281872", "image": "PMC-VQA/images/PMC7089712_Fig2_140034.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the white and black colors in the spheres within the model?\nChoices:\n(A) Atomic structure \n(B) Vibrating displacements \n(C) Size of the microparticles \n(D) Chemical composition" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.890625, "loss": 4.13272762298584 }, { "id": "identity_329989", "image": "PMC-VQA/images/PMC5999462_F1_328205.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which bone is impacted by the inflammation?\nChoices:\n(A) Mandible\n(B) Maxillary\n(C) Temporomandibular\n(D) Palatine" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.46875, "loss": 4.165219306945801 }, { "id": "identity_60226", "image": "PlotQA/images/93576.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has shown the most significant decrease from 2005 to 2010?" }, { "from": "gpt", "value": "The answer is High income: nonOECD" } ], "clip_score": 24.546875, "loss": 4.548837184906006 }, { "id": "identity_290696", "image": "PMC-VQA/images/PMC7603210_jcm-09-03335-f001_288602.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In relation to the tracheal carina, where is the scan located?\nChoices:\n(A) Just above it (white arrows)\n(B) Just below it (white arrows)\n(C) To the left of it (white arrows)\n(D) To the right of it (white arrows)" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.78125, "loss": 4.383851051330566 }, { "id": "identity_299567", "image": "FigureQA/images/15299.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Pale Green the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.828125, "loss": 4.6373419761657715 }, { "id": "identity_284639", "image": "A-OKVQA/images/383905.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On what are the men depending to transport them? elephants, people, car, or motor?\nChoices:\n(A) elephants\n(B) people\n(C) car\n(D) motor" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.1875, "loss": 4.196089267730713 }, { "id": "identity_7821", "image": "PMC-VQA/images/PMC2492868_F6_26128.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the technique used to obtain the results presented in the figure? \nChoices:\n(A) Northern blot \n(B) Western blot \n(C) Southern blot \n(D) Eastern blot " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.28125, "loss": 4.494308948516846 }, { "id": "identity_290978", "image": "A-OKVQA/images/406139.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the woman holding an umbrella?\nChoices:\n(A) block sun\n(B) staying dry\n(C) block wind\n(D) to dance" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.875, "loss": 4.78329610824585 }, { "id": "identity_286673", "image": "A-OKVQA/images/257388.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In her right hand, what is the object that this woman is holding?\nChoices:\n(A) gun\n(B) napkin\n(C) wine\n(D) umbrella" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.828125, "loss": 4.163933277130127 }, { "id": "identity_301212", "image": "A-OKVQA/images/121143.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is close to the structure?\nChoices:\n(A) car\n(B) airplane\n(C) princess\n(D) baby" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.9921875, "loss": 4.33303165435791 }, { "id": "identity_241458", "image": "FigureQA/images/78464.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Compare the overall trends of Peru and Olive, and determine which one has a more consistent rate of change" }, { "from": "gpt", "value": "The answer is Peru" } ], "clip_score": 29.484375, "loss": 4.285310745239258 }, { "id": "identity_107615", "image": "TextVQA/images/83ad308d8264bbbd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the book cover's background?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 19.140625, "loss": 4.502630233764648 }, { "id": "identity_310384", "image": "A-OKVQA/images/260627.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What energy powers this vehicle?\nChoices:\n(A) electric\n(B) coal\n(C) gas\n(D) oil" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.34375, "loss": 4.2033491134643555 }, { "id": "identity_303984", "image": "IconQA/images/23183.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the quantity of diamonds even or odd?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.65625, "loss": 4.011293888092041 }, { "id": "identity_137852", "image": "PlotQA/images/54728.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many years did Denmark provide aid to Belarus?" }, { "from": "gpt", "value": "The answer is The number of Denmark's bars" } ], "clip_score": 31.234375, "loss": 4.153271198272705 }, { "id": "identity_106631", "image": "TextVQA/images/1b77a8c8a3a93788.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the manufacturer of the clock?" }, { "from": "gpt", "value": "The answer is A. LIMBERT" } ], "clip_score": 30.78125, "loss": 4.149850845336914 }, { "id": "identity_259168", "image": "ScienceQA/images/20083.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are two properties that match this object, and why?" }, { "from": "gpt", "value": "The answer is Soft and flexible" } ], "clip_score": 12.59375, "loss": 4.714831352233887 }, { "id": "identity_294205", "image": "ScienceQA/images/1885.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Minnesota?\n(A) Saint Paul\n(B) Madison\n(C) Indianapolis\n(D) Minneapolis" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.953125, "loss": 4.259068965911865 }, { "id": "identity_218997", "image": "PMC-VQA/images/PMC9224862_jcm-11-03399-f001_321101.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different views of the MRI are presented in this image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 32.15625, "loss": 4.295228958129883 }, { "id": "identity_26624", "image": "TextVQA/images/4a5645fbdf764cfd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what kind of parts are in the box?" }, { "from": "gpt", "value": "The answer is organ action parts" } ], "clip_score": 25.09375, "loss": 5.2848076820373535 }, { "id": "identity_75576", "image": "ScienceQA/images/5985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of settlement is depicted in the image?" }, { "from": "gpt", "value": "The answer is Village" } ], "clip_score": 19.78125, "loss": 4.250008583068848 }, { "id": "identity_216225", "image": "Super-CLEVR/images/superCLEVR_new_000966.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the tiny rubber object that is right of the brown matte bus?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 33.09375, "loss": 4.128238201141357 }, { "id": "identity_302556", "image": "FigureQA/images/14678.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Green larger than Light Slate?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.28125, "loss": 4.2128472328186035 }, { "id": "identity_314798", "image": "ScienceQA/images/12532.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Las Vegas\n(B) Cheyenne\n(C) Carson City\n(D) Little Rock" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.109375, "loss": 4.33596134185791 }, { "id": "identity_279615", "image": "ScienceQA/images/20878.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which question is most suitable for Duncan's experiment to answer?\nChoices:\n(A) Will steel nails rust faster when submerged in a large volume of liquid compared to a small volume?\n(B) Do steel nails rust quicker in water than in vinegar?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.5, "loss": 4.535321235656738 }, { "id": "identity_111512", "image": "Super-CLEVR/images/superCLEVR_new_001015.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object directly behind the purple motorbike?" }, { "from": "gpt", "value": "The answer is Gold" } ], "clip_score": 28.21875, "loss": 4.608312129974365 }, { "id": "identity_22343", "image": "A-OKVQA/images/578131.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the horse shown here taking part in?\nChoices:\n(A) steeple chase\n(B) racing\n(C) barrel racing\n(D) roping" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.453125, "loss": 4.219966888427734 }, { "id": "identity_210382", "image": "A-OKVQA/images/326217.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats can be seen in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 17.328125, "loss": 5.163151741027832 }, { "id": "identity_229855", "image": "TextVQA/images/01596249e27d5124.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: how many people are in the image and what are they doing?" }, { "from": "gpt", "value": "The answer is nine people, various activities" } ], "clip_score": 18.078125, "loss": 4.267517566680908 }, { "id": "identity_293171", "image": "IconQA/images/43564.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of triangles an even or odd number?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.671875, "loss": 4.135083198547363 }, { "id": "identity_338530", "image": "PMC-VQA/images/PMC7360306_F2_213981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the new cavity located on the left lower lobe?\nChoices:\n(A) posterior basal segment\n(B) medial segment\n(C) anterior basal segment\n(D) superior segment" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.75, "loss": 4.382613182067871 }, { "id": "identity_31112", "image": "PMC-VQA/images/PMC7265440_Fig2_182397.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the white arrows highlight in the image? \nChoices:\n(A) distribution patterns \n(B) RNA species \n(C) both A and B \n(D) None of the above " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.921875, "loss": 4.34853458404541 }, { "id": "identity_280302", "image": "Super-CLEVR/images/superCLEVR_new_001851.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other objects that have the same shape as the large metal object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 17.375, "loss": 4.993268013000488 }, { "id": "identity_18977", "image": "FigureQA/images/94553.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Coral have the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.140625, "loss": 4.420003890991211 }, { "id": "identity_121701", "image": "VizWiz/images/VizWiz_train_00014133.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the table?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 20.375, "loss": 4.119076728820801 }, { "id": "identity_126612", "image": "VQA2.0/images/310981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many benches can be seen in the background?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 16.375, "loss": 4.590146064758301 }, { "id": "identity_119371", "image": "TextVQA/images/d5ddd3a4372fa3f0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pillows are on the blue couch?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.828125, "loss": 4.48362398147583 }, { "id": "identity_20010", "image": "A-OKVQA/images/517570.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the goal of the man working on the boat here?\nChoices:\n(A) waterproofing\n(B) decorative\n(C) spy craft\n(D) weight loss" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.5625, "loss": 4.656443119049072 }, { "id": "identity_327064", "image": "TextVQA/images/f6d013b6ccc62564.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of pack is this box for?" }, { "from": "gpt", "value": "The answer is gift" } ], "clip_score": 13.484375, "loss": 4.830796241760254 }, { "id": "identity_328910", "image": "TQA/images/question_images/parts_microscope_7198.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where would you look through this?\n\nChoices:\n(A) Nose piece\n(B) Slide\n(C) Ocular lenses\n(D) Stage" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.609375, "loss": 4.340230464935303 }, { "id": "identity_292447", "image": "PMC-VQA/images/PMC6920284_Fig5_86553.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging modality was utilized to capture the image?\nOptions:\n(A) CT scan\n(B) PET scan\n(C) Ultrasound\n(D) MR scan" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.390625, "loss": 4.118786334991455 }, { "id": "identity_208765", "image": "TextVQA/images/9ff36f284db50ec9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many street lamps are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 17.0, "loss": 4.525355339050293 }, { "id": "identity_318803", "image": "IconQA/images/101955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a sun. What is the fourth image?\nChoices:\n(A) barn\n(B) tractor\n(C) sun" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.09375, "loss": 4.444717884063721 }, { "id": "identity_20939", "image": "A-OKVQA/images/472102.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do these people wait on?\nChoices:\n(A) dinner\n(B) bus\n(C) plane\n(D) moss" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.28125, "loss": 4.461898326873779 }, { "id": "identity_336485", "image": "IconQA/images/85394.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a star. What is the fifth image?\nChoices:\n(A) star\n(B) rocket\n(C) moon" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.71875, "loss": 4.198122978210449 }, { "id": "identity_133404", "image": "TextVQA/images/c860cc34e422b334.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the floor tiles at the base of the staircase?" }, { "from": "gpt", "value": "The answer is Beige" } ], "clip_score": 21.734375, "loss": 4.225103855133057 }, { "id": "identity_11589", "image": "IconQA/images/33547.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) turn\n(B) flip\n(C) slide" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.34375, "loss": 4.461270332336426 }, { "id": "identity_320086", "image": "A-OKVQA/images/49119.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are vessels carrying goods called?\nChoices:\n(A) freighter\n(B) sail boat\n(C) cargo ship\n(D) goods ship" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.625, "loss": 4.082477569580078 }, { "id": "identity_321018", "image": "A-OKVQA/images/297419.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What animal is near these flowers?\nChoices:\n(A) dog\n(B) rooster\n(C) cat\n(D) cow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.671875, "loss": 4.140246391296387 }, { "id": "identity_8206", "image": "A-OKVQA/images/244471.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this giraffe located?\nChoices:\n(A) wild\n(B) zoo\n(C) circus\n(D) museum" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.671875, "loss": 4.268671989440918 }, { "id": "identity_299526", "image": "A-OKVQA/images/11805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of event is taking place? \nOptions:\n(A) wedding\n(B) birthday party\n(C) farmers market\n(D) reception" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.484375, "loss": 4.258109092712402 }, { "id": "identity_7967", "image": "FigureQA/images/19523.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Orchid the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.640625, "loss": 4.168317794799805 }, { "id": "identity_32865", "image": "A-OKVQA/images/53542.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color are the rectangular bricks on the very bottom of the tower?\nChoices:\n(A) red\n(B) white\n(C) gray\n(D) blue" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.0, "loss": 4.045264720916748 }, { "id": "identity_266796", "image": "FigureQA/images/24056.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Light Green or Coral?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.03125, "loss": 4.5519585609436035 }, { "id": "identity_223764", "image": "TextVQA/images/bf475d45b6ec7ef1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the seats in the stadium?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 18.25, "loss": 4.470946311950684 }, { "id": "identity_122319", "image": "A-OKVQA/images/60992.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background?" }, { "from": "gpt", "value": "The answer is Two people" } ], "clip_score": 12.390625, "loss": 4.507182598114014 }, { "id": "identity_32045", "image": "PMC-VQA/images/PMC7865835_cancers-13-00424-f003_361956.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the red and orange colors in the DSC curve represent? \nChoices:\n(A) Healthy tissue in the brain \n(B) Blood flow in the brain \n(C) Regions of the brain affected by Alzheimer's disease \n(D) Areas of the brain affected by a tumor " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.296875, "loss": 4.0256123542785645 }, { "id": "identity_145297", "image": "DocVQA/images/xkjp0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the source of the document?" }, { "from": "gpt", "value": "The answer is industrydocuments.ucsf.edu" } ], "clip_score": 15.3359375, "loss": 4.135232925415039 }, { "id": "identity_301993", "image": "FigureQA/images/89202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.5703125, "loss": 4.0417094230651855 }, { "id": "identity_321076", "image": "A-OKVQA/images/63275.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what kind of store are these items displayed?\nChoices:\n(A) toy\n(B) hardware\n(C) appliance\n(D) plumbing" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.109375, "loss": 4.031633377075195 }, { "id": "identity_207571", "image": "PMC-VQA/images/PMC7008751_FIG2_113810.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct circular shapes are visible in the non-blurred portion of the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 18.75, "loss": 4.353660583496094 }, { "id": "identity_38108", "image": "FigureQA/images/95850.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Blue have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.484375, "loss": 4.201789379119873 }, { "id": "identity_6116", "image": "ScienceQA/images/11530.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these three objects have in common?\nChoices:\n(A) slippery\n(B) sweet\n(C) salty" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.21875, "loss": 4.164891242980957 }, { "id": "identity_191896", "image": "PMC-VQA/images/PMC5390980_pgen.1006694.g003_152729.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the overall color tone of the image?" }, { "from": "gpt", "value": "The answer is Grayscale" } ], "clip_score": 18.28125, "loss": 4.539467811584473 }, { "id": "identity_323190", "image": "TextVQA/images/91a539f545ccb325.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What camp is mentioned on this sign?" }, { "from": "gpt", "value": "The answer is bordello" } ], "clip_score": 40.03125, "loss": 4.116063594818115 }, { "id": "identity_205601", "image": "VQA2.0/images/112059.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of keyboard is on the desk?" }, { "from": "gpt", "value": "The answer is M-Audio" } ], "clip_score": 29.140625, "loss": 4.0249457359313965 }, { "id": "identity_223867", "image": "FigureQA/images/16891.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined percentage of Seafoam and Khaki, and is it greater than the percentage of Gold?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 34.34375, "loss": 4.093193054199219 }, { "id": "identity_299522", "image": "ScienceQA/images/277.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Kentucky?\nChoices:\n(A) Frankfort\n(B) Nashville\n(C) Louisville\n(D) Lexington" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.453125, "loss": 4.049497604370117 }, { "id": "identity_278410", "image": "ScienceQA/images/16367.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of New Jersey?\nChoices:\n(A) Hartford\n(B) Trenton\n(C) Newark\n(D) Jersey City" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.5625, "loss": 4.050283908843994 }, { "id": "identity_323384", "image": "A-OKVQA/images/155167.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who are these signs for?\nChoices:\n(A) drivers\n(B) directions\n(C) downtown\n(D) pedestrians" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.53125, "loss": 4.224915981292725 }, { "id": "identity_308494", "image": "IconQA/images/40848.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a fish. Which image is fourth?\nChoices:\n(A) sub\n(B) fish\n(C) turtle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.984375, "loss": 4.120014190673828 }, { "id": "identity_11045", "image": "PMC-VQA/images/PMC6675056_pone.0220380.g001_17388.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the white arrows in column 1 indicating? \nChoices:\n(A) a tumorous mass \n(B) a blood vessel \n(C) a nasal septum \n(D) a maxillary sinus " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.765625, "loss": 4.8578619956970215 }, { "id": "identity_129535", "image": "VQA2.0/images/218282.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sun loungers can be counted in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 25.03125, "loss": 4.259178638458252 }, { "id": "identity_23205", "image": "A-OKVQA/images/140843.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How was this ball propelled forward?\nChoices:\n(A) blown on\n(B) kicked\n(C) dribbled\n(D) batted" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.5, "loss": 4.337942600250244 }, { "id": "identity_21623", "image": "PMC-VQA/images/PMC4661912_ijms-16-26054-f004_448410.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which view shows the tracer activity from the side of the brain? \nChoices:\n(A) Sagittal \n(B) Coronal \n(C) Transverse \n(D) Frontal " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.703125, "loss": 4.261773109436035 }, { "id": "identity_320091", "image": "DocVQA/images/sthd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of the year is mentioned in the page?" }, { "from": "gpt", "value": "The answer is november" } ], "clip_score": 17.9375, "loss": 4.390471458435059 }, { "id": "identity_235753", "image": "TextVQA/images/bdbf26933c859932.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is written on the box to the right of the shirt and what is the number written on the box below it?" }, { "from": "gpt", "value": "The answer is nowc-zipper-blck\" and \"50" } ], "clip_score": 18.53125, "loss": 5.248717308044434 }, { "id": "identity_185850", "image": "PMC-VQA/images/PMC7695848_Fig5_307960.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the double line labeled 'ab' indicate in the image?" }, { "from": "gpt", "value": "The answer is A ligament" } ], "clip_score": 25.359375, "loss": 4.180575370788574 }, { "id": "identity_254186", "image": "PMC-VQA/images/PMC5557509_Fig2_203153.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color pattern of the cervical channel compared to the surrounding tissue in the first scenario, and how does this pattern change in the second scenario?" }, { "from": "gpt", "value": "The answer is The color pattern changes from red to another color" } ], "clip_score": 24.390625, "loss": 4.014200687408447 }, { "id": "identity_316708", "image": "A-OKVQA/images/364201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the main color of this vehicle similar to?\nChoices:\n(A) grass\n(B) flamingo\n(C) daisy\n(D) sky" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.703125, "loss": 4.108824729919434 }, { "id": "identity_271264", "image": "PMC-VQA/images/PMC4767903_F8_5955.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organ is the subject of the histopathology shown in the image: Heart, Brain, Lung, or Liver?\nChoices:\n(A) Heart \n(B) Brain \n(C) Lung \n(D) Liver" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.984375, "loss": 4.620499610900879 }, { "id": "identity_325258", "image": "A-OKVQA/images/501493.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the individual in the orange cap doing?\nChoices:\n(A) scoring game\n(B) yelling insults\n(C) spotting cheaters\n(D) establishing rhythm" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 11.0078125, "loss": 4.16770601272583 }, { "id": "identity_276923", "image": "PMC-VQA/images/PMC5295295_ijerph-14-00044-f002_123866.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the rat's body was analyzed in this study? \nOptions:\n(A) lungs\n(B) heart\n(C) thyroid and liver\n(D) brain and kidney" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.328125, "loss": 4.372357368469238 }, { "id": "identity_280730", "image": "TextVQA/images/4748e3595f9984b0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which company does the red banner represent?" }, { "from": "gpt", "value": "The answer is santand" } ], "clip_score": 15.5234375, "loss": 4.3108649253845215 }, { "id": "identity_6619", "image": "FigureQA/images/50175.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gray the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.5625, "loss": 4.098736763000488 }, { "id": "identity_303389", "image": "PMC-VQA/images/PMC4964051_Fig2_56474.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the white dotted line in the image indicate?\nChoices:\n(A) Skin condition \n(B) Lymphoid cell infiltration \n(C) Epidermal structure \n(D) Blood vessels" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.578125, "loss": 4.402307033538818 }, { "id": "identity_299475", "image": "AI2D/images/3003.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The myelin sheath surrounds which part of the neuron?\nChoices:\n(A) C\n(B) F\n(C) G\n(D) E" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.09375, "loss": 4.6969313621521 }, { "id": "identity_319952", "image": "A-OKVQA/images/214385.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What age group is most represented here?\nChoices:\n(A) seniors\n(B) teens\n(C) children\n(D) adults" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.5, "loss": 4.714291572570801 }, { "id": "identity_222903", "image": "TextVQA/images/1b7dfdac663874aa.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the dominant color on the label of the middle bottle?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 24.421875, "loss": 4.312294960021973 }, { "id": "identity_16528", "image": "IconQA/images/67953.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a barn. Which picture is second?\nChoices:\n(A) tractor\n(B) barn\n(C) sun" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.5, "loss": 4.185470104217529 }, { "id": "identity_26277", "image": "A-OKVQA/images/302141.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What country is this location?\nChoices:\n(A) canada\n(B) spain\n(C) mexico\n(D) france" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.6875, "loss": 4.423878192901611 }, { "id": "identity_19774", "image": "A-OKVQA/images/455588.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term for how the boat is situated?\nChoices:\n(A) mooring\n(B) lassoing\n(C) docked\n(D) anchored" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.625, "loss": 4.460265159606934 }, { "id": "identity_221971", "image": "A-OKVQA/images/402960.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many helmets are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.890625, "loss": 4.382084369659424 }, { "id": "identity_3167", "image": "FigureQA/images/19525.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Seafoam the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.921875, "loss": 4.498047828674316 }, { "id": "identity_331005", "image": "A-OKVQA/images/21213.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the function of the weight in the back?\nChoices:\n(A) paper weight\n(B) weigh money\n(C) balance table\n(D) measure price" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.53125, "loss": 4.660157203674316 }, { "id": "identity_14739", "image": "ScienceQA/images/6462.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Arizona?\nChoices:\n(A) Phoenix\n(B) Tucson\n(C) Olympia\n(D) Fort Wayne" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.984375, "loss": 4.407195568084717 }, { "id": "identity_133227", "image": "TextVQA/images/be1689ff9171aad9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing glasses?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 15.96875, "loss": 4.31179666519165 }, { "id": "identity_113993", "image": "VQA-RAD/images/synpic46539.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can the diaphragm be clearly visualized in this image?" }, { "from": "gpt", "value": "The answer is Partially" } ], "clip_score": 29.8125, "loss": 4.345977783203125 }, { "id": "identity_3343", "image": "ScienceQA/images/9014.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following organisms is the secondary consumer in this food web?\nChoices:\n(A) rough-legged hawk\n(B) grizzly bear\n(C) brown lemming" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.0, "loss": 4.473360538482666 }, { "id": "identity_308868", "image": "Super-CLEVR/images/superCLEVR_new_000885.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the small cyan matte object shaped like the tiny green rubber object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 17.984375, "loss": 5.216101169586182 }, { "id": "identity_157656", "image": "AI2D/images/1825.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What role does the snake play in this grassland ecosystem?" }, { "from": "gpt", "value": "The answer is Secondary consumer" } ], "clip_score": 30.28125, "loss": 4.318917274475098 }, { "id": "identity_268655", "image": "VQA-AS/images/5663.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color does the smallest cat have?" }, { "from": "gpt", "value": "The answer is gray" } ], "clip_score": 21.015625, "loss": 4.7457051277160645 }, { "id": "identity_320842", "image": "IconQA/images/99219.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a moon. What is the third image?\nChoices:\n(A) star\n(B) rocket\n(C) moon" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.25, "loss": 4.216568946838379 }, { "id": "identity_209174", "image": "A-OKVQA/images/338258.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are in the water?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 18.234375, "loss": 4.638897895812988 }, { "id": "identity_21741", "image": "A-OKVQA/images/113757.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of classroom could this be called?\nChoices:\n(A) podium\n(B) amphitheater\n(C) stadium\n(D) enclave" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.0, "loss": 4.758066654205322 }, { "id": "identity_272785", "image": "FigureQA/images/74764.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater in value, Light Gold or Dark Turquoise?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.71875, "loss": 4.6256585121154785 }, { "id": "identity_330519", "image": "A-OKVQA/images/43431.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a major event in this sport?\nChoices:\n(A) wimbledon\n(B) indy 500\n(C) superbowl\n(D) world series" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.71875, "loss": 4.13686990737915 }, { "id": "identity_276504", "image": "VQA2.0/images/133273.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which bathroom features are visible in the mirror's reflection?" }, { "from": "gpt", "value": "The answer is cabinet" } ], "clip_score": 29.96875, "loss": 4.360281467437744 }, { "id": "identity_310316", "image": "VQA-AS/images/13652.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more of these animals or these objects?" }, { "from": "gpt", "value": "The answer is logs" } ], "clip_score": 21.625, "loss": 4.5595197677612305 }, { "id": "identity_188257", "image": "A-OKVQA/images/71603.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of building is reflected in the glass windows of the building on the left?" }, { "from": "gpt", "value": "The answer is A historic or classic architectural style building" } ], "clip_score": 12.984375, "loss": 4.288670539855957 }, { "id": "identity_186555", "image": "TextVQA/images/003d25460307a847.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible clipboards are hanging on the wall?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 12.1484375, "loss": 4.806240558624268 }, { "id": "identity_29675", "image": "Super-CLEVR/images/superCLEVR_new_000437.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer small yellow road bikes than gray metal wagons?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 24.828125, "loss": 5.220521450042725 }, { "id": "identity_267360", "image": "A-OKVQA/images/581458.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of train is depicted in the image provided? \nOptions:\n(A) oil\n(B) coal\n(C) solar\n(D) elevated" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.609375, "loss": 4.251880168914795 }, { "id": "identity_295509", "image": "A-OKVQA/images/364522.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is the silver object being carried by the man made of?\nChoices:\n(A) nylon\n(B) wood\n(C) metal\n(D) plastic" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.7109375, "loss": 4.43754768371582 }, { "id": "identity_32800", "image": "ScienceQA/images/5255.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Oklahoma?\nChoices:\n(A) Oklahoma City\n(B) Cedar Rapids\n(C) Austin\n(D) Tulsa" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.875, "loss": 4.183509349822998 }, { "id": "identity_111877", "image": "Super-CLEVR/images/superCLEVR_new_000238.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the object that is furthest to the right in the image?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 21.671875, "loss": 4.460862159729004 }, { "id": "identity_287460", "image": "PMC-VQA/images/PMC6292629_pone.0209169.g003_411329.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is the focal uptake observed in the PET/CT image situated?\nChoices:\n(A) Right lobe of the prostate\n(B) Left lobe of the prostate\n(C) Upper lobe of the lung\n(D) Lower lobe of the lung" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.25, "loss": 4.848799705505371 }, { "id": "identity_289075", "image": "TextVQA/images/aecf6e390f953951.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written between the two lamps at the top?" }, { "from": "gpt", "value": "The answer is royal institution" } ], "clip_score": 22.203125, "loss": 4.177574157714844 }, { "id": "identity_212358", "image": "A-OKVQA/images/59281.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing red hats?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.03125, "loss": 4.235759735107422 }, { "id": "identity_125190", "image": "Super-CLEVR/images/superCLEVR_new_001340.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the car's roof?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 20.109375, "loss": 4.287996768951416 }, { "id": "identity_274370", "image": "FigureQA/images/91093.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly describes the smoothness of Dark Turquoise: (A) no or (B) yes?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.375, "loss": 4.318021297454834 }, { "id": "identity_283930", "image": "A-OKVQA/images/243820.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What item is located to the right of the sink?\nChoices:\n(A) cat\n(B) cardboard box\n(C) scissors\n(D) roach" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.671875, "loss": 4.228370666503906 }, { "id": "identity_309134", "image": "IconQA/images/104098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a sun. What is the ninth image?\nChoices:\n(A) tractor\n(B) barn\n(C) sun" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 36.125, "loss": 4.417085647583008 }, { "id": "identity_328448", "image": "PMC-VQA/images/PMC2588615_F1_30719.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do these CT scans show?\nChoices:\n(A) Back of the body \n(B) Front of the body \n(C) Upper extremities \n(D) Lower extremities" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.796875, "loss": 4.19108247756958 }, { "id": "identity_26977", "image": "A-OKVQA/images/388933.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the white chord around the woman's neck?\nChoices:\n(A) string\n(B) necklace\n(C) twine\n(D) headphone wire" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 4.83984375, "loss": 4.347550392150879 }, { "id": "identity_237939", "image": "TextVQA/images/1d09b6208473c6e6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the screen shown on the phone, and what are the steps involved in completing the task it describes?" }, { "from": "gpt", "value": "The answer is Test TV control functionality and confirm success or failure" } ], "clip_score": 24.359375, "loss": 5.457708358764648 }, { "id": "identity_19997", "image": "AI2D/images/1974.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A food web is shown. Killer whale is a/an ?\nChoices:\n(A) Carnivores\n(B) Herbivores\n(C) Omnivores\n(D) None of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.9375, "loss": 4.405919075012207 }, { "id": "identity_24717", "image": "VQA2.0/images/437970.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of ship can be seen in the background?" }, { "from": "gpt", "value": "The answer is cargo" } ], "clip_score": 16.78125, "loss": 4.067239284515381 }, { "id": "identity_13014", "image": "TQA/images/abc_question_images/cycle_water_14929.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the letter J represent?\nChoices:\n(A) RUN OFF\n(B) PERCOLATION\n(C) PRECIPITATION\n(D) CONDENSATION" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.25, "loss": 5.029909610748291 }, { "id": "identity_334982", "image": "PMC-VQA/images/PMC6635362_Fig3_9667.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color indicates pneumococcal density in the images?\nChoices:\n(A) blue\n(B) green\n(C) red\n(D) yellow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.15625, "loss": 4.411135196685791 }, { "id": "identity_311348", "image": "ScienceQA/images/679.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic fits this item?\nChoices:\n(A) slippery\n(B) salty" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.640625, "loss": 4.274482250213623 }, { "id": "identity_277659", "image": "IconQA/images/45915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the shape of this object? Is it a cone, sphere, cube, or cylinder?\nChoices:\n(A) cone\n(B) sphere\n(C) cube\n(D) cylinder" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.109375, "loss": 4.081129550933838 }, { "id": "identity_33006", "image": "TextVQA/images/eeb5f298ba418088.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the name of the magazine?" }, { "from": "gpt", "value": "The answer is ten" } ], "clip_score": 20.3125, "loss": 4.850861549377441 }, { "id": "identity_297694", "image": "ScienceQA/images/7464.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What state of matter does a slide belong to: solid or liquid?\nChoices:\n(A) solid\n(B) liquid" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.34375, "loss": 4.110959053039551 }, { "id": "identity_278920", "image": "AI2D/images/1905.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organism is at the top of the food chain in this diagram? \nChoices:\n(A) hawk\n(B) snake\n(C) grasshopper\n(D) lizard" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.921875, "loss": 4.092547416687012 }, { "id": "identity_280161", "image": "DVQA/images/bar_train_00120599.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Among all the datasets, which algorithm has the highest total accuracy?" }, { "from": "gpt", "value": "The answer is driver" } ], "clip_score": 32.8125, "loss": 4.467126369476318 }, { "id": "identity_266930", "image": "FigureQA/images/69019.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Turquoise the color with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.28125, "loss": 4.095297336578369 }, { "id": "identity_184544", "image": "A-OKVQA/images/179153.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background of the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 19.953125, "loss": 4.453907489776611 }, { "id": "identity_293460", "image": "A-OKVQA/images/61181.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which vehicles are represented by the green and white VW bugs? \n(A) race cars \n(B) cabs \n(C) police car \n(D) buses" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.609375, "loss": 4.3671875 }, { "id": "identity_209660", "image": "PMC-VQA/images/PMC4934358_sensors-16-00933-f006_48208.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the barrier in the image?" }, { "from": "gpt", "value": "The answer is White or grey" } ], "clip_score": 26.046875, "loss": 4.181971073150635 }, { "id": "identity_33478", "image": "PMC-VQA/images/PMC8195932_Fig3_464740.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the cavernous ICA segment in the patient? \nChoices:\n(A) Left posterior genu \n(B) Right anterior genu \n(C) Left anterior genu \n(D) Right posterior genu. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.453125, "loss": 4.060177803039551 }, { "id": "identity_284828", "image": "A-OKVQA/images/308760.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What animal does the very large toy resemble? Is it a cow, dog, horse, or elephant?\nChoices:\n(A) cow\n(B) dog\n(C) horse\n(D) elephant" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.390625, "loss": 4.051790714263916 }, { "id": "identity_77605", "image": "ScienceQA/images/3548.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What geographical feature is prominently displayed in this image?" }, { "from": "gpt", "value": "The answer is Butte" } ], "clip_score": 22.59375, "loss": 4.421817302703857 }, { "id": "identity_300305", "image": "FigureQA/images/64445.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.5, "loss": 4.546680927276611 }, { "id": "identity_268330", "image": "PMC-VQA/images/PMC5615704_materials-10-01049-f008_220472.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible on the left side of the newly-formed bone?\nChoices:\n(A) Tissue rich in angiogenesis \n(B) Dense fibrous connective tissue \n(C) Both A and B \n(D) None of the above." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.234375, "loss": 4.360533237457275 }, { "id": "identity_277876", "image": "TabMWP/images/35340.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which event in the schedule ends at 8.25 P.M.?\nChoices:\n(A) track meet\n(B) baseball game\n(C) women's tennis match\n(D) golf tournament" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.6875, "loss": 4.286688804626465 }, { "id": "identity_328323", "image": "A-OKVQA/images/108500.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why does this large vehicle stop here?\nChoices:\n(A) fixing vehicle\n(B) traffic\n(C) truck broken\n(D) to advertise" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.2265625, "loss": 4.286677360534668 }, { "id": "identity_2785", "image": "Super-CLEVR/images/superCLEVR_new_001955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there an equal number of large gray things that are in front of the cyan mountain bike and big yellow rubber road bikes?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 28.46875, "loss": 4.992091178894043 }, { "id": "identity_263311", "image": "A-OKVQA/images/260182.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of setting is depicted in this street scene?\nChoices:\n(A) rural\n(B) urban\n(C) farm\n(D) suburban" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.765625, "loss": 4.739914894104004 }, { "id": "identity_18638", "image": "VizWiz/images/VizWiz_train_00010738.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me what this is? " }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 20.046875, "loss": 4.376781940460205 }, { "id": "identity_265681", "image": "PMC-VQA/images/PMC4864780_Fig5_29969.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of organisms are depicted in the micrographs?\nChoices:\n(A) Bacteria \n(B) Zooplankton \n(C) Phytoplankton \n(D) Fungi" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.078125, "loss": 4.256243705749512 }, { "id": "identity_59592", "image": "FigureQA/images/67935.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color line has the steepest slope between x-axis labels 20 and 40?" }, { "from": "gpt", "value": "The answer is Gold" } ], "clip_score": 26.0625, "loss": 4.169075012207031 }, { "id": "identity_312362", "image": "PMC-VQA/images/PMC8118428_F0001_439517.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the shape of the enlarged lymph nodes seen in this view?\nChoices:\n(A) Spherical \n(B) Irregular \n(C) Oval \n(D) Round" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.515625, "loss": 4.109986305236816 }, { "id": "identity_28679", "image": "AI2D/images/10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which image represents Fagonia indica?\nChoices:\n(A) D\n(B) E\n(C) A\n(D) B" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.625, "loss": 4.134749889373779 }, { "id": "identity_306624", "image": "FigureQA/images/1298.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.546875, "loss": 4.039025783538818 }, { "id": "identity_13651", "image": "PMC-VQA/images/PMC6351849_acel12848-fig-0004_428866.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is stained in red color in the images? \nChoices:\n(A) TAF staining \n(B) Telomeres \n(C) GFP and p21-positive xenotransplanted fibroblasts \n(D) Myofibre nuclei " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.859375, "loss": 4.156996726989746 }, { "id": "identity_319367", "image": "ScienceQA/images/6870.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these places is indicated on the map?\nChoices:\n(A) Seattle\n(B) San Francisco\n(C) Las Vegas\n(D) Los Angeles" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.84375, "loss": 4.629395008087158 }, { "id": "identity_333125", "image": "ScienceQA/images/10683.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Santa Fe\n(B) Jefferson City\n(C) Honolulu\n(D) Salem" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.09375, "loss": 4.1377058029174805 }, { "id": "identity_26785", "image": "PMC-VQA/images/PMC7470842_fig3_247953.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the basis of the subcellular membrane structures depicted in the schematic diagrams? \nChoices:\n(A) electron microscopy \n(B) fluorescence microscopy \n(C) light microscopy \n(D) transmission electron microscopy " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.609375, "loss": 4.1957106590271 }, { "id": "identity_278130", "image": "ScienceQA/images/10243.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being pointed out in the highlighted area?\nChoices:\n(A) Vanuatu\n(B) Fiji\n(C) New Zealand\n(D) Nauru" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.75, "loss": 4.36767053604126 }, { "id": "identity_113498", "image": "A-OKVQA/images/203702.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible zebras are there in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.796875, "loss": 4.424319267272949 }, { "id": "identity_312663", "image": "AI2D/images/4465.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which footprint is highly curved?\nChoices:\n(A) normal\n(B) high\n(C) pronated\n(D) flat" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.65625, "loss": 4.312349796295166 }, { "id": "identity_250869", "image": "IconQA/images/95762.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lines of symmetry does the given shape have, and where are they located?" }, { "from": "gpt", "value": "The answer is Two lines, vertical and horizontal" } ], "clip_score": 17.515625, "loss": 4.228777885437012 }, { "id": "identity_269734", "image": "FigureQA/images/46247.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Seafoam the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.5625, "loss": 4.5278520584106445 }, { "id": "identity_263532", "image": "ScienceQA/images/1150.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the material used to make this sandwich bag?\nChoices:\n(A) plastic\n(B) cotton" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.953125, "loss": 4.217937469482422 }, { "id": "identity_301833", "image": "A-OKVQA/images/323421.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What items are found here?\nChoices:\n(A) posters\n(B) hams\n(C) pots\n(D) sausages" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.40625, "loss": 4.291008472442627 }, { "id": "identity_13207", "image": "A-OKVQA/images/172439.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: All riders rely on each other to safely ride at the same what?\nChoices:\n(A) hour\n(B) level\n(C) speed\n(D) house" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.84375, "loss": 4.192174434661865 }, { "id": "identity_338441", "image": "PMC-VQA/images/PMC3699433_F6_215215.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the type of aortic dissection shown here?\nChoices:\n(A) Type I \n(B) Type II \n(C) Type III \n(D) Type IV" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.5, "loss": 4.102313995361328 }, { "id": "identity_198699", "image": "PMC-VQA/images/PMC3206434_F1_113893.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiopaque object indicated by the arrow?" }, { "from": "gpt", "value": "The answer is A swallowed coin" } ], "clip_score": 20.515625, "loss": 4.271228313446045 }, { "id": "identity_9143", "image": "TQA/images/question_images/atomic_structure_9016.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which particle has a positive electric charge?\nChoices:\n(A) Nucleus\n(B) Proton\n(C) Electron\n(D) Neutron" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.53125, "loss": 4.092776298522949 }, { "id": "identity_317420", "image": "A-OKVQA/images/63791.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where do people play soccer in this place?\nChoices:\n(A) farm\n(B) park\n(C) tundra\n(D) city mall" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.4375, "loss": 4.422279357910156 }, { "id": "identity_23135", "image": "Super-CLEVR/images/superCLEVR_new_001733.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the large aeroplane on the right side of the blue shiny minivan and the brown airliner made of the same material?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 26.234375, "loss": 4.98242712020874 }, { "id": "identity_106797", "image": "Super-CLEVR/images/superCLEVR_new_000668.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the object that is in front of the purple shiny car?" }, { "from": "gpt", "value": "The answer is Rubber" } ], "clip_score": 24.640625, "loss": 4.186824798583984 }, { "id": "identity_329569", "image": "TQA/images/question_images/optics_reflection_9178.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the angle of incidence is increased, what will happen to this angle?\nChoices:\n(A) This angle will increase by a greater degree\n(B) This angle will remain the same\n(C) This angle will increase by the same degree\n(D) This angle will decrease" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.3125, "loss": 4.367319107055664 }, { "id": "identity_335088", "image": "A-OKVQA/images/504974.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was this photo most likely taken?\nChoices:\n(A) north america\n(B) south america\n(C) europe\n(D) africa" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.1484375, "loss": 4.379918098449707 }, { "id": "identity_108622", "image": "TextVQA/images/9e47d83a0cc47352.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What artistic technique is represented by the image in the bottom left quadrant?" }, { "from": "gpt", "value": "The answer is Brulure" } ], "clip_score": 19.765625, "loss": 4.55823278427124 }, { "id": "identity_128281", "image": "VQA2.0/images/346290.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many legs of the animal can be seen in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 26.4375, "loss": 4.128829479217529 }, { "id": "identity_273319", "image": "ScienceQA/images/17983.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which event occurs first based on the timeline provided?\nChoices:\n(A) California's population surpasses 300,000.\n(B) Gold becomes more difficult to find in California." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 39.40625, "loss": 4.053519248962402 }, { "id": "identity_294988", "image": "ScienceQA/images/13261.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Rhode Island?\n(A) Manchester\n(B) Providence\n(C) Augusta\n(D) Newport" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.3125, "loss": 4.1261420249938965 }, { "id": "identity_11439", "image": "A-OKVQA/images/147970.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The man is throwing the frisbee behind his back and under what body part?\nChoices:\n(A) left arm\n(B) right leg\n(C) right arm\n(D) left leg" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.1875, "loss": 4.039666652679443 }, { "id": "identity_302408", "image": "ScienceQA/images/14223.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared characteristic of these items?\nChoices:\n(A) bouncy\n(B) soft" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.5, "loss": 4.4120378494262695 }, { "id": "identity_115954", "image": "Super-CLEVR/images/superCLEVR_new_001519.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the object to the right of the purple bicycle made of?" }, { "from": "gpt", "value": "The answer is Rubber" } ], "clip_score": 22.03125, "loss": 4.373423099517822 }, { "id": "identity_114424", "image": "VQA2.0/images/53803.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of parking meter is shown in the image?" }, { "from": "gpt", "value": "The answer is Digital" } ], "clip_score": 23.90625, "loss": 4.991291046142578 }, { "id": "identity_292274", "image": "TabMWP/images/855.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.8125, "loss": 4.2234206199646 }, { "id": "identity_271294", "image": "Super-CLEVR/images/superCLEVR_new_001008.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the red bus located to the left of the object behind the utility bike?" }, { "from": "gpt", "value": "The answer is small" } ], "clip_score": 26.5, "loss": 4.122403144836426 }, { "id": "identity_330709", "image": "A-OKVQA/images/532216.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the individual in the foreground wearing shorts doing?\nChoices:\n(A) handstands\n(B) eating\n(C) sleeping\n(D) playing tennis" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.6875, "loss": 4.090942859649658 }, { "id": "identity_17974", "image": "AI2D/images/4243.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the highest layer shown?\nChoices:\n(A) none of the above\n(B) bedrock\n(C) A-horizon\n(D) O-horizon" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.203125, "loss": 4.4386420249938965 }, { "id": "identity_195602", "image": "VQA2.0/images/131087.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many structures can be seen on the land in the background?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 11.1484375, "loss": 4.8253655433654785 }, { "id": "identity_9592", "image": "TextVQA/images/8309f77b504ef446.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what does this say?" }, { "from": "gpt", "value": "The answer is happry" } ], "clip_score": 17.578125, "loss": 4.338481426239014 }, { "id": "identity_36029", "image": "PMC-VQA/images/PMC3206434_F1_113893.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where did the seed migrate to eventually? \nChoices:\n(A) Right side of the middle abdomen \n(B) Right kidney \n(C) Left side of the middle abdomen \n(D) Left kidney " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.8125, "loss": 4.0343708992004395 }, { "id": "identity_4771", "image": "TQA/images/question_images/biomes_8016.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are tropical forests mostly located?\nChoices:\n(A) Near the equator\n(B) In the southern hemisphere\n(C) In the northern hemisphere \n(D) Near the poles" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.21875, "loss": 4.041224002838135 }, { "id": "identity_326201", "image": "Super-CLEVR/images/superCLEVR_new_000156.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the vehicle that is the same color as the aircraft?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 30.1875, "loss": 4.47686767578125 }, { "id": "identity_26686", "image": "A-OKVQA/images/178258.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What role does this man play?\nChoices:\n(A) actor\n(B) motorcycle racer\n(C) stuntman\n(D) terrorist" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.546875, "loss": 4.449734687805176 }, { "id": "identity_297920", "image": "A-OKVQA/images/314390.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What mood should this type of event evoke in the attendees?\nChoices:\n(A) excited\n(B) joyous\n(C) bored\n(D) angry" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.0703125, "loss": 4.419424057006836 }, { "id": "identity_303006", "image": "AI2D/images/3185.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the outermost part of this structure?\nChoices:\n(A) none of the above\n(B) chloroplast\n(C) cell wall\n(D) ribosome" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.453125, "loss": 4.452154636383057 }, { "id": "identity_301396", "image": "TextVQA/images/bb62f2d28c307de2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What language is used in this ad?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 18.515625, "loss": 4.370591163635254 }, { "id": "identity_308154", "image": "IconQA/images/10103.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on?\nChoices:\n(A) white\n(B) yellow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.796875, "loss": 4.598421573638916 }, { "id": "identity_289110", "image": "IconQA/images/40745.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on, yellow or white?\nChoices:\n(A) yellow\n(B) white" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.90625, "loss": 4.546266555786133 }, { "id": "identity_32872", "image": "PMC-VQA/images/PMC4844105_pone.0154174.g006_23944.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the position of the dorsal? \nChoices:\n(A) to the bottom and medial to the left \n(B) to the left and medial to the top \n(C) to the top and medial to the right \n(D) to the right and medial to the top " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.6015625, "loss": 4.222054481506348 }, { "id": "identity_331024", "image": "PMC-VQA/images/PMC8150050_RSOS210644F3_450987.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the cyan color indicate in these images?\nChoices:\n(A) Dead coral tissue \n(B) Healthy coral tissue \n(C) Algae \n(D) Water" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.03125, "loss": 5.0070672035217285 }, { "id": "identity_327196", "image": "IconQA/images/70974.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a paw. What is the second image?\nChoices:\n(A) paw\n(B) dog\n(C) house" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.4375, "loss": 4.52103853225708 }, { "id": "identity_30026", "image": "A-OKVQA/images/523349.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The instrument in the picture is used to play for?\nChoices:\n(A) snowboarding\n(B) skiing\n(C) surfing\n(D) skating" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.578125, "loss": 4.061620235443115 }, { "id": "identity_207853", "image": "A-OKVQA/images/363845.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the boat in the foreground?" }, { "from": "gpt", "value": "The answer is Brown with a blue strip" } ], "clip_score": 23.640625, "loss": 4.1462626457214355 }, { "id": "identity_276616", "image": "VQA-AS/images/12912.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What items are currently located on the table?" }, { "from": "gpt", "value": "The answer is wine food" } ], "clip_score": 21.96875, "loss": 4.496807098388672 }, { "id": "identity_3268", "image": "VizWiz/images/VizWiz_train_00013069.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is this?" }, { "from": "gpt", "value": "The answer is this black blue" } ], "clip_score": 22.8125, "loss": 5.8742499351501465 }, { "id": "identity_317279", "image": "AI2D/images/3595.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is C in the image?\nChoices:\n(A) diaphragm\n(B) trachea\n(C) rib\n(D) mouth" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.671875, "loss": 4.255617141723633 }, { "id": "identity_274620", "image": "VizWiz/images/VizWiz_train_00006424.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is sweetarts candy tabs" } ], "clip_score": 28.234375, "loss": 4.114957809448242 }, { "id": "identity_136928", "image": "DVQA/images/bar_train_00051336.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the highest accuracy?" }, { "from": "gpt", "value": "The answer is height" } ], "clip_score": 31.140625, "loss": 4.294948577880859 }, { "id": "identity_304806", "image": "FigureQA/images/56426.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smallest segment?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.25, "loss": 4.108223915100098 }, { "id": "identity_3606", "image": "Super-CLEVR/images/superCLEVR_new_001351.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What size is the cyan thing that is to the right of the small blue thing?" }, { "from": "gpt", "value": "The answer is small" } ], "clip_score": 16.4375, "loss": 4.427666664123535 }, { "id": "identity_285407", "image": "A-OKVQA/images/106375.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for having a bike on the back of his vehicle?\nChoices:\n(A) transporting it\n(B) stealing it\n(C) found it\n(D) selling it" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.875, "loss": 4.458251476287842 }, { "id": "identity_285740", "image": "DVQA/images/bar_train_00022216.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the bars run horizontally?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 20.53125, "loss": 4.468655109405518 }, { "id": "identity_19299", "image": "VQA-AS/images/19384.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the boy looking at?" }, { "from": "gpt", "value": "The answer is birds" } ], "clip_score": 24.046875, "loss": 4.350097179412842 }, { "id": "identity_318521", "image": "AI2D/images/2235.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label refers to the grasshopper?\nChoices:\n(A) A\n(B) I\n(C) F\n(D) C" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.34375, "loss": 4.527157783508301 }, { "id": "identity_157716", "image": "ScienceQA/images/1791.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the condition of the sky in the image?" }, { "from": "gpt", "value": "The answer is Clear sky" } ], "clip_score": 17.625, "loss": 4.032658100128174 }, { "id": "identity_220170", "image": "VQA-AS/images/3756.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows are there in the room?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.34375, "loss": 4.033413410186768 }, { "id": "identity_278034", "image": "IconQA/images/51054.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) flip\n(B) turn\n(C) slide" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.03125, "loss": 4.0868635177612305 }, { "id": "identity_62298", "image": "DVQA/images/bar_train_00183982.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item has the highest number of units sold in the 'today' store?" }, { "from": "gpt", "value": "The answer is taste" } ], "clip_score": 27.6875, "loss": 4.1807541847229 }, { "id": "identity_115209", "image": "TextVQA/images/3fbdd8ec7e440208.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the club where the event is taking place?" }, { "from": "gpt", "value": "The answer is Ement Club" } ], "clip_score": 16.78125, "loss": 5.209056854248047 }, { "id": "identity_134214", "image": "DocVQA/images/sljf0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the subject of the email from PJRUPPEL to PJDANIEL?" }, { "from": "gpt", "value": "The answer is Pioglitazone statement" } ], "clip_score": 22.828125, "loss": 4.302005767822266 }, { "id": "identity_278953", "image": "A-OKVQA/images/343185.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose are the Energizers typically utilized?\nChoices:\n(A) painting\n(B) eating\n(C) medicine\n(D) power" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.5703125, "loss": 4.031938076019287 }, { "id": "identity_284399", "image": "A-OKVQA/images/509339.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What function does the rectangular object on the left wall serve for?\nChoices:\n(A) storage\n(B) vision\n(C) water flow\n(D) electrical power" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.1796875, "loss": 4.504936695098877 }, { "id": "identity_328430", "image": "PMC-VQA/images/PMC5282821_Fig1_121366.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the tumor located?\nChoices:\n(A) Sagittal plane\n(B) Coronal plane\n(C) Axial plane\n(D) Transverse plane" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.03125, "loss": 4.371323585510254 }, { "id": "identity_238870", "image": "Super-CLEVR/images/superCLEVR_new_001831.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Compare the sizes of all yellow objects in the image and determine if the yellow matte object behind the rubber jet is the largest among them" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 26.328125, "loss": 4.55180025100708 }, { "id": "identity_274696", "image": "AI2D/images/3101.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is represented by the letter E in the given choices?\nChoices:\n(A) Vacuole\n(B) Plastide\n(C) Centrosoom\n(D) Chromosoom" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.0625, "loss": 4.655481815338135 }, { "id": "identity_320776", "image": "A-OKVQA/images/442685.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being relied on to hold them up?\nChoices:\n(A) two horses\n(B) person\n(C) board\n(D) string" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.8515625, "loss": 4.77589225769043 }, { "id": "identity_18777", "image": "FigureQA/images/73907.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Light Sky Blue have the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.21875, "loss": 4.335528373718262 }, { "id": "identity_247090", "image": "PMC-VQA/images/PMC7982529_F4_396784.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the potential differential diagnoses for the staining at the left caudothalamic notch, and how can you differentiate between them using imaging characteristics?" }, { "from": "gpt", "value": "The answer is hemorrhage" } ], "clip_score": 28.953125, "loss": 4.265839576721191 }, { "id": "identity_72526", "image": "TQA/images/question_images/earth_day_night_96.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arrows are used to represent sunlight in the image?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 24.359375, "loss": 4.364912509918213 }, { "id": "identity_335987", "image": "A-OKVQA/images/87649.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the gold object near the front of this?\nChoices:\n(A) shield\n(B) handle\n(C) bell\n(D) helmet" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 8.453125, "loss": 4.55387544631958 }, { "id": "identity_12398", "image": "FigureQA/images/80367.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Saddle Brown have the minimum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.4375, "loss": 4.109066009521484 }, { "id": "identity_27619", "image": "PMC-VQA/images/PMC9427313_fig2_389351.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the asterisk symbol indicate in the POCUS image? \nChoices:\n(A) Left portal branch \n(B) Right portal branch \n(C) Inferior vena cava \n(D) Aorta " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.25, "loss": 4.43295955657959 }, { "id": "identity_36362", "image": "A-OKVQA/images/555118.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would you use to speak to the clerk?\nChoices:\n(A) dutch\n(B) spanish\n(C) english\n(D) french" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.0625, "loss": 4.065643310546875 }, { "id": "identity_335188", "image": "PMC-VQA/images/PMC4703759_F2_459831.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do these images show?\nChoices:\n(A) Microbial mat on a barite chimney\n(B) SEM images of a microbial mat\n(C) Higher magnification of barite crystals\n(D) SEM images of a barite mine" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.3125, "loss": 4.575899600982666 }, { "id": "identity_314755", "image": "PMC-VQA/images/PMC3630103_ppat-1003312-g005_199699.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the yellow and green arrows show in the images? \nChoices:\n(A) Regions of inflammation and normal joints \n(B) CHIKV infected and normal joints \n(C) Treatment and control groups \n(D) Regions of inflammation and viruses" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.46875, "loss": 4.462969779968262 }, { "id": "identity_190506", "image": "VizWiz/images/VizWiz_train_00015430.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of material is the wall made of?" }, { "from": "gpt", "value": "The answer is Brick" } ], "clip_score": 20.0625, "loss": 4.543753147125244 }, { "id": "identity_297609", "image": "A-OKVQA/images/252952.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location are the individuals seated with the small child? Is it in a restaurant, museum, playground, or square?\nChoices:\n(A) restaurant\n(B) museum\n(C) playground\n(D) square" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.125, "loss": 4.502468109130859 }, { "id": "identity_6774", "image": "FigureQA/images/43306.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gold the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.390625, "loss": 4.5124969482421875 }, { "id": "identity_301855", "image": "FigureQA/images/68049.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this color have the minimum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.640625, "loss": 4.006730079650879 }, { "id": "identity_1950", "image": "A-OKVQA/images/53720.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What role is the man on the red helmet?\nChoices:\n(A) pitcher\n(B) umpire\n(C) catcher\n(D) hitter" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.234375, "loss": 4.303311824798584 }, { "id": "identity_1952", "image": "A-OKVQA/images/543084.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What types of leaves do the trees have?\nChoices:\n(A) scale\n(B) needles\n(C) broadleaf\n(D) grass" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 7.41015625, "loss": 4.242764472961426 }, { "id": "identity_312134", "image": "TQA/images/question_images/ocean_waves_9150.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term for the lowest part of a wave?\nChoices:\n(A) wave height\n(B) steepness\n(C) trough\n(D) crest" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.640625, "loss": 4.098030090332031 }, { "id": "identity_196967", "image": "PMC-VQA/images/PMC8202118_f1_467793.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image?" }, { "from": "gpt", "value": "The answer is Coronal" } ], "clip_score": 28.171875, "loss": 4.01563024520874 }, { "id": "identity_300340", "image": "DVQA/images/bar_train_00107775.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of this group of bars?" }, { "from": "gpt", "value": "The answer is cold" } ], "clip_score": 16.796875, "loss": 5.394038200378418 }, { "id": "identity_208530", "image": "VQA2.0/images/424907.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bus?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 26.234375, "loss": 4.144357204437256 }, { "id": "identity_269278", "image": "FigureQA/images/12547.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Orange Red or Purple?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.234375, "loss": 4.6335344314575195 }, { "id": "identity_2124", "image": "AI2D/images/2218.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown by label I?\nChoices:\n(A) decay-c\n(B) decay-b\n(C) grow-a\n(D) grow-b" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.0, "loss": 4.111025333404541 }, { "id": "identity_327682", "image": "VQA2.0/images/326143.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the vehicle next to the black truck?" }, { "from": "gpt", "value": "The answer is van" } ], "clip_score": 24.84375, "loss": 4.4313201904296875 }, { "id": "identity_128091", "image": "VizWiz/images/VizWiz_train_00002558.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the character's nose?" }, { "from": "gpt", "value": "The answer is Round" } ], "clip_score": 17.125, "loss": 4.282196044921875 }, { "id": "identity_10460", "image": "Super-CLEVR/images/superCLEVR_new_000000.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the number of small red cars to the left of the utility bike greater than the number of big gray metal trucks?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 23.265625, "loss": 5.082576751708984 }, { "id": "identity_323893", "image": "PMC-VQA/images/PMC7239467_pone.0233288.g005_174825.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the image?\nChoices:\n(A) Blood\n(B) Synthetic resin\n(C) A baby\n(D) Water" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.640625, "loss": 4.270045280456543 }, { "id": "identity_121851", "image": "VQA2.0/images/329640.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the tallest visible structure in the background?" }, { "from": "gpt", "value": "The answer is The mountain" } ], "clip_score": 20.28125, "loss": 5.470474720001221 }, { "id": "identity_6688", "image": "FigureQA/images/10114.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Khaki the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.8125, "loss": 4.0200300216674805 }, { "id": "identity_5192", "image": "TextVQA/images/3658ba6668c2255b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what letters are on the musician's shirt?" }, { "from": "gpt", "value": "The answer is pe" } ], "clip_score": 16.84375, "loss": 4.645306587219238 }, { "id": "identity_51665", "image": "DVQA/images/bar_train_00105422.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest average value across all groups?" }, { "from": "gpt", "value": "The answer is making" } ], "clip_score": 22.59375, "loss": 4.731187343597412 }, { "id": "identity_13947", "image": "ScienceQA/images/9727.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Maryland?\nChoices:\n(A) Annapolis\n(B) Montpelier\n(C) Harrisburg\n(D) Baltimore" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.09375, "loss": 4.226844310760498 }, { "id": "identity_199282", "image": "PMC-VQA/images/PMC8155358_F2_452888.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the abnormal signal appear to be affecting the surrounding brain tissue?" }, { "from": "gpt", "value": "The answer is No significant effect observed" } ], "clip_score": 34.0, "loss": 4.743048667907715 }, { "id": "identity_282699", "image": "PMC-VQA/images/PMC6517476_F2_479594.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of tissue blocks were dissected from the brain: sagittal blocks, coronal blocks, horizontal blocks, or rectangled blocks?\nChoices:\n(A) sagittal blocks \n(B) coronal blocks \n(C) horizontal blocks \n(D) rectangled blocks" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.21875, "loss": 4.035717487335205 }, { "id": "identity_36516", "image": "PMC-VQA/images/PMC6383528_f10_438541.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What CT scan setting was used to visualize the pneumolabyrinth?\nChoices:\n(A) soft tissue window \n(B) lung window \n(C) bone window \n(D) contrast window " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.5, "loss": 4.107700347900391 }, { "id": "identity_318731", "image": "PMC-VQA/images/PMC7329524_Fig3_203295.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the green arrow in the image indicate? \nChoices:\n(A) Bowel perforation \n(B) Ureteronephrosis \n(C) Hydronephrosis \n(D) Biliary obstruction." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.90625, "loss": 4.5976386070251465 }, { "id": "identity_233045", "image": "PMC-VQA/images/PMC4312452_Fig4_354171.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the type of valve shown in the right image and describe its key structural features that differentiate it from the other types of valves" }, { "from": "gpt", "value": "The answer is Interior of ventral valve with muscle scars and attachment points" } ], "clip_score": 21.71875, "loss": 4.080543041229248 }, { "id": "identity_287102", "image": "ScienceQA/images/8050.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city on the map is indicated by the marking? \nChoices:\n(A) Pittsburgh\n(B) Boston\n(C) Philadelphia\n(D) Baltimore" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.421875, "loss": 4.368325233459473 }, { "id": "identity_298660", "image": "VQA-AS/images/6920.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the boy currently engaged in dancing?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 21.46875, "loss": 4.398636817932129 }, { "id": "identity_213679", "image": "VQA2.0/images/450621.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are under the umbrella?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 27.0, "loss": 4.745963096618652 }, { "id": "identity_233154", "image": "VQA2.0/images/96969.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the indicators in the image that suggest this location is not in the U.S., and where might it be based on these indicators?" }, { "from": "gpt", "value": "The answer is Russia or another Eastern European country" } ], "clip_score": 17.515625, "loss": 4.021088600158691 }, { "id": "identity_205030", "image": "PMC-VQA/images/PMC4877350_Fig9_33144.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pelvic masses are visible in this image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 30.046875, "loss": 4.439309120178223 }, { "id": "identity_298546", "image": "A-OKVQA/images/81768.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On the right side of the street, which type of sportswear is available for purchase? \nChoices:\n(A) mouthpieces\n(B) shoes\n(C) belts\n(D) kneepads" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.265625, "loss": 4.1441264152526855 }, { "id": "identity_323601", "image": "A-OKVQA/images/2687.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are the coats yellow in color?\nChoices:\n(A) camouflage\n(B) dress code\n(C) visibility\n(D) fashion" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 8.65625, "loss": 4.300282955169678 }, { "id": "identity_324029", "image": "A-OKVQA/images/438709.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of event are these individuals likely attending?\nChoices:\n(A) vacation\n(B) demolition\n(C) mob hit\n(D) diplomatic meeting" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.328125, "loss": 4.129540920257568 }, { "id": "identity_34965", "image": "A-OKVQA/images/355481.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: An American motorcycle rally held annually in which place?\nChoices:\n(A) rapid city\n(B) sturgis\n(C) pierre\n(D) deadwood" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.796875, "loss": 4.078782081604004 }, { "id": "identity_47745", "image": "DocVQA/images/qlfg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person who left the message?" }, { "from": "gpt", "value": "The answer is Jordan Rich" } ], "clip_score": 21.34375, "loss": 6.179647445678711 }, { "id": "identity_301805", "image": "VizWiz/images/VizWiz_train_00005762.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What size is this measuring cup?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 29.96875, "loss": 4.182644844055176 }, { "id": "identity_292309", "image": "PMC-VQA/images/PMC5842618_Fig2_285302.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the ADC map depict?\nChoices:\n(A) A liver tumor\n(B) A pancreatic carcinoma\n(C) The surgical procedure\n(D) The change in surgery." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.59375, "loss": 4.076056957244873 }, { "id": "identity_104907", "image": "VQA-AS/images/19149.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are hanging on the wall?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.953125, "loss": 4.990041255950928 }, { "id": "identity_133886", "image": "TextVQA/images/b0d861635dcab8ca.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What product is being advertised by Engelhardt's in the image?" }, { "from": "gpt", "value": "The answer is Löwen-Pasta" } ], "clip_score": 23.828125, "loss": 4.775116920471191 }, { "id": "identity_320008", "image": "A-OKVQA/images/526414.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this white machine used to manipulate?\nChoices:\n(A) paper\n(B) fabric\n(C) metal\n(D) plastic" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 9.7265625, "loss": 4.467877388000488 }, { "id": "identity_261675", "image": "DocVQA/images/hkfg0024_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of this meeting?" }, { "from": "gpt", "value": "The answer is Review of Procedure Notes" } ], "clip_score": 22.71875, "loss": 4.672101020812988 }, { "id": "identity_276216", "image": "ScienceQA/images/12124.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repelling effect on each other?\nChoices:\n(A) repel\n(B) attract" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.5625, "loss": 4.193772315979004 }, { "id": "identity_255039", "image": "A-OKVQA/images/274541.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are using electronic devices, and what types of devices are they using?" }, { "from": "gpt", "value": "The answer is Three people using laptop computers" } ], "clip_score": 26.328125, "loss": 4.005163192749023 }, { "id": "identity_213866", "image": "A-OKVQA/images/412094.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the train number as indicated in the image?" }, { "from": "gpt", "value": "The answer is Number visible on the train" } ], "clip_score": 23.625, "loss": 4.4641337394714355 }, { "id": "identity_328038", "image": "A-OKVQA/images/137134.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What sport requires this person to lay almost flat to their boards?\nChoices:\n(A) snowboarding\n(B) ski jumping\n(C) ski climbing\n(D) ski crossing" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.921875, "loss": 4.133152961730957 }, { "id": "identity_321970", "image": "A-OKVQA/images/376677.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of place is the area on the left with sign displays most likely to be?\nChoices:\n(A) shopping mall\n(B) strip mall\n(C) market\n(D) rest stop" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.7109375, "loss": 4.423489570617676 }, { "id": "identity_249867", "image": "PMC-VQA/images/PMC6825074_Fig5_59906.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the anatomical structures surrounding the bulbous swelling seen on the MRI image, and what is their relative positioning?" }, { "from": "gpt", "value": "The answer is ECRL and superficial supinator, distal and superficial" } ], "clip_score": 31.171875, "loss": 4.045374393463135 }, { "id": "identity_211603", "image": "VQA2.0/images/83060.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many burners are on the stove?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.75, "loss": 4.43166971206665 }, { "id": "identity_284876", "image": "DocVQA/images/ttwn0081_18.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does the increase in CRES levels impact the particle size distribution?" }, { "from": "gpt", "value": "The answer is worsened" } ], "clip_score": 12.453125, "loss": 4.994566440582275 }, { "id": "identity_311376", "image": "FigureQA/images/42702.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.046875, "loss": 4.073586940765381 }, { "id": "identity_293369", "image": "A-OKVQA/images/354721.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What provides momentum for the skater?\nChoices:\n(A) dog\n(B) gasoline\n(C) petrol\n(D) wheels" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.28125, "loss": 4.158251762390137 }, { "id": "identity_187672", "image": "TextVQA/images/051c411d3cdc2446.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sizes are available for this shirt as mentioned in the image?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 18.5625, "loss": 4.018658638000488 }, { "id": "identity_331234", "image": "PMC-VQA/images/PMC6475549_fig1_468927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the abnormality found in this scan?\nChoices:\n(A) Sphingomyelin deposition \n(B) Septal enhancement \n(C) Bronchitis \n(D) Asthma." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.328125, "loss": 4.719130039215088 }, { "id": "identity_258954", "image": "Super-CLEVR/images/superCLEVR_new_000149.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is in front of the small matte object and behind the red matte SUV?" }, { "from": "gpt", "value": "The answer is silver" } ], "clip_score": 24.546875, "loss": 4.83234167098999 }, { "id": "identity_286686", "image": "A-OKVQA/images/221004.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the yellow and green objects have in common?\nChoices:\n(A) age\n(B) caliber\n(C) species\n(D) model" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.9296875, "loss": 4.386292457580566 }, { "id": "identity_319857", "image": "TextVQA/images/947b4b17b0447c21.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are these yellow cars?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 18.71875, "loss": 4.73692512512207 }, { "id": "identity_33906", "image": "TextVQA/images/d5dab54f1eee6ef4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: if is the chinese?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 15.3828125, "loss": 4.426819324493408 }, { "id": "identity_13832", "image": "DVQA/images/bar_train_00018876.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 18.953125, "loss": 4.3494133949279785 }, { "id": "identity_288520", "image": "PMC-VQA/images/PMC3635941_F3_200806.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique is being utilized in this caption?\nOptions:\n(A) CT\n(B) X-ray\n(C) MRI\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.84375, "loss": 4.015522003173828 }, { "id": "identity_294708", "image": "A-OKVQA/images/10484.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is situated above the ironwork on the train that is moving away?\nChoices:\n(A) coffee shop\n(B) factory\n(C) storefront\n(D) train track" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.421875, "loss": 4.378322601318359 }, { "id": "identity_198416", "image": "VQA-AS/images/9410.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.296875, "loss": 4.271257400512695 }, { "id": "identity_316602", "image": "PMC-VQA/images/PMC4331836_F1_359717.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique used to visualize this? \nChoices:\n(A) X-ray \n(B) CT scan \n(C) PET scan \n(D) MRI scan" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.1875, "loss": 4.632165431976318 }, { "id": "identity_38511", "image": "IconQA/images/68509.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What shape is this?\nChoices:\n(A) cone\n(B) cube\n(C) sphere\n(D) cylinder" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.25, "loss": 4.221423149108887 }, { "id": "identity_276008", "image": "TQA/images/question_images/cycle_nitrogen_6711.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options is responsible for producing organic matter?\n(A) fertilizers\n(B) plants and animals\n(C) farms and runoff\n(D) Fossil fuel emissions" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.875, "loss": 4.431473731994629 }, { "id": "identity_26693", "image": "IconQA/images/80229.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a moon. Which picture is ninth?\nChoices:\n(A) moon\n(B) star\n(C) rocket" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.46875, "loss": 4.318244934082031 }, { "id": "identity_26471", "image": "PMC-VQA/images/PMC7921273_Fig1_380477.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the appearance of the tumor edges in the EOB-MRI images?\nChoices:\n(A) Hypointense \n(B) Hyperintense \n(C) Isointense \n(D) Multicolored" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.03125, "loss": 4.10877799987793 }, { "id": "identity_278820", "image": "DVQA/images/bar_train_00043443.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In any category, which object is the least favored?" }, { "from": "gpt", "value": "The answer is flesh" } ], "clip_score": 24.078125, "loss": 4.558757305145264 }, { "id": "identity_287889", "image": "VQA2.0/images/496617.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify this room?" }, { "from": "gpt", "value": "The answer is conference" } ], "clip_score": 21.28125, "loss": 4.580257415771484 }, { "id": "identity_106829", "image": "Super-CLEVR/images/superCLEVR_new_001972.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is furthest to the left in the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 21.4375, "loss": 4.185290336608887 }, { "id": "identity_319793", "image": "VQA2.0/images/49640.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where was this photo taken?" }, { "from": "gpt", "value": "The answer is outside" } ], "clip_score": 18.71875, "loss": 4.0879387855529785 }, { "id": "identity_280059", "image": "ScienceQA/images/5985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the passage discussing the weather or the climate?\nChoices:\n(A) climate\n(B) weather" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.2421875, "loss": 4.495962142944336 }, { "id": "identity_336119", "image": "TabMWP/images/2543.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: They have $107.00. Is it enough to buy a model railroad set and a stuffed animal?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.53125, "loss": 4.150788307189941 }, { "id": "identity_275095", "image": "PMC-VQA/images/PMC4455327_Fig4_393478.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which plane were the MR images of rat livers taken?\nChoices:\n(A) transverse plane\n(B) sagittal plane\n(C) coronal plane\n(D) vertical plane" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.78125, "loss": 4.55871057510376 }, { "id": "identity_274681", "image": "Super-CLEVR/images/superCLEVR_new_001785.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do big purple metallic cars exist?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 22.671875, "loss": 4.864956855773926 }, { "id": "identity_9502", "image": "TextVQA/images/1c18369be23ddf63.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: are you all red sox fans?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 27.578125, "loss": 4.632127285003662 }, { "id": "identity_264913", "image": "TextVQA/images/5082475b716d3933.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the general strike product?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 14.4921875, "loss": 4.024111270904541 }, { "id": "identity_113327", "image": "VQA2.0/images/138579.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing hats in the stands?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 18.140625, "loss": 4.4941935539245605 }, { "id": "identity_331932", "image": "PMC-VQA/images/PMC5471892_Fig4_179489.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the Maltese dog was shown in the histological sections?\nChoices:\n(A) Right tibia\n(B) Left humerus\n(C) Right ear\n(D) Left eye" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.546875, "loss": 4.345076084136963 }, { "id": "identity_24165", "image": "FigureQA/images/26442.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gray greater than Coral?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.078125, "loss": 4.1351776123046875 }, { "id": "identity_38031", "image": "PMC-VQA/images/PMC5798916_Fig1_271805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the spinal lipoma seen in the image?\nChoices:\n(A) Cervical level \n(B) Thoracic level \n(C) Lumbar level \n(D) Sacral level" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.515625, "loss": 4.060524940490723 }, { "id": "identity_11698", "image": "Super-CLEVR/images/superCLEVR_new_000066.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a metal mountain bike that has the same color as the small rubber thing?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 22.203125, "loss": 5.242383003234863 }, { "id": "identity_281346", "image": "PMC-VQA/images/PMC7856147_F7_358596.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of microscope was utilized to capture the SEM image? \nOptions:\n(A) Transmission electron microscope\n(B) Scanning electron microscope\n(C) Optical microscope\n(D) Confocal microscope" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.640625, "loss": 4.120201110839844 }, { "id": "identity_108236", "image": "VQA2.0/images/471647.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many framed pictures are hanging on the walls?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 23.125, "loss": 4.767035961151123 }, { "id": "identity_38205", "image": "VQA-AS/images/8164.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What object is closest to the left?" }, { "from": "gpt", "value": "The answer is picture" } ], "clip_score": 20.828125, "loss": 4.9558634757995605 }, { "id": "identity_14029", "image": "A-OKVQA/images/518511.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For which city does this fire truck perform work?\nChoices:\n(A) new york\n(B) kentucky\n(C) arkansas\n(D) new jersey" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.6875, "loss": 4.085608005523682 }, { "id": "identity_329946", "image": "ScienceQA/images/19433.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is this made of?\nChoices:\n(A) ceramic\n(B) linen" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.5625, "loss": 4.281971454620361 }, { "id": "identity_7613", "image": "VQA-AS/images/12623.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the boys playing?" }, { "from": "gpt", "value": "The answer is catch" } ], "clip_score": 20.625, "loss": 4.318053245544434 }, { "id": "identity_106535", "image": "TextVQA/images/129ae867a15bb330.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many smokestacks are emitting smoke in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.21875, "loss": 4.593441009521484 }, { "id": "identity_212076", "image": "A-OKVQA/images/153368.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lamps are there in the room?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.6875, "loss": 4.276386260986328 }, { "id": "identity_255623", "image": "A-OKVQA/images/176571.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the indications in the image that suggest the weather condition, and how do they collectively confirm the reason people are holding umbrellas?" }, { "from": "gpt", "value": "The answer is It is raining" } ], "clip_score": 25.078125, "loss": 4.214864730834961 }, { "id": "identity_329942", "image": "A-OKVQA/images/237464.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do most of the people here hope for today weather wise?\nChoices:\n(A) wind\n(B) sleet\n(C) rain\n(D) snow" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.5, "loss": 4.259552955627441 }, { "id": "identity_299361", "image": "ScienceQA/images/15540.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following choices correctly identifies the direction of thermal energy transfer during this time: from each battery to the surroundings or from the surroundings to each battery?\nChoices:\n(A) each battery . . . the surroundings\n(B) the surroundings . . . each battery" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.015625, "loss": 4.597020626068115 }, { "id": "identity_56816", "image": "DVQA/images/bar_train_00126383.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first group of bars from the left?" }, { "from": "gpt", "value": "The answer is trade" } ], "clip_score": 24.671875, "loss": 4.215479850769043 }, { "id": "identity_289834", "image": "ScienceQA/images/14561.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which question is most suitable for Lamar's experiment to address?\nChoices:\n(A) Can pennies hold more drops of pure water or water mixed with hand soap?\n(B) Can pennies hold more drops of water mixed with dish soap or water mixed with hand soap?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.234375, "loss": 4.020636081695557 }, { "id": "identity_216894", "image": "PMC-VQA/images/PMC3730322_cancers-05-00714-f003_221934.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the overall structure in the image?" }, { "from": "gpt", "value": "The answer is Cuboid" } ], "clip_score": 21.03125, "loss": 4.213677883148193 }, { "id": "identity_268098", "image": "IconQA/images/5464.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the probability of selecting a black marble without looking?\nChoices:\n(A) unlikely\n(B) certain\n(C) probable\n(D) impossible" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.4375, "loss": 4.368686199188232 }, { "id": "identity_323628", "image": "A-OKVQA/images/196418.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is this person leaning?\nChoices:\n(A) is scared\n(B) is falling\n(C) maintaining balance\n(D) is searching" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.0234375, "loss": 4.56449031829834 }, { "id": "identity_302827", "image": "DocVQA/images/xzyh0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the type of water?" }, { "from": "gpt", "value": "The answer is battery supply" } ], "clip_score": 17.0625, "loss": 6.510483264923096 }, { "id": "identity_187417", "image": "A-OKVQA/images/525438.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the door to the establishment?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 19.9375, "loss": 4.413374423980713 }, { "id": "identity_24080", "image": "PMC-VQA/images/PMC5721319_F2_250258.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the red line indicating in the radiograph? \nChoices:\n(A) Femoral neck offset \n(B) Acetabular index \n(C) Hip joint center \n(D) Proximal femoral axis " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.0625, "loss": 4.749931812286377 }, { "id": "identity_156718", "image": "AI2D/images/2637.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many types of forces are indicated in the diagram?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.15625, "loss": 4.615596771240234 }, { "id": "identity_284438", "image": "Super-CLEVR/images/superCLEVR_new_001859.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the object to the left of the metallic airplane identical in shape to the yellow metallic object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 25.40625, "loss": 5.336984157562256 }, { "id": "identity_306163", "image": "IconQA/images/59069.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a sub. What is the fifth image?\nChoices:\n(A) sub\n(B) turtle\n(C) fish" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.25, "loss": 4.686017036437988 }, { "id": "identity_267891", "image": "AI2D/images/3147.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part is located at the bottom-most position? \n(A) none of the above\n(B) vein\n(C) stem\n(D) blade" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.609375, "loss": 4.385012149810791 }, { "id": "identity_315353", "image": "VQA2.0/images/377394.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What do these things have in common?" }, { "from": "gpt", "value": "The answer is for storage" } ], "clip_score": 19.8125, "loss": 4.371216297149658 }, { "id": "identity_34118", "image": "ScienceQA/images/17869.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) hard\n(B) soft" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.3125, "loss": 4.271108150482178 }, { "id": "identity_55240", "image": "DocVQA/images/ssyp0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the applicant's hair color?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 16.34375, "loss": 4.274222373962402 }, { "id": "identity_29239", "image": "PMC-VQA/images/PMC6215355_Fig2_390952.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique was used to reveal low signals in the parietal-occipital sulci? \nChoices:\n(A) CT \n(B) MRI \n(C) PET \n(D) X-ray " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.375, "loss": 4.743711948394775 }, { "id": "identity_332785", "image": "PMC-VQA/images/PMC7909442_brainsci-11-00133-f001_375074.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What orientation is indicated by the green color on the right images?\nChoices:\n(A) Inferior - Superior.\n(B) Right - Left.\n(C) Anterior - Posterior.\n(D) Unknown orientation." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.625, "loss": 4.7021870613098145 }, { "id": "identity_25096", "image": "TQA/images/question_images/hydrocarbons_7052.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If two carbon atoms were joined using double bond in the hydrocarbon shown in diagram, what would be the result?\nChoices:\n(A) cyclic hydrocarbon\n(B) unsaturated hydrocarbon\n(C) saturated hydrocarbon\n(D) nothing would change" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.90625, "loss": 4.531677722930908 }, { "id": "identity_320193", "image": "TextVQA/images/111cfc154a29698d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the red sign of this store?" }, { "from": "gpt", "value": "The answer is calzature palma" } ], "clip_score": 18.921875, "loss": 4.057270050048828 }, { "id": "identity_28714", "image": "ScienceQA/images/5526.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Wisconsin\n(B) Tennessee\n(C) Connecticut\n(D) New Jersey" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.15625, "loss": 4.0383477210998535 }, { "id": "identity_66753", "image": "AI2D/images/4775.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What common feature is shared by the mutant forms of all the plants depicted?" }, { "from": "gpt", "value": "The answer is Reduced or simplified structure" } ], "clip_score": 29.5, "loss": 4.419240951538086 }, { "id": "identity_315970", "image": "Super-CLEVR/images/superCLEVR_new_001212.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the bicycle behind the red thing that is on the right side of the green one?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 30.234375, "loss": 4.894309043884277 }, { "id": "identity_26447", "image": "VQA-AS/images/6569.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the man doing?" }, { "from": "gpt", "value": "The answer is falling" } ], "clip_score": 20.265625, "loss": 4.998940944671631 }, { "id": "identity_130760", "image": "Super-CLEVR/images/superCLEVR_new_000434.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 18.125, "loss": 4.873415946960449 }, { "id": "identity_24639", "image": "FigureQA/images/33480.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Purple the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.609375, "loss": 4.017966270446777 }, { "id": "identity_335540", "image": "PMC-VQA/images/PMC7787872_fig4_335944.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the suspected diagnosis of the mass lesion seen here?\nChoices:\n(A) Myxoma\n(B) Liposarcoma\n(C) Meningioma\n(D) Carcinoma" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.96875, "loss": 4.746973991394043 }, { "id": "identity_110880", "image": "VQA-AS/images/18108.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the curtains in the room?" }, { "from": "gpt", "value": "The answer is White and brown" } ], "clip_score": 25.0, "loss": 4.113221645355225 }, { "id": "identity_241998", "image": "VQA2.0/images/338705.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are riding bicycles in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.78125, "loss": 4.413754463195801 }, { "id": "identity_314383", "image": "A-OKVQA/images/4554.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the skill level of most snowboarders in this place?\nChoices:\n(A) beginners\n(B) professional\n(C) olympic\n(D) competitive" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.984375, "loss": 4.533282279968262 }, { "id": "identity_74653", "image": "ScienceQA/images/12295.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the geographical feature located to the northwest of the orange dot?" }, { "from": "gpt", "value": "The answer is Lake Michigan" } ], "clip_score": 33.9375, "loss": 4.3689703941345215 }, { "id": "identity_290884", "image": "PMC-VQA/images/PMC7125094_Fig3_144707.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the scale bar in the images? \nChoices:\n(A) 20x magnification \n(B) 102 micrometers \n(C) both A and B \n(D) none of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.5, "loss": 4.087031364440918 }, { "id": "identity_33249", "image": "PMC-VQA/images/PMC3767748_pone-0073297-g003_230614.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the bottom right picture in (A) showing? \nChoices:\n(A) The distribution of a stack of sticks in the XY view \n(B) A scheme of the view’s distribution in the tomogram \n(C) The segmentation of the tomogram in XY view \n(D) The segmentation of the top view of the tomogram in perspective view " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 9.59375, "loss": 4.4036736488342285 }, { "id": "identity_312021", "image": "DVQA/images/bar_train_00021722.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the highest accuracy for any of these datasets?" }, { "from": "gpt", "value": "The answer is call" } ], "clip_score": 35.28125, "loss": 4.020484924316406 }, { "id": "identity_146675", "image": "DocVQA/images/nldg0227_17.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the source of funding for the programs listed in the table?" }, { "from": "gpt", "value": "The answer is WOH Fund" } ], "clip_score": 22.671875, "loss": 5.116779804229736 }, { "id": "identity_16055", "image": "DVQA/images/bar_train_00046428.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many groups of bars contain at least one bar with value greater than 7?" }, { "from": "gpt", "value": "The answer is six" } ], "clip_score": 22.84375, "loss": 4.4623284339904785 }, { "id": "identity_27689", "image": "AI2D/images/3833.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the petal.\nChoices:\n(A) B\n(B) C\n(C) A\n(D) D" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.015625, "loss": 4.256218910217285 }, { "id": "identity_145539", "image": "DocVQA/images/xhng0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the model of the item delivered as mentioned in the delivery slip?" }, { "from": "gpt", "value": "The answer is Model DE" } ], "clip_score": 18.0625, "loss": 4.956765174865723 }, { "id": "identity_9474", "image": "FigureQA/images/24369.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Pale Green the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.34375, "loss": 4.04938268661499 }, { "id": "identity_337951", "image": "PMC-VQA/images/PMC4229202_pone-0112477-g006_335516.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the body is shown in the image?\nChoices:\n(A) Arm\n(B) Head\n(C) Leg\n(D) Abdomen" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.796875, "loss": 4.3906145095825195 }, { "id": "identity_294113", "image": "PMC-VQA/images/PMC7330812_rcr2619-fig-0002_203688.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the reason behind the blockage observed in the Chest CT scan?\nChoices:\n(A) Blood clot \n(B) Tissue growth \n(C) Procedural complications \n(D) Infection" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.28125, "loss": 4.260128974914551 }, { "id": "identity_321243", "image": "PMC-VQA/images/PMC5585196_F2_211657.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which perspective was used to differentiate iNPH and HC groups?\nChoices:\n(A) Bottom view \n(B) Front view \n(C) Left view \n(D) Top view" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.34375, "loss": 5.007360458374023 }, { "id": "identity_25331", "image": "PMC-VQA/images/PMC7010902_Fig8_114208.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which lobe can we see the GRD in this patient? \nChoices:\n(A) Left temporal lobe \n(B) Right temporal lobe \n(C) Left parietal lobe \n(D) Right parietal lobe. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.640625, "loss": 4.100217819213867 }, { "id": "identity_70902", "image": "AI2D/images/3461.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pairs of wings does the grasshopper have?" }, { "from": "gpt", "value": "The answer is Two pairs" } ], "clip_score": 31.78125, "loss": 4.106410503387451 }, { "id": "identity_133638", "image": "Super-CLEVR/images/superCLEVR_new_000055.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object located to the right of the green motorbike?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 22.78125, "loss": 4.88222599029541 }, { "id": "identity_320031", "image": "PMC-VQA/images/PMC6946360_F1_95413.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible in the image?\nChoices:\n(A) Swelling of septum \n(B) Septum perforation \n(C) Normal septum \n(D) Septum deviation" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.140625, "loss": 4.171631336212158 }, { "id": "identity_263218", "image": "A-OKVQA/images/543084.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the different types of leaves found on the trees?\nChoices:\n(A) scale\n(B) needles\n(C) broadleaf\n(D) grass" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 7.30859375, "loss": 4.173214912414551 }, { "id": "identity_37889", "image": "A-OKVQA/images/315195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What incident is happening in the scene?\nChoices:\n(A) fire\n(B) riot\n(C) running race\n(D) water leakage" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.953125, "loss": 4.5181989669799805 }, { "id": "identity_15059", "image": "A-OKVQA/images/331140.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What will they use the orange ring for?\nChoices:\n(A) hula hoop\n(B) anchor boat\n(C) grab dock\n(D) save drowners" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.4140625, "loss": 4.375124454498291 }, { "id": "identity_311406", "image": "DVQA/images/bar_train_00027525.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first bar from the bottom in each category?" }, { "from": "gpt", "value": "The answer is child" } ], "clip_score": 19.890625, "loss": 4.377646446228027 }, { "id": "identity_284078", "image": "A-OKVQA/images/498669.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What level of fullness does he aim for the boat to reach by the end of the day?\nChoices:\n(A) half full\n(B) full\n(C) empty\n(D) quarter full" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.625, "loss": 4.440650939941406 }, { "id": "identity_52904", "image": "DocVQA/images/mpcw0217_22.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the paper submitted to Drug Safety?" }, { "from": "gpt", "value": "The answer is SERMs #2" } ], "clip_score": 14.296875, "loss": 4.926070213317871 }, { "id": "identity_328470", "image": "PMC-VQA/images/PMC8425872_acm213355-fig-0020_53993.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of case is associated with the dashed dose-volume histogram lines?\nChoices:\n(A) shielded lung case \n(B) unshielded case \n(C) typical lung case \n(D) atypical lung case" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.171875, "loss": 4.20517635345459 }, { "id": "identity_252084", "image": "IconQA/images/60943.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many faces does this shape have, and what types of polygons are they?" }, { "from": "gpt", "value": "The answer is Four triangular faces" } ], "clip_score": 29.890625, "loss": 4.372694969177246 }, { "id": "identity_146018", "image": "DocVQA/images/ssjw0227_35.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the institution that provided the photographs?" }, { "from": "gpt", "value": "The answer is The institutions with which the subjects are identified" } ], "clip_score": 21.4375, "loss": 4.49691915512085 }, { "id": "identity_965", "image": "IconQA/images/103541.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) flip\n(B) slide\n(C) turn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.484375, "loss": 4.445054531097412 }, { "id": "identity_5484", "image": "A-OKVQA/images/538581.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color vest does the person who put these dogs on the surfboard wear?\nChoices:\n(A) purple\n(B) white\n(C) polka dot\n(D) yellow" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.09375, "loss": 4.080905437469482 }, { "id": "identity_33913", "image": "PMC-VQA/images/PMC3701324_fig4_215652.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the left hemisphere shown on the left in the axial sections? \nChoices:\n(A) For scientific reasons \n(B) For artistic reasons \n(C) For medical reasons \n(D) For no particular reason " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.875, "loss": 4.9188995361328125 }, { "id": "identity_253722", "image": "VQA2.0/images/499623.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of the device the woman is looking at, and what is attached to it?" }, { "from": "gpt", "value": "The answer is Apple, webcam" } ], "clip_score": 21.890625, "loss": 4.174744606018066 }, { "id": "identity_219218", "image": "A-OKVQA/images/413164.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the interior architecture?" }, { "from": "gpt", "value": "The answer is A color name within ten words" } ], "clip_score": 13.7890625, "loss": 5.682888984680176 }, { "id": "identity_43995", "image": "DVQA/images/bar_train_00035616.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item sold the most units in the 'travel' shop?" }, { "from": "gpt", "value": "The answer is stores" } ], "clip_score": 31.734375, "loss": 4.531279563903809 }, { "id": "identity_118556", "image": "A-OKVQA/images/318162.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pillows are visible on the bed?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.09375, "loss": 4.693294525146484 }, { "id": "identity_303111", "image": "PMC-VQA/images/PMC8195437_ppat.1009632.g003_464513.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the white arrows in the insets point to? \nChoices:\n(A) Bacterial particles \n(B) Viral particles \n(C) Parasitic particles \n(D) Fungal particles." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.53125, "loss": 4.7488861083984375 }, { "id": "identity_121281", "image": "PMC-VQA/images/PMC8232116_genes-12-00915-f003_479240.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the overall shape of the retinal cross-section?" }, { "from": "gpt", "value": "The answer is Convex" } ], "clip_score": 27.3125, "loss": 4.49971866607666 }, { "id": "identity_38687", "image": "A-OKVQA/images/314515.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The people who gather here are taking part in what?\nChoices:\n(A) vacation\n(B) protest\n(C) cycling event\n(D) picnic" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.0, "loss": 4.109086513519287 }, { "id": "identity_312734", "image": "A-OKVQA/images/201925.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What yellow fluid might be paired with this item?\nChoices:\n(A) paint\n(B) banana milk\n(C) custard\n(D) egg yolk" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.9921875, "loss": 4.2226738929748535 }, { "id": "identity_225122", "image": "Super-CLEVR/images/superCLEVR_new_001728.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image are made of the same material as the road bike, and what are their shapes?" }, { "from": "gpt", "value": "The answer is Two objects, bike and rectangular shapes" } ], "clip_score": 26.59375, "loss": 4.584555625915527 }, { "id": "identity_271382", "image": "PMC-VQA/images/PMC7567840_Fig1_277119.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following orientations are mentioned in the image? \nOptions:\n(A) Middle, Front, Bottom, Top.\n(B) Superior, Inferior, Right, Left, Anterior, Posterior.\n(C) Coronal, Sagittal, Axial, Vertebra.\n(D) None of the above." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.046875, "loss": 4.022357940673828 }, { "id": "identity_24920", "image": "A-OKVQA/images/511363.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is necessary for the toy to be played with properly?\nChoices:\n(A) manual\n(B) wind\n(C) directions\n(D) marshall" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.9921875, "loss": 4.2173285484313965 }, { "id": "identity_119540", "image": "TextVQA/images/30a348b470348881.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the venue for the exhibition as per the poster?" }, { "from": "gpt", "value": "The answer is Teatro do Planetário" } ], "clip_score": 25.046875, "loss": 4.626633644104004 }, { "id": "identity_307273", "image": "TextVQA/images/20e5bcf73f20bdb6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What single letter is visible at the top right of this?" }, { "from": "gpt", "value": "The answer is sinful color" } ], "clip_score": 10.7109375, "loss": 5.55275297164917 }, { "id": "identity_318114", "image": "DocVQA/images/snpf0023_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is this note addressed to?" }, { "from": "gpt", "value": "The answer is Jeff Rumberger" } ], "clip_score": 22.984375, "loss": 4.975000381469727 }, { "id": "identity_271984", "image": "ScienceQA/images/996.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) Rhode Island\n(B) Kentucky\n(C) Florida\n(D) Connecticut" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.234375, "loss": 4.620923042297363 }, { "id": "identity_217587", "image": "TextVQA/images/9de238fff437f9d0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many buttons are visible on the person's garment?" }, { "from": "gpt", "value": "The answer is Three buttons" } ], "clip_score": 12.4296875, "loss": 4.489254951477051 }, { "id": "identity_308237", "image": "ScienceQA/images/11580.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of this colony?\nChoices:\n(A) South Carolina\n(B) Mississippi\n(C) Connecticut\n(D) Virginia" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.515625, "loss": 4.2921576499938965 }, { "id": "identity_322295", "image": "A-OKVQA/images/273579.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the animal doing?\nChoices:\n(A) drinking\n(B) painting picture\n(C) eating lunch\n(D) resting" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.75, "loss": 4.545046806335449 }, { "id": "identity_282780", "image": "Super-CLEVR/images/superCLEVR_new_000973.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more tiny airliners behind the large rubber tandem bike than big gray metallic road bikes behind the SUV?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 31.390625, "loss": 4.7722578048706055 }, { "id": "identity_270107", "image": "VizWiz/images/VizWiz_train_00002803.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify this product?" }, { "from": "gpt", "value": "The answer is home phone" } ], "clip_score": 25.078125, "loss": 4.0371809005737305 }, { "id": "identity_235274", "image": "VizWiz/images/VizWiz_train_00016364.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the text in the captcha and what is the color of the text above the captcha box?" }, { "from": "gpt", "value": "The answer is stock, red" } ], "clip_score": 27.921875, "loss": 4.081575870513916 }, { "id": "identity_291913", "image": "A-OKVQA/images/573750.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What equipment are the individuals in the front holding?\nChoices:\n(A) snowboards\n(B) surfboards\n(C) skates\n(D) skateboards" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.140625, "loss": 4.144558906555176 }, { "id": "identity_17149", "image": "AI2D/images/3320.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is J?\nChoices:\n(A) Mitochondria\n(B) Lysosome\n(C) Vacuole\n(D) Centriole" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.71875, "loss": 4.171714782714844 }, { "id": "identity_254381", "image": "TextVQA/images/0204574848b0bf05.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: how many signs are in the trashcan and what is the predominant color of the signs?" }, { "from": "gpt", "value": "The answer is five signs, green" } ], "clip_score": 26.640625, "loss": 4.046259880065918 }, { "id": "identity_329735", "image": "PMC-VQA/images/PMC8414583_F2_50810.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where can we observe the granule at an earlier time point?\nChoices:\n(A) Top left \n(B) Top right \n(C) Bottom left \n(D) Bottom right" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.171875, "loss": 4.279201507568359 }, { "id": "identity_575", "image": "A-OKVQA/images/171479.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of event are the people participating in?\nChoices:\n(A) training\n(B) commuting\n(C) competition\n(D) leisure activity" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.7265625, "loss": 4.148768901824951 }, { "id": "identity_13239", "image": "FigureQA/images/80260.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Orchid the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.921875, "loss": 4.0228118896484375 }, { "id": "identity_35572", "image": "PMC-VQA/images/PMC4764584_fig3s6_4846.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of tissue is shown in the image? \nChoices:\n(A) Muscle tissue \n(B) Brain tissue \n(C) Liver tissue \n(D) Skin tissue " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.625, "loss": 4.645523548126221 }, { "id": "identity_304904", "image": "PMC-VQA/images/PMC7510301_Fig3_259713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the blue area signify in the image?\nChoices:\n(A) GTV-T\n(B) GTV-N\n(C) PGTV\n(D) PTV" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.15625, "loss": 4.112210273742676 }, { "id": "identity_16869", "image": "A-OKVQA/images/451951.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the man with the drink at risk of?\nChoices:\n(A) falling hazard\n(B) fire hazard\n(C) flood hazard\n(D) slipping hazard" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.421875, "loss": 4.568894386291504 }, { "id": "identity_34874", "image": "A-OKVQA/images/434354.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can often be found under the tree seen here?\nChoices:\n(A) mice\n(B) elves\n(C) dogs\n(D) gifts" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.71875, "loss": 4.1981658935546875 }, { "id": "identity_20243", "image": "Super-CLEVR/images/superCLEVR_new_000832.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a rubber thing that is behind the large blue bicycle; what is its shape?" }, { "from": "gpt", "value": "The answer is double bus" } ], "clip_score": 29.640625, "loss": 4.571129322052002 }, { "id": "identity_263458", "image": "A-OKVQA/images/288229.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What category does this area belong to? \nChoices:\n(A) commercial\n(B) rural\n(C) country\n(D) residential" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 7.68359375, "loss": 4.46379280090332 }, { "id": "identity_266186", "image": "A-OKVQA/images/562241.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which object is most likely responsible for casting the nearby shadow? \n(A) palm tree \n(B) cameraman \n(C) dog \n(D) traffic cone" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.265625, "loss": 4.724246025085449 }, { "id": "identity_290873", "image": "PMC-VQA/images/PMC5343592_f17_136822.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the object being stained in the micrographs among the following choices: (A) Hippocampus, (B) Amygdala, (C) Thalamus, (D) Cerebellum?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.84375, "loss": 4.117789268493652 }, { "id": "identity_210003", "image": "A-OKVQA/images/195296.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bunches of bananas are hanging from the wooden structure?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 30.296875, "loss": 4.275787830352783 }, { "id": "identity_129655", "image": "A-OKVQA/images/326583.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.484375, "loss": 4.310721397399902 }, { "id": "identity_337334", "image": "A-OKVQA/images/413959.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person doing on the carriage?\nChoices:\n(A) making repairs\n(B) is waiting\n(C) is resting\n(D) is stealing" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.921875, "loss": 4.622186183929443 }, { "id": "identity_15606", "image": "FigureQA/images/6349.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Olive the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.53125, "loss": 4.118298530578613 }, { "id": "identity_328740", "image": "PMC-VQA/images/PMC2592545_pone-0003928-g005_30803.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which brain region is indicated in the caption?\nChoices:\n(A) Broca's area\n(B) Visual cortex\n(C) Frontal eye fields\n(D) Somatosensory cortex" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.671875, "loss": 4.009039878845215 }, { "id": "identity_277218", "image": "ScienceQA/images/3969.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Oregon?\n(A) Salem\n(B) Helena\n(C) Knoxville\n(D) Honolulu" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.921875, "loss": 4.162690162658691 }, { "id": "identity_32851", "image": "A-OKVQA/images/265243.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What photographic technique was used to capture the movement of traffic on the road?\nChoices:\n(A) panorama\n(B) hdr\n(C) time-lapse\n(D) bokeh" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.3125, "loss": 4.05602502822876 }, { "id": "identity_1366", "image": "PMC-VQA/images/PMC8309360_plants-10-01442-f001_19466.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the blue arrow point to in the tomato leaf? \nChoices:\n(A) Stomata \n(B) Trichomes \n(C) PD \n(D) Cell Wall" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.515625, "loss": 5.209112167358398 }, { "id": "identity_25528", "image": "IconQA/images/43466.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) past two." }, { "from": "gpt", "value": "The answer is half" } ], "clip_score": 25.84375, "loss": 5.1549835205078125 }, { "id": "identity_278898", "image": "PMC-VQA/images/PMC5619555_f3_221675.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which view is the 3D brain model seen from the bottom?\nChoices:\n(A) View A\n(B) View B\n(C) View C\n(D) View D" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.015625, "loss": 4.381042957305908 }, { "id": "identity_239454", "image": "VQA2.0/images/518649.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the names of the two streets intersecting at this location?" }, { "from": "gpt", "value": "The answer is Grosvenor St and [Second Street Name" } ], "clip_score": 18.5, "loss": 4.078824996948242 }, { "id": "identity_264760", "image": "PMC-VQA/images/PMC3795359_F2_237149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which section of the brain is the staining observed?\nChoices:\n(A) Coronal section\n(B) Axial section\n(C) Sagittal section\n(D) Frontal section" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.796875, "loss": 4.246817588806152 }, { "id": "identity_286201", "image": "IconQA/images/26484.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Out of the three activities Bella did on Tuesday morning, which one did she do last? \n(A) made the bed\n(B) watched TV\n(C) got dressed" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.703125, "loss": 4.155832767486572 }, { "id": "identity_10889", "image": "TextVQA/images/5f2b56adf8d31c9e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what's the first letter of the sign on the right?" }, { "from": "gpt", "value": "The answer is m" } ], "clip_score": 11.578125, "loss": 4.073917865753174 }, { "id": "identity_24486", "image": "DVQA/images/bar_train_00022216.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 21.203125, "loss": 4.422276973724365 }, { "id": "identity_332208", "image": "PMC-VQA/images/PMC7562879_ccr33064-fig-0001_274937.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the two parts of this represent?\nChoices:\n(A) Fat in the bowel\n(B) Gas-filled bowel segments\n(C) Veins in the bowel wall\n(D) Folds in the bowel wall" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.359375, "loss": 4.152746200561523 }, { "id": "identity_276421", "image": "PMC-VQA/images/PMC4805622_Fig7_14393.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being shown in the arthroscopic photo of the patient? \nChoices:\n(A) Separation at the chondrolabral junction\n(B) Cam impingement\n(C) Acetabular cartilage\n(D) Pincer impingement" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.375, "loss": 4.459714889526367 }, { "id": "identity_28274", "image": "PMC-VQA/images/PMC4363549_fig3_368539.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the result of the treatment shown in the image? \nChoices:\n(A) reduction \n(B) no change \n(C) progression \n(D) relapse " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.75, "loss": 4.876107692718506 }, { "id": "identity_4925", "image": "TextVQA/images/6ebfd89c13a6894a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: if i wanted to reach the grand canal what number would i dial?" }, { "from": "gpt", "value": "The answer is nationals" } ], "clip_score": 21.578125, "loss": 6.166652202606201 }, { "id": "identity_298977", "image": "DocVQA/images/snyc0227_83.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the final column in the table?" }, { "from": "gpt", "value": "The answer is total issues" } ], "clip_score": 20.203125, "loss": 4.987325191497803 }, { "id": "identity_264031", "image": "AI2D/images/2957.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the diagram? Is it a neuron, proton, electron, or none of the above?\nChoices:\n(A) neuron\n(B) proton\n(C) none of the above\n(D) electron" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.9375, "loss": 4.3823137283325195 }, { "id": "identity_331596", "image": "VizWiz/images/VizWiz_train_00009256.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is one of these buttons labeled something?" }, { "from": "gpt", "value": "The answer is small has 1 plus large has 2 plus" } ], "clip_score": 27.890625, "loss": 4.88003396987915 }, { "id": "identity_264446", "image": "TabMWP/images/20995.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Peter have sufficient funds to purchase both a package of pencil lead and a package of crayons with his $0.09?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.375, "loss": 4.107875823974609 }, { "id": "identity_5124", "image": "ScienceQA/images/6165.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following organisms is the omnivore in this food web?\nChoices:\n(A) black rockfish\n(B) zooplankton\n(C) kelp bass\n(D) orca" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.59375, "loss": 4.175047874450684 }, { "id": "identity_33373", "image": "A-OKVQA/images/268356.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the white triangles in the distance surrounded by blue?\nChoices:\n(A) people\n(B) ice caps\n(C) birds\n(D) sailboats" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.6328125, "loss": 4.359489440917969 }, { "id": "identity_10154", "image": "PMC-VQA/images/PMC5884912_Fig4_297572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the type of thrombus shown in the image? \nChoices:\n(A) Bone thrombus \n(B) Gastric thrombus \n(C) Tumor thrombus \n(D) Arterial thrombus " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.890625, "loss": 4.669650077819824 }, { "id": "identity_105225", "image": "A-OKVQA/images/31984.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of clothing item is the person in the foreground wearing on their upper body?" }, { "from": "gpt", "value": "The answer is A dress" } ], "clip_score": 15.984375, "loss": 4.275562286376953 }, { "id": "identity_245823", "image": "A-OKVQA/images/506508.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tall thin things are visible in the image, and what do they carry?" }, { "from": "gpt", "value": "The answer is Two tall thin things carry power lines" } ], "clip_score": 18.078125, "loss": 4.371852397918701 }, { "id": "identity_29998", "image": "PMC-VQA/images/PMC3038968_F1_87161.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the abdominal CT reveal about the tumor?\nChoices:\n(A) It is located in the upper abdomen \n(B) It is a small, even tumor \n(C) It is large and uneven \n(D) It is located in the back " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.953125, "loss": 4.039518356323242 }, { "id": "identity_33016", "image": "PMC-VQA/images/PMC7227430_F4_171407.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the lymphocyte is being moved to the center? \nChoices:\n(A) The nucleus \n(B) The mitochondria \n(C) The ribosome \n(D) The cytoplasm " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.125, "loss": 4.302781581878662 }, { "id": "identity_10830", "image": "ScienceQA/images/15517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these two objects have in common?\nChoices:\n(A) bumpy\n(B) bouncy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.578125, "loss": 4.249731540679932 }, { "id": "identity_29513", "image": "A-OKVQA/images/572879.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the boy with the helmet in the foreground holding?\nChoices:\n(A) luggage\n(B) basket\n(C) pizza box\n(D) baseball bat" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.875, "loss": 4.144692897796631 }, { "id": "identity_299305", "image": "A-OKVQA/images/117838.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what activity are these individuals waiting?\nChoices:\n(A) board a bus\n(B) attend a show\n(C) shop at a sale\n(D) receive treatment" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.25, "loss": 4.046350002288818 }, { "id": "identity_338420", "image": "Super-CLEVR/images/superCLEVR_new_000240.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer SUVs than buses?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 21.734375, "loss": 5.215139865875244 }, { "id": "identity_304027", "image": "FigureQA/images/18419.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Dark Orchid?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.015625, "loss": 4.043178081512451 }, { "id": "identity_133084", "image": "Super-CLEVR/images/superCLEVR_new_000280.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any objects in the image that have wheels?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.953125, "loss": 5.396949768066406 }, { "id": "identity_66822", "image": "ScienceQA/images/16033.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color represents the highest specific humidity within the outlined area?" }, { "from": "gpt", "value": "The answer is Dark Green" } ], "clip_score": 26.5, "loss": 4.114503860473633 }, { "id": "identity_274491", "image": "AI2D/images/2710.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At what time of day does the letter P typically occur?\nChoices:\n(A) midnight\n(B) noon\n(C) sunset\n(D) sunrise" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.828125, "loss": 4.459048748016357 }, { "id": "identity_309604", "image": "A-OKVQA/images/184205.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is happening with this?\n\nChoices:\n(A) nothing\n(B) remodeling\n(C) being moved\n(D) being assembled" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.3359375, "loss": 4.357897758483887 }, { "id": "identity_321536", "image": "A-OKVQA/images/519784.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on the ground near the vehicle?\nChoices:\n(A) arrow\n(B) footprints\n(C) coyote\n(D) snow" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 4.98046875, "loss": 4.269210338592529 }, { "id": "identity_312355", "image": "TextVQA/images/03cd4987c51f2db9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the initial word in blue?" }, { "from": "gpt", "value": "The answer is dear" } ], "clip_score": 12.3203125, "loss": 4.966143608093262 }, { "id": "identity_145108", "image": "DVQA/images/bar_train_00036527.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many categories have a preference percentage for the source above 60%?" }, { "from": "gpt", "value": "The answer is Two categories" } ], "clip_score": 31.296875, "loss": 4.815847873687744 }, { "id": "identity_322727", "image": "A-OKVQA/images/556722.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the pink circles on the front of this used for?\nChoices:\n(A) visibility\n(B) light emitting\n(C) design\n(D) brand" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.1875, "loss": 4.447451114654541 }, { "id": "identity_24772", "image": "ScienceQA/images/8698.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is highlighted?\nChoices:\n(A) Africa\n(B) Europe\n(C) Asia\n(D) Antarctica" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.09375, "loss": 4.10855770111084 }, { "id": "identity_11254", "image": "FigureQA/images/75523.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Coral have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.65625, "loss": 4.152802467346191 }, { "id": "identity_217207", "image": "A-OKVQA/images/72769.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many balloons are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 14.359375, "loss": 4.272078514099121 }, { "id": "identity_265322", "image": "TQA/images/abc_question_images/sun_layers_16308.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label that represents the core:\n(A) Y\n(B) E\n(C) W\n(D) D" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.03125, "loss": 4.013247489929199 }, { "id": "identity_702", "image": "VQA-AS/images/13723.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the wall?" }, { "from": "gpt", "value": "The answer is tan" } ], "clip_score": 19.390625, "loss": 4.250210762023926 }, { "id": "identity_120152", "image": "TextVQA/images/14c6242a2cd93368.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first word of the last stanza in the poem on the right page?" }, { "from": "gpt", "value": "The answer is The" } ], "clip_score": 20.75, "loss": 4.360811710357666 }, { "id": "identity_279708", "image": "Super-CLEVR/images/superCLEVR_new_001703.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To the right of the big purple object on the left side of the large purple thing on the right side of the blue airliner, is there a brown mountain bike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 31.5, "loss": 5.307015419006348 }, { "id": "identity_233929", "image": "PMC-VQA/images/PMC5415772_Fig1_160019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the right panel image, identify the location of the aneurysm in the splenic artery and describe the anatomical landmarks that help determine this location" }, { "from": "gpt", "value": "The answer is middle third" } ], "clip_score": 24.421875, "loss": 6.087549209594727 }, { "id": "identity_49050", "image": "DocVQA/images/jrnc0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the sender of the letter?" }, { "from": "gpt", "value": "The answer is Roy" } ], "clip_score": 19.921875, "loss": 5.220430850982666 }, { "id": "identity_271872", "image": "FigureQA/images/78908.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Aqua or Cornflower?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.09375, "loss": 4.683382034301758 }, { "id": "identity_218612", "image": "Super-CLEVR/images/superCLEVR_new_001766.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have a metallic finish?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 30.234375, "loss": 4.451742649078369 }, { "id": "identity_221448", "image": "TextVQA/images/12533c9193166ae1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of cuisine does the restaurant with the yellow sign offer?" }, { "from": "gpt", "value": "The answer is Indian" } ], "clip_score": 19.953125, "loss": 4.00782585144043 }, { "id": "identity_10820", "image": "VizWiz/images/VizWiz_train_00001637.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are my clothes dirty?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 16.046875, "loss": 4.091798305511475 }, { "id": "identity_20960", "image": "AI2D/images/4842.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What occurrence is shown in the diagram above?\nChoices:\n(A) Transition\n(B) Volcanic Eruption\n(C) Erosion\n(D) Reproduction" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.8125, "loss": 4.2969794273376465 }, { "id": "identity_273810", "image": "Super-CLEVR/images/superCLEVR_new_001248.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any large purple rubber objects located behind the remaining part of the sedan?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 24.625, "loss": 5.260398864746094 }, { "id": "identity_280290", "image": "ScienceQA/images/1549.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the passage discussing the weather or the climate? \nOptions:\n(A) weather\n(B) climate" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 6.1953125, "loss": 4.424942493438721 }, { "id": "identity_331132", "image": "A-OKVQA/images/477919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of environment are these fruits grown in?\nChoices:\n(A) tropical\n(B) desert\n(C) tundra\n(D) temperate" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.09375, "loss": 4.13143253326416 }, { "id": "identity_273306", "image": "PMC-VQA/images/PMC4074105_pone-0100498-g001_301927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the variable that is depicted on the x-axis of the graph among the given choices? \n(A) WT CLD \n(B) Atisa1-1 CLD \n(C) Replotted values \n(D) No variable is represented on the x-axis" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.0, "loss": 4.278883934020996 }, { "id": "identity_268734", "image": "FigureQA/images/13265.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Burlywood the median with a high value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.0625, "loss": 4.529253005981445 }, { "id": "identity_7627", "image": "A-OKVQA/images/92066.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What flying objects are above the crosswalk?\nChoices:\n(A) spaceships\n(B) helicopters\n(C) airplanes\n(D) kites" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.34375, "loss": 4.898503303527832 }, { "id": "identity_186869", "image": "A-OKVQA/images/200643.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lifebuoys can be seen on the boat?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.015625, "loss": 4.257293224334717 }, { "id": "identity_24069", "image": "PMC-VQA/images/PMC6075640_FIG6_349949.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown by the red arrows in the image? \nChoices:\n(A) Pneumothorax in the right lung \n(B) Tumors in the right lung \n(C) Pleural Effusions in the left lung \n(D) Bronchitis in the left lung " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.265625, "loss": 4.074279308319092 }, { "id": "identity_337352", "image": "PMC-VQA/images/PMC7558323_nanomaterials-10-01870-f0A1_273128.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which element is indicated in red in the EDX map?\nChoices:\n(A) Si\n(B) O\n(C) Mn\n(D) Fe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.5390625, "loss": 4.719902992248535 }, { "id": "identity_43384", "image": "PlotQA/images/15769.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which country or territory has the highest number of refugees in the chart?" }, { "from": "gpt", "value": "The answer is Europe(all income levels)" } ], "clip_score": 31.96875, "loss": 4.782520771026611 }, { "id": "identity_282346", "image": "PMC-VQA/images/PMC5281579_F5_120924.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the picture provided?\nChoices:\n(A) Nerves and muscles \n(B) Blood and lymph vessels \n(C) Bones and joints \n(D) Connective tissue and fat" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.1875, "loss": 4.016797065734863 }, { "id": "identity_232006", "image": "PMC-VQA/images/PMC5040342_pone.0163610.g001_78292.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sections are indicated by the green bars, and what is the color of the bars used to indicate the sectioning level for the section labeled \"C.4\"?" }, { "from": "gpt", "value": "The answer is three sections, black" } ], "clip_score": 16.5625, "loss": 4.652985572814941 }, { "id": "identity_315020", "image": "A-OKVQA/images/442367.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of area is this?\nChoices:\n(A) shopping area\n(B) industrial\n(C) residential area\n(D) office buildings" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.9609375, "loss": 4.395001411437988 }, { "id": "identity_33283", "image": "PMC-VQA/images/PMC5818857_Fig4_278038.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the abnormality observed in the color Doppler TEE image? \nChoices:\n(A) Normal flow \n(B) Abnormal flow into the left atrium \n(C) Abnormal flow into the right atrium \n(D) Abnormal flow into the left ventricle " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.265625, "loss": 4.366037845611572 }, { "id": "identity_267458", "image": "TextVQA/images/3fa6c2b883eee9da.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the book resting on the man's desk?" }, { "from": "gpt", "value": "The answer is semantic web" } ], "clip_score": 22.234375, "loss": 4.9283833503723145 }, { "id": "identity_108031", "image": "VQA-AS/images/14631.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct cloud formations can be seen in the sky?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 19.765625, "loss": 4.2546892166137695 }, { "id": "identity_37696", "image": "A-OKVQA/images/169116.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How is the woman on the right feeling in this moment?\nChoices:\n(A) amused\n(B) disturbed\n(C) sad\n(D) angry" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.09375, "loss": 4.537016868591309 }, { "id": "identity_117348", "image": "PMC-VQA/images/PMC7885276_f1_368641.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What organ is primarily highlighted in the colored area of the image?" }, { "from": "gpt", "value": "The answer is The heart" } ], "clip_score": 28.265625, "loss": 4.08280611038208 }, { "id": "identity_188571", "image": "TextVQA/images/9a53f70d67f8f617.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What feature should be used to check the property's details?" }, { "from": "gpt", "value": "The answer is Check" } ], "clip_score": 24.375, "loss": 4.410382270812988 }, { "id": "identity_68075", "image": "ScienceQA/images/16072.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tubes are connected to the top of the container?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.34375, "loss": 4.256516456604004 }, { "id": "identity_110757", "image": "A-OKVQA/images/56040.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many drinking glasses can be seen on the table?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 19.953125, "loss": 4.48529577255249 }, { "id": "identity_329051", "image": "PMC-VQA/images/PMC7670670_Fig3_299638.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which side of the body is shown in the MRI?\nChoices:\n(A) Right side\n(B) Left side\n(C) Both sides\n(D) Cannot be determined" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.609375, "loss": 4.210165977478027 }, { "id": "identity_320293", "image": "A-OKVQA/images/292730.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is there a candle in the dessert?\nChoices:\n(A) to trick\n(B) to celebrate\n(C) as joke\n(D) for light" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.109375, "loss": 4.357053279876709 }, { "id": "identity_290948", "image": "A-OKVQA/images/399628.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the function of the round platform depicted in the image?\nChoices:\n(A) sit still\n(B) turn upsidedown\n(C) nothing\n(D) rotate" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.0625, "loss": 4.683964252471924 }, { "id": "identity_194501", "image": "VQA2.0/images/206102.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many guitars are visible in the room?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.765625, "loss": 4.543262481689453 }, { "id": "identity_213472", "image": "PMC-VQA/images/PMC4604556_f2_433091.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct colors are used to label parts of the specimen?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.9375, "loss": 4.065099239349365 }, { "id": "identity_9039", "image": "A-OKVQA/images/305093.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What sound does the animal make?\nChoices:\n(A) neigh\n(B) woof\n(C) moo\n(D) meow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 9.5, "loss": 4.243142604827881 }, { "id": "identity_323671", "image": "PMC-VQA/images/PMC7685769_figure2_304241.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the mass shown in the MR image?\nChoices:\n(A) cerebral region\n(B) thoracic region\n(C) pelvic region\n(D) cervical region" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.0625, "loss": 4.417328357696533 }, { "id": "identity_103705", "image": "TextVQA/images/13de5557647e8e13.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stacks of books are visible in the window display?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 28.359375, "loss": 4.377207279205322 }, { "id": "identity_243515", "image": "DocVQA/images/kqph0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are mentioned in the letter, and what is the total number of rooms required for their stay?" }, { "from": "gpt", "value": "The answer is 6 rooms" } ], "clip_score": 15.3359375, "loss": 4.246647834777832 }, { "id": "identity_336443", "image": "FigureQA/images/73471.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Khaki the one with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.890625, "loss": 4.762298583984375 }, { "id": "identity_253623", "image": "TabMWP/images/15377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the second difference of the function values in the table, and what does this indicate about the type of function?" }, { "from": "gpt", "value": "The answer is quadratic function" } ], "clip_score": 29.765625, "loss": 5.652621746063232 }, { "id": "identity_336497", "image": "PMC-VQA/images/PMC4875637_Fig2_32538.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of images are depicted in e, f?\nChoices:\n(A) PET images \n(B) T1-weighted images \n(C) T2-weighted images \n(D) CT images" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.734375, "loss": 4.202332019805908 }, { "id": "identity_312940", "image": "AI2D/images/2841.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the letter M indicate?\nChoices:\n(A) The Vent\n(B) The Lateral Line\n(C) The Barbels\n(D) The Caudal Peduncle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.96875, "loss": 4.250256538391113 }, { "id": "identity_332949", "image": "A-OKVQA/images/396172.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of session are they attending?\nChoices:\n(A) college orientation\n(B) work training\n(C) wedding rehearsal\n(D) party planning" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.390625, "loss": 4.198136329650879 }, { "id": "identity_333593", "image": "A-OKVQA/images/174601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the green garment most likely hung up?\nChoices:\n(A) drying it\n(B) signal\n(C) blew there\n(D) reduce draft" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.4921875, "loss": 4.418286323547363 }, { "id": "identity_308698", "image": "Super-CLEVR/images/superCLEVR_new_000754.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any purple objects of the same shape as this large green object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 22.25, "loss": 5.463758945465088 }, { "id": "identity_319916", "image": "TabMWP/images/34195.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is it linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.96875, "loss": 4.202642440795898 }, { "id": "identity_329105", "image": "A-OKVQA/images/579229.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of move does the person appear to be doing?\nChoices:\n(A) crab walk\n(B) leap frog\n(C) worm\n(D) electric slide" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.25, "loss": 4.322587013244629 }, { "id": "identity_23142", "image": "PMC-VQA/images/PMC4863127_fig2_29527.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the lesion identified?\nChoices:\n(A) Left posterior wall of the uterus \n(B) Top wall of the uterus \n(C) Right posterior wall of the uterus \n(D) Front wall of the uterus" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.375, "loss": 4.593835353851318 }, { "id": "identity_131874", "image": "VQA2.0/images/288662.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are visible in the image participating in the water activity?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 26.6875, "loss": 4.319838047027588 }, { "id": "identity_28885", "image": "PMC-VQA/images/PMC9414956_microorganisms-10-01640-f003_385876.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What bone is affected by the disease in this image? \nChoices:\n(A) Proximal tibia. \n(B) Distal tibia. \n(C) Proximal fibula. \n(D) Distal fibula. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.359375, "loss": 4.2857513427734375 }, { "id": "identity_298216", "image": "A-OKVQA/images/174766.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the body is the rider wearing protective gear? \nChoices:\n(A) elbows\n(B) knees\n(C) chest\n(D) head" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.5234375, "loss": 4.588987350463867 }, { "id": "identity_322399", "image": "ScienceQA/images/19402.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Salem\n(B) Boise\n(C) Little Rock\n(D) Juneau" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.96875, "loss": 4.044874668121338 }, { "id": "identity_244571", "image": "A-OKVQA/images/469165.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the batter's stance and the position of the catcher, determine the handedness of the batter and explain which shoulder he will use to watch the pitcher" }, { "from": "gpt", "value": "The answer is left" } ], "clip_score": 29.265625, "loss": 5.605095863342285 }, { "id": "identity_302240", "image": "VQA2.0/images/265279.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is about to hit the ball?" }, { "from": "gpt", "value": "The answer is serving man" } ], "clip_score": 21.671875, "loss": 4.955603122711182 }, { "id": "identity_323511", "image": "A-OKVQA/images/30133.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who is responsible for the vehicle shown here?\nChoices:\n(A) military\n(B) toy stores\n(C) movie studios\n(D) post office" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.828125, "loss": 4.255204200744629 }, { "id": "identity_13664", "image": "DVQA/images/bar_train_00081125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first group of bars from the bottom?" }, { "from": "gpt", "value": "The answer is card" } ], "clip_score": 20.015625, "loss": 4.165753364562988 }, { "id": "identity_117933", "image": "A-OKVQA/images/360595.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are sitting on the ground?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 18.640625, "loss": 4.793999195098877 }, { "id": "identity_196640", "image": "A-OKVQA/images/234717.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible walking towards the building on the hill?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 20.859375, "loss": 4.285952568054199 }, { "id": "identity_10672", "image": "VizWiz/images/VizWiz_train_00017344.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this picture?" }, { "from": "gpt", "value": "The answer is screenshot icons" } ], "clip_score": 21.84375, "loss": 5.318919658660889 }, { "id": "identity_261306", "image": "FigureQA/images/66131.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Red a lighter shade than Coral?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.171875, "loss": 4.028721809387207 }, { "id": "identity_316427", "image": "FigureQA/images/91612.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.1796875, "loss": 4.208983898162842 }, { "id": "identity_268437", "image": "FigureQA/images/17603.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Bubblegum not the low median?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.890625, "loss": 4.1984076499938965 }, { "id": "identity_202880", "image": "A-OKVQA/images/262985.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats have visible flags?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.5625, "loss": 4.896108150482178 }, { "id": "identity_297458", "image": "AI2D/images/1654.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does A represent in the diagram provided?\nOptions:\n(A) moon\n(B) moon's orbit\n(C) night-time\n(D) earth" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.5625, "loss": 4.013545989990234 }, { "id": "identity_273976", "image": "PMC-VQA/images/PMC8529121_fig5_88602.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the species of bee depicted in the SEM image labeled as (B)? Is it:\n(A) Apis mellifera\n(B) Apis dorsata\n(C) Melipona beecheii\n(D) Tetragonula carbonaria" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.609375, "loss": 4.070044040679932 }, { "id": "identity_285692", "image": "VizWiz/images/VizWiz_train_00019201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Could you please inform me about the contents of this glass mug? Thank you." }, { "from": "gpt", "value": "The answer is glass" } ], "clip_score": 24.984375, "loss": 4.118295669555664 }, { "id": "identity_3174", "image": "A-OKVQA/images/132132.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What job does the man behind the stand hold?\nChoices:\n(A) green grocer\n(B) watch salesman\n(C) driver\n(D) butcher" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.546875, "loss": 4.407746315002441 }, { "id": "identity_335538", "image": "VQA-AS/images/3662.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color animals are present?" }, { "from": "gpt", "value": "The answer is blue orange gray" } ], "clip_score": 16.5625, "loss": 4.995567321777344 }, { "id": "identity_187784", "image": "TextVQA/images/4caa4860745a3162.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many raspberries are on the slice of bread?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 10.546875, "loss": 4.613840103149414 }, { "id": "identity_22659", "image": "PMC-VQA/images/PMC4312452_Fig2_354195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the purple color represent in the right image? \nChoices:\n(A) Dorsal valve \n(B) Spiculation \n(C) Endoskeleton of the plectolophous lophophore \n(D) Ventral valve " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.7734375, "loss": 4.2418036460876465 }, { "id": "identity_284181", "image": "A-OKVQA/images/171192.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the woman in the red apron engaged in?\nChoices:\n(A) bartending\n(B) wine demo\n(C) waiting tables\n(D) party catering" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.703125, "loss": 4.566100597381592 }, { "id": "identity_285684", "image": "PMC-VQA/images/PMC7397898_vms3273-fig-0002_224754.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the asterisk in the image represent?\nChoices:\n(A) The left retroperitoneum \n(B) The right circumvaval ureter \n(C) The caudal vena cava \n(D) The mid abdomen." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.46875, "loss": 4.451523780822754 }, { "id": "identity_118083", "image": "Super-CLEVR/images/superCLEVR_new_000361.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the object to the left of the big blue rubber airplane?" }, { "from": "gpt", "value": "The answer is Rubber" } ], "clip_score": 27.5, "loss": 4.037248611450195 }, { "id": "identity_5962", "image": "A-OKVQA/images/560083.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these men displaying?\nChoices:\n(A) pilots license\n(B) movie ticket\n(C) entrance ticket\n(D) police badge" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.4375, "loss": 4.3026885986328125 }, { "id": "identity_200333", "image": "A-OKVQA/images/334511.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chickens can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.1875, "loss": 4.525137901306152 }, { "id": "identity_57983", "image": "FigureQA/images/50088.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the x-axis?" }, { "from": "gpt", "value": "The answer is xaxis label" } ], "clip_score": 28.5625, "loss": 4.331887722015381 }, { "id": "identity_282791", "image": "A-OKVQA/images/561570.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the unusual lighting and images in this place?\nChoices:\n(A) too bright\n(B) over-exposure\n(C) excited cameraman\n(D) broken camera" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.15625, "loss": 4.07865047454834 }, { "id": "identity_4293", "image": "TQA/images/abc_question_images/human_system_eye_16137.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which represents a clear, protective covering on the outside of the eye?\nChoices:\n(A) H\n(B) C\n(C) S\n(D) A" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.8125, "loss": 4.2559638023376465 }, { "id": "identity_109547", "image": "VQA2.0/images/220390.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 15.4453125, "loss": 4.392566680908203 }, { "id": "identity_276243", "image": "TQA/images/question_images/human_system_eye_6126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the eye varies in color among individuals? \n(A) Pupil \n(B) Cornea \n(C) Iris \n(D) Lens" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.34375, "loss": 4.871429920196533 }, { "id": "identity_12647", "image": "AI2D/images/2632.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram below, what label refers to the waning crescent?\nChoices:\n(A) N\n(B) H\n(C) G\n(D) B" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.59375, "loss": 4.0272603034973145 }, { "id": "identity_37340", "image": "A-OKVQA/images/143277.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are the cars lined up along the sidewalk?\nChoices:\n(A) to park\n(B) to wash\n(C) car show\n(D) to race" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.3125, "loss": 4.136736869812012 }, { "id": "identity_107293", "image": "TextVQA/images/00ebf08a1a6e045c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players from the team in green are visible on the court?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.9375, "loss": 4.728398323059082 }, { "id": "identity_231387", "image": "VQA-AS/images/15812.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image are brown, and what are they?" }, { "from": "gpt", "value": "The answer is One, the log" } ], "clip_score": 23.0, "loss": 4.523636817932129 }, { "id": "identity_110666", "image": "Super-CLEVR/images/superCLEVR_new_001351.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bicycle that is in front of the yellow bus?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 28.828125, "loss": 4.412926197052002 }, { "id": "identity_272020", "image": "PMC-VQA/images/PMC7756594_mrm28524-fig-0003_325276.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the dashed vertical line?\nChoices:\n(A) Motion of the subject \n(B) Acquisition of shots \n(C) Separation between shots \n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.234375, "loss": 5.1208176612854 }, { "id": "identity_24680", "image": "PMC-VQA/images/PMC9402411_rcr21020-fig-0002_379370.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What specific suggestion of emphysema can be seen in the image? \nChoices:\n(A) arrowheads pointing to a blood vessel \n(B) white arrows pointing to clustered micronodules \n(C) red arrows pointing to microcystic lesions \n(D) blue arrows pointing to hyperdensity regions " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.734375, "loss": 4.307476997375488 }, { "id": "identity_296568", "image": "TextVQA/images/3bcd2d405bd973f9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is being advertised on the back of the beer company's product?" }, { "from": "gpt", "value": "The answer is bud" } ], "clip_score": 12.8515625, "loss": 4.098227500915527 }, { "id": "identity_118007", "image": "Super-CLEVR/images/superCLEVR_new_000908.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is to the left of the large blue shiny car and behind the tiny yellow metallic bicycle?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 32.90625, "loss": 5.455420970916748 }, { "id": "identity_334682", "image": "PMC-VQA/images/PMC5143741_fig1_103448.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the shape of the object mentioned in the caption?\nChoices:\n(A) Round-shaped\n(B) Cylindrical\n(C) Rectangular\n(D) Oval-shaped" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 12.09375, "loss": 4.771521091461182 }, { "id": "identity_276278", "image": "Super-CLEVR/images/superCLEVR_new_000205.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the small red utility bikes on the right side of the small red metal utility bike equal the number of big gray metal cars?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 28.21875, "loss": 4.833099365234375 }, { "id": "identity_302942", "image": "TQA/images/abc_question_images/parts_worm_17313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter indicates this structure?\nChoices:\n(A) T\n(B) U\n(C) H\n(D) V" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.59375, "loss": 4.417394638061523 }, { "id": "identity_182512", "image": "A-OKVQA/images/502307.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many mirrors does the scooter have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.859375, "loss": 4.191256046295166 }, { "id": "identity_304269", "image": "AI2D/images/1174.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the small dots on this (K) represent?\nChoices:\n(A) mitochondrion\n(B) nuclear pore\n(C) lysosome\n(D) ribosome" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.890625, "loss": 4.90533971786499 }, { "id": "identity_9031", "image": "AI2D/images/3271.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which region represents the golgi body?\nChoices:\n(A) e\n(B) a\n(C) k\n(D) d" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.109375, "loss": 4.05231237411499 }, { "id": "identity_3850", "image": "A-OKVQA/images/535519.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color gamma is the picture in?\nChoices:\n(A) cold filter\n(B) monochromatic\n(C) full color\n(D) sepia" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.8984375, "loss": 4.500746726989746 }, { "id": "identity_282051", "image": "PMC-VQA/images/PMC7941715_Fig1_386648.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which plane of view did the computed tomography show the enlargement?\nChoices:\n(A) Sagittal plane\n(B) Coronal plane\n(C) Transverse plane\n(D) Diagonal plane." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.671875, "loss": 4.0967302322387695 }, { "id": "identity_192137", "image": "A-OKVQA/images/134133.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the backpack on the left side of the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 18.609375, "loss": 4.4486894607543945 }, { "id": "identity_226124", "image": "ScienceQA/images/14223.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which property do these two objects have in common, and how does this property affect their use in daily life?" }, { "from": "gpt", "value": "The answer is Softness makes them comfortable and suitable for their uses" } ], "clip_score": 23.796875, "loss": 4.1816864013671875 }, { "id": "identity_329629", "image": "TextVQA/images/3d9f270c94e95e10.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this newspaper?" }, { "from": "gpt", "value": "The answer is financial times" } ], "clip_score": 25.609375, "loss": 4.200551986694336 }, { "id": "identity_323685", "image": "Super-CLEVR/images/superCLEVR_new_001966.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the large object to the right of the big regular bus the same shape as the big yellow matte thing?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 21.609375, "loss": 5.048365592956543 }, { "id": "identity_37146", "image": "A-OKVQA/images/555833.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the cameraman most likely taking a picture from?\nChoices:\n(A) car rooftop\n(B) palm tree\n(C) building\n(D) mountain" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.6328125, "loss": 4.132798194885254 }, { "id": "identity_34130", "image": "PMC-VQA/images/PMC5510979_FIG1_190600.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the concern regarding the MRI scan of the brain? \nChoices:\n(A) Suspicion of orbital cellulitis \n(B) Fracture in the temporal region \n(C) Signs of a tumor in the brain \n(D) No abnormalities detected in the brain scan. " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.1875, "loss": 5.037961006164551 }, { "id": "identity_5376", "image": "A-OKVQA/images/119666.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What company made this vehicle?\nChoices:\n(A) mercedes\n(B) toyota\n(C) audi\n(D) hyundai" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.65625, "loss": 4.100249290466309 }, { "id": "identity_123956", "image": "Super-CLEVR/images/superCLEVR_new_001221.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image have wheels?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.6875, "loss": 4.635794639587402 }, { "id": "identity_502", "image": "FigureQA/images/67925.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Coral the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.265625, "loss": 4.068387508392334 }, { "id": "identity_28172", "image": "A-OKVQA/images/244088.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What structure can be seen here?\nChoices:\n(A) ramparts\n(B) portcullis\n(C) roof\n(D) drawbridge" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.84375, "loss": 4.620110988616943 }, { "id": "identity_104047", "image": "VizWiz/images/VizWiz_train_00018719.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible paths lead up to the structure?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 15.53125, "loss": 4.259839057922363 }, { "id": "identity_276734", "image": "A-OKVQA/images/129648.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the action being performed by this individual in the image? \nChoices:\n(A) bouncing\n(B) being chased\n(C) falling\n(D) doing tricks" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.359375, "loss": 4.384249210357666 }, { "id": "identity_323152", "image": "Super-CLEVR/images/superCLEVR_new_001927.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the object to the left of the shiny bus next to the small green shiny car?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 29.734375, "loss": 4.388452529907227 }, { "id": "identity_26727", "image": "PMC-VQA/images/PMC7000265_FI180006-4_110913.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does arrow A in the chest CT scan indicate? \nChoices:\n(A) Risk of collapse \n(B) Enlarged lymph node \n(C) Stent breakage \n(D) Air trapped in the lungs " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.78125, "loss": 4.4260077476501465 }, { "id": "identity_218150", "image": "TextVQA/images/552149a06cf2fdcb.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which city is mentioned on the item located at the bottom right corner?" }, { "from": "gpt", "value": "The answer is Konstanz" } ], "clip_score": 19.421875, "loss": 4.362054347991943 }, { "id": "identity_272196", "image": "ScienceQA/images/13091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Illinois?\nChoices:\n(A) Chicago\n(B) Topeka\n(C) Springfield\n(D) Nashville" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.25, "loss": 4.035924911499023 }, { "id": "identity_215355", "image": "A-OKVQA/images/218701.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trees are visible in the background?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 11.4140625, "loss": 4.442111968994141 }, { "id": "identity_67556", "image": "ScienceQA/images/20242.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of fungi are shown in the food web?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.390625, "loss": 4.233508110046387 }, { "id": "identity_117345", "image": "VQA2.0/images/546285.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 15.375, "loss": 4.335733890533447 }, { "id": "identity_285455", "image": "A-OKVQA/images/196170.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the patrons using chopsticks to eat?\nChoices:\n(A) superiority\n(B) for fun\n(C) for authenticity\n(D) as joke" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.703125, "loss": 4.4907612800598145 }, { "id": "identity_285383", "image": "PMC-VQA/images/PMC6702713_Fig2_26355.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the flow signal located in the image provided?\nChoices:\n(A) RPE \n(B) Bruch’s membrane \n(C) The neovascular network \n(D) Drusenoid lesion" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.921875, "loss": 4.115976333618164 }, { "id": "identity_282221", "image": "PMC-VQA/images/PMC4551704_Fig3_418092.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which medical imaging technique is being utilized in the image provided? \nOptions:\n(A) MRI\n(B) CT Scan\n(C) X-ray\n(D) Pet Scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.78125, "loss": 4.24069881439209 }, { "id": "identity_68927", "image": "AI2D/images/1855.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What role does the elk play in this food chain?" }, { "from": "gpt", "value": "The answer is Primary consumer" } ], "clip_score": 29.828125, "loss": 4.421056747436523 }, { "id": "identity_210171", "image": "VizWiz/images/VizWiz_train_00018739.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the beer bottle cap?" }, { "from": "gpt", "value": "The answer is Silver" } ], "clip_score": 24.515625, "loss": 4.0367960929870605 }, { "id": "identity_8266", "image": "FigureQA/images/45355.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Chartreuse the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.578125, "loss": 4.282015800476074 }, { "id": "identity_332412", "image": "TextVQA/images/9fcca3a2486c7988.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who created this?" }, { "from": "gpt", "value": "The answer is the police" } ], "clip_score": 19.046875, "loss": 4.395730972290039 }, { "id": "identity_127011", "image": "A-OKVQA/images/478407.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of beverage is in the glass to the left of the man's plate?" }, { "from": "gpt", "value": "The answer is Cola or soda" } ], "clip_score": 19.234375, "loss": 4.05882453918457 }, { "id": "identity_31928", "image": "AI2D/images/544.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is the outermost layer?\nChoices:\n(A) C\n(B) D\n(C) A\n(D) B" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.6640625, "loss": 4.356635093688965 }, { "id": "identity_320283", "image": "A-OKVQA/images/507434.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who is most likely to visit this club?\nChoices:\n(A) straight men\n(B) women\n(C) all equally\n(D) lesbians" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.921875, "loss": 4.02337121963501 }, { "id": "identity_14258", "image": "FigureQA/images/54815.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.140625, "loss": 4.619793891906738 }, { "id": "identity_75609", "image": "ScienceQA/images/17604.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What role does the copepod play in this food web?" }, { "from": "gpt", "value": "The answer is Primary consumer" } ], "clip_score": 31.578125, "loss": 4.257974147796631 }, { "id": "identity_332913", "image": "PMC-VQA/images/PMC8155358_F2_452888.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which parts of the brain show abnormal signals in the MRI image?\nChoices:\n(A) Temporal and Parietal parts\n(B) Occipital and Insular parts\n(C) Frontal and Insular parts\n(D) Parietal and Frontal parts" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 37.21875, "loss": 4.016728401184082 }, { "id": "identity_12632", "image": "Super-CLEVR/images/superCLEVR_new_000164.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any tiny purple matte objects that have the same shape as the yellow matte thing?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 22.703125, "loss": 5.38215446472168 }, { "id": "identity_156971", "image": "ScienceQA/images/15743.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many fins can be observed on the moray eel in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 32.03125, "loss": 4.051081657409668 }, { "id": "identity_18169", "image": "DVQA/images/bar_train_00066326.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the chart contain stacked bars?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 29.265625, "loss": 4.5424065589904785 }, { "id": "identity_114075", "image": "VQA2.0/images/66825.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are touching the horse sculpture?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 27.21875, "loss": 4.425076961517334 }, { "id": "identity_304385", "image": "Super-CLEVR/images/superCLEVR_new_001259.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the gray matte scooter and the metallic tandem bike the same size?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 29.734375, "loss": 5.414162635803223 }, { "id": "identity_329168", "image": "VizWiz/images/VizWiz_train_00018087.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this, are these windows? And if they are, how do I open them?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 24.125, "loss": 4.188860893249512 }, { "id": "identity_1537", "image": "AI2D/images/160.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Describe letter D in the diagram.\nChoices:\n(A) moon\n(B) earth\n(C) sun\n(D) tides" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.5625, "loss": 4.06863260269165 }, { "id": "identity_287579", "image": "TabMWP/images/3227.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.328125, "loss": 4.493132591247559 }, { "id": "identity_277904", "image": "AI2D/images/2330.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the result of the combination of ovum and sperm?\nChoices:\n(A) Polyp forming\n(B) Zygote\n(C) Branching\n(D) Planula" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.9375, "loss": 4.150937080383301 }, { "id": "identity_318967", "image": "DVQA/images/bar_train_00126383.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of this group of bars?" }, { "from": "gpt", "value": "The answer is finger" } ], "clip_score": 24.375, "loss": 4.865478038787842 }, { "id": "identity_110591", "image": "A-OKVQA/images/438196.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible vehicles are behind the elephant?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.78125, "loss": 4.551327705383301 }, { "id": "identity_301354", "image": "DVQA/images/bar_train_00125935.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest total value?" }, { "from": "gpt", "value": "The answer is camera" } ], "clip_score": 28.96875, "loss": 4.025594711303711 }, { "id": "identity_2225", "image": "FigureQA/images/55414.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Turquoise the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.0, "loss": 4.034512042999268 }, { "id": "identity_283226", "image": "A-OKVQA/images/102506.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the intended use of the colored lights?\nChoices:\n(A) decoration\n(B) decoration\n(C) traffic control\n(D) illumination" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.890625, "loss": 4.77347469329834 }, { "id": "identity_286892", "image": "Super-CLEVR/images/superCLEVR_new_000534.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the tiny shiny object located on the right side of the small object in front of the tiny gray sedan?" }, { "from": "gpt", "value": "The answer is sedan" } ], "clip_score": 27.890625, "loss": 5.384718418121338 }, { "id": "identity_121662", "image": "VQA2.0/images/392780.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible in the outfield?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.703125, "loss": 4.653556823730469 }, { "id": "identity_264133", "image": "PMC-VQA/images/PMC7393149_Fig1_222964.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the green color represent in the three-dimensional view of the eye's posterior part?\nChoices:\n(A) Hyaloid vessels \n(B) Retinal tissues \n(C) Choroidal tissues \n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.796875, "loss": 4.448871612548828 }, { "id": "identity_280884", "image": "ScienceQA/images/16842.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repulsive force towards each other?\nChoices:\n(A) repel\n(B) attract" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.5625, "loss": 4.080801963806152 }, { "id": "identity_286365", "image": "PMC-VQA/images/PMC7330916_rcr2614-fig-0001_203756.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the nodules located in the lungs according to the given choices? \n(A) Upper lungs \n(B) Middle lungs \n(C) Both lower lungs \n(D) One lower lung and one upper lung" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.171875, "loss": 4.745006561279297 }, { "id": "identity_25239", "image": "PMC-VQA/images/PMC3349020_Fig1_137069.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of radiograph was used to identify the fractures? \nChoices:\n(A) Lateral radiograph of pelvis \n(B) CT scan of pelvis \n(C) Anteroposterior radiograph of pelvis \n(D) MRI of pelvis " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.203125, "loss": 4.035409450531006 }, { "id": "identity_58372", "image": "DocVQA/images/lpjm0223_61.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are Antacids allowed before dosing with study medication?" }, { "from": "gpt", "value": "The answer is Not permitted" } ], "clip_score": 22.46875, "loss": 4.021311283111572 }, { "id": "identity_217263", "image": "PMC-VQA/images/PMC7181419_Fig9_158378.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the possible purpose of the crosshairs in the image?" }, { "from": "gpt", "value": "The answer is To locate and measure the lesion" } ], "clip_score": 27.8125, "loss": 4.059838771820068 }, { "id": "identity_291130", "image": "A-OKVQA/images/486290.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which style is employed in creating these glass ornaments? Is it:\n(A) blown glass\n(B) shaped glass\n(C) burnt glass\n(D) torched glass" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.921875, "loss": 4.421830654144287 }, { "id": "identity_197847", "image": "A-OKVQA/images/443805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many hands does the clock have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.96875, "loss": 4.006073474884033 }, { "id": "identity_20521", "image": "ScienceQA/images/20692.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) scratchy\n(B) yellow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.375, "loss": 4.0854902267456055 }, { "id": "identity_8045", "image": "A-OKVQA/images/47419.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which profession would have used the red vehicle?\nChoices:\n(A) mailmen\n(B) police\n(C) doctors\n(D) firemen" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.359375, "loss": 4.159005165100098 }, { "id": "identity_60269", "image": "DVQA/images/bar_train_00025906.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the top bar?" }, { "from": "gpt", "value": "The answer is flavor" } ], "clip_score": 21.921875, "loss": 4.2671051025390625 }, { "id": "identity_299928", "image": "A-OKVQA/images/246001.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is currently being shown on the screen in front of the class?\nChoices:\n(A) movie\n(B) powerpoint presentation\n(C) live tv\n(D) weekly news" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.859375, "loss": 4.085216999053955 }, { "id": "identity_306034", "image": "VQA-AS/images/9164.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are they planning to do?" }, { "from": "gpt", "value": "The answer is have cheese and wine" } ], "clip_score": 24.328125, "loss": 4.060964107513428 }, { "id": "identity_310229", "image": "PMC-VQA/images/PMC8122658_ijms-22-04325-f005_440908.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the materials shown in the image?\nChoices:\n(A) Polymers \n(B) Crystals \n(C) Fibers \n(D) Metals" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.734375, "loss": 5.036920070648193 }, { "id": "identity_75370", "image": "AI2D/images/608.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stages does the mosquito go through before reaching adulthood?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 30.484375, "loss": 4.613637447357178 }, { "id": "identity_281884", "image": "A-OKVQA/images/205116.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the old man engaged in with the scissors? Is he:\n(A) cutting hair\n(B) cutting string\n(C) cutting paper\n(D) cutting fruit" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.375, "loss": 4.419848442077637 }, { "id": "identity_334981", "image": "ScienceQA/images/12867.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the following statement about our solar system accurate or inaccurate?\nThe volume of Uranus is less than one-tenth of the volume of Saturn.\nChoices:\n(A) inaccurate\n(B) accurate" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.53125, "loss": 4.468470573425293 }, { "id": "identity_219948", "image": "VQA2.0/images/355406.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the team in blue uniforms?" }, { "from": "gpt", "value": "The answer is Tennessee Valley" } ], "clip_score": 19.546875, "loss": 5.154002666473389 }, { "id": "identity_281042", "image": "AI2D/images/610.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the letter A represent in the following choices: Egg, Pupae, Adult, or Larvae?\nChoices:\n(A) Egg\n(B) Pupae\n(C) Adult\n(D) Larvae" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.8125, "loss": 4.493109226226807 }, { "id": "identity_14823", "image": "ScienceQA/images/13572.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is highlighted?\nChoices:\n(A) Antarctica\n(B) North America\n(C) South America" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.109375, "loss": 4.450264930725098 }, { "id": "identity_57683", "image": "PlotQA/images/121487.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which country had the highest information index in 2015?" }, { "from": "gpt", "value": "The answer is Euro area" } ], "clip_score": 27.15625, "loss": 4.010159015655518 }, { "id": "identity_316695", "image": "A-OKVQA/images/292833.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type is the red vehicle?\nChoices:\n(A) hatchback\n(B) sedan\n(C) station wagon\n(D) coupe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.0625, "loss": 4.938811302185059 }, { "id": "identity_198405", "image": "PMC-VQA/images/PMC3599198_F2_192267.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of a pathological condition in the image?" }, { "from": "gpt", "value": "The answer is No obvious pathology visible" } ], "clip_score": 26.515625, "loss": 4.4340009689331055 }, { "id": "identity_76925", "image": "ScienceQA/images/8366.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many producers are present in this food web?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.890625, "loss": 4.94998025894165 }, { "id": "identity_306428", "image": "PMC-VQA/images/PMC6145912_Fig4_369462.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the yellow arrowheads point to?\nChoices:\n(A) Labelled sites \n(B) A connection between pre-synaptic and post-synaptic neurons \n(C) Unrelated images \n(D) Noise in the image" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.84375, "loss": 4.884572505950928 }, { "id": "identity_329429", "image": "A-OKVQA/images/407522.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are the people in front?\nChoices:\n(A) waiting turns\n(B) are lost\n(C) spectators\n(D) competitors" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.359375, "loss": 4.278307914733887 }, { "id": "identity_226211", "image": "VQA2.0/images/486711.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are holding blue boards, and what is the color of the kite they are flying?" }, { "from": "gpt", "value": "The answer is one person, yellow and green" } ], "clip_score": 29.0625, "loss": 4.0517048835754395 }, { "id": "identity_295582", "image": "TextVQA/images/2633f60ac1bc0ea9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which sponsor has a yellow sign?" }, { "from": "gpt", "value": "The answer is continental " } ], "clip_score": 25.875, "loss": 4.272586822509766 }, { "id": "identity_115827", "image": "Super-CLEVR/images/superCLEVR_new_001333.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object directly above the green bicycle?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 25.046875, "loss": 4.015870094299316 }, { "id": "identity_215625", "image": "VQA2.0/images/385448.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many street lamps can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.0625, "loss": 4.358824729919434 }, { "id": "identity_186999", "image": "VQA-AS/images/16846.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the curtains in the room?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 20.640625, "loss": 4.048368453979492 }, { "id": "identity_199678", "image": "VQA2.0/images/174099.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many decorative hearts are visible on the cake?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 26.28125, "loss": 4.470212936401367 }, { "id": "identity_291158", "image": "TQA/images/question_images/optics_lense_types_9162.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How does a concave lens affect light?\nChoices:\n(A) diffract it\n(B) focus it\n(C) refract it\n(D) reflect it" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.328125, "loss": 4.630287170410156 }, { "id": "identity_269184", "image": "AI2D/images/2009.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where does energy move from the Krill to, among the following choices: Grasshopper, Spotted frog, Crab, or Algae?\nChoices:\n(A) Grasshopper\n(B) Spotted frog\n(C) Crab\n(D) Algae" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.4375, "loss": 4.572720050811768 }, { "id": "identity_336217", "image": "A-OKVQA/images/91105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What sport in the UK might this ball be used for?\nChoices:\n(A) rugby\n(B) soccer\n(C) bowls\n(D) golf" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.515625, "loss": 4.218893527984619 }, { "id": "identity_214941", "image": "VQA2.0/images/135920.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many towels can you see in the room?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.6875, "loss": 4.632679462432861 }, { "id": "identity_110581", "image": "PMC-VQA/images/PMC8727688_f2_156113.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many images are displayed in this set?" }, { "from": "gpt", "value": "The answer is Four images" } ], "clip_score": 20.921875, "loss": 4.587155818939209 }, { "id": "identity_243894", "image": "A-OKVQA/images/369504.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the presence of the bike trail, what other facilities might be expected in this area, and why?" }, { "from": "gpt", "value": "The answer is picnic area" } ], "clip_score": 24.828125, "loss": 4.963907718658447 }, { "id": "identity_124481", "image": "TextVQA/images/7cf080f4b0153323.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last name of the author?" }, { "from": "gpt", "value": "The answer is Verne" } ], "clip_score": 6.87109375, "loss": 4.6072258949279785 }, { "id": "identity_38220", "image": "AI2D/images/3003.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the neuron is enclosed by the myelin sheath?\nChoices:\n(A) C\n(B) F\n(C) G\n(D) E" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.609375, "loss": 4.313711166381836 }, { "id": "identity_112637", "image": "PMC-VQA/images/PMC7344462_diagnostics-10-00355-f003_207774.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image with respect to the patient's body?" }, { "from": "gpt", "value": "The answer is Coronal orientation" } ], "clip_score": 24.390625, "loss": 4.274538516998291 }, { "id": "identity_16236", "image": "IconQA/images/64177.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner less likely to land?\nChoices:\n(A) white\n(B) blue" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.0625, "loss": 4.460193634033203 }, { "id": "identity_332930", "image": "PMC-VQA/images/PMC3923567_F2_266904.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the findings in the image?\nChoices:\n(A) Pleural effusion\n(B) Ground-glass opacities\n(C) Nodular opacities\n(D) Cystic lesions" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.0, "loss": 4.7732343673706055 }, { "id": "identity_313135", "image": "AI2D/images/503.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The Adelie Penguin is eaten by ________?\nChoices:\n(A) Killer whale\n(B) algae\n(C) krill\n(D) elephant seal" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.703125, "loss": 4.324015140533447 }, { "id": "identity_328048", "image": "A-OKVQA/images/416521.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How do these individuals know each other?\nChoices:\n(A) rivals\n(B) coworkers\n(C) siblings\n(D) neighbors" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 9.65625, "loss": 4.589592933654785 }, { "id": "identity_116659", "image": "Super-CLEVR/images/superCLEVR_new_001088.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly in front of the green bus?" }, { "from": "gpt", "value": "The answer is A silver van" } ], "clip_score": 27.109375, "loss": 4.761714935302734 }, { "id": "identity_332462", "image": "IconQA/images/27939.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this symbol?\nChoices:\n(A) slide\n(B) flip\n(C) turn" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.734375, "loss": 4.0556511878967285 }, { "id": "identity_303303", "image": "FigureQA/images/82361.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this have the minimum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.0, "loss": 4.392298698425293 }, { "id": "identity_185387", "image": "TextVQA/images/9bfb77b253a7f005.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What musical group is advertised to perform on the 12th of December at Sala X?" }, { "from": "gpt", "value": "The answer is Arizona Baby" } ], "clip_score": 15.6484375, "loss": 4.739442348480225 }, { "id": "identity_290861", "image": "ScienceQA/images/12004.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Kentucky?\n(A) Louisville\n(B) Columbia\n(C) Frankfort\n(D) Richmond" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.390625, "loss": 4.014279365539551 }, { "id": "identity_6206", "image": "AI2D/images/508.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the diagram below, what is a primary consumer?\nChoices:\n(A) snake\n(B) mouse\n(C) hawk\n(D) fox" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.265625, "loss": 4.007326602935791 }, { "id": "identity_107031", "image": "VQA2.0/images/232963.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chandeliers are visible in the room?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 19.796875, "loss": 4.225584506988525 }, { "id": "identity_299637", "image": "A-OKVQA/images/406734.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which event will the people take part?\nChoices:\n(A) motorcycle parade\n(B) marathon\n(C) protest\n(D) touring" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.390625, "loss": 4.24932861328125 }, { "id": "identity_218773", "image": "VQA2.0/images/539449.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the umbrella over the fruit stand?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 28.578125, "loss": 4.137536525726318 }, { "id": "identity_149895", "image": "DocVQA/images/slyc0227_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last name of the person in charge of Clerical and General Duties with the rank WO II?" }, { "from": "gpt", "value": "The answer is King" } ], "clip_score": 19.15625, "loss": 4.045623302459717 }, { "id": "identity_277965", "image": "TQA/images/question_images/cycle_water_1481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the result of precipitation? \n(A) evaporation \n(B) condensation \n(C) transpiration \n(D) rainfall" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.296875, "loss": 4.327580451965332 }, { "id": "identity_206047", "image": "TextVQA/images/ba0c5d07cd0803fa.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bus that is behind the yellow taxis?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 19.046875, "loss": 4.009951114654541 }, { "id": "identity_120565", "image": "TextVQA/images/05f850bedfc426be.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What genre does the book belong to?" }, { "from": "gpt", "value": "The answer is Fantasy" } ], "clip_score": 19.3125, "loss": 4.2827630043029785 }, { "id": "identity_333477", "image": "MapQA/images/map_7640.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which places have the highest value in the USA?" }, { "from": "gpt", "value": "The answer is Indiana" } ], "clip_score": 28.078125, "loss": 4.553704261779785 }, { "id": "identity_234785", "image": "A-OKVQA/images/222751.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sequence of actions the man in the red helmet will likely perform after running, considering he is in a baseball game?" }, { "from": "gpt", "value": "The answer is Run, reach base, slide" } ], "clip_score": 24.53125, "loss": 4.114373207092285 }, { "id": "identity_29260", "image": "A-OKVQA/images/513670.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is connecting to all the things in the sky?\nChoices:\n(A) string\n(B) balloons\n(C) claws\n(D) batteries" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.390625, "loss": 4.054924964904785 }, { "id": "identity_323660", "image": "TQA/images/question_images/human_system_excretory_6108.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which structure do the renal artery and vein branch off?\nChoices:\n(A) kidney\n(B) Vena cava\n(C) ureter\n(D) dorsal aorta" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.3125, "loss": 4.3568596839904785 }, { "id": "identity_333641", "image": "A-OKVQA/images/142182.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the person who owns these shelves like to collect?\nChoices:\n(A) stuffed animals\n(B) clothing\n(C) books\n(D) games" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.78125, "loss": 4.025429725646973 }, { "id": "identity_296435", "image": "A-OKVQA/images/176621.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What genre of show is being showcased here? \nOptions:\n(A) sporting event\n(B) drama\n(C) musical\n(D) horror" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.15625, "loss": 4.117350101470947 }, { "id": "identity_309895", "image": "FigureQA/images/58963.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.875, "loss": 4.715802192687988 }, { "id": "identity_266926", "image": "AI2D/images/3438.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram provided, which label corresponds to the Hind leg?\nChoices:\n(A) BF\n(B) BE\n(C) BB\n(D) BS" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.859375, "loss": 4.080801010131836 }, { "id": "identity_232048", "image": "AI2D/images/2322.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label depicts the stage right before the second summer in the mountain pine beetle life cycle?" }, { "from": "gpt", "value": "The answer is E" } ], "clip_score": 41.0, "loss": 4.8364715576171875 }, { "id": "identity_13485", "image": "ScienceQA/images/3276.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Florida?\nChoices:\n(A) Nashville\n(B) Orlando\n(C) Tallahassee\n(D) Jefferson City" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.234375, "loss": 4.2102179527282715 }, { "id": "identity_112159", "image": "A-OKVQA/images/89668.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many animals are visible in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 18.890625, "loss": 4.069698333740234 }, { "id": "identity_15845", "image": "PMC-VQA/images/PMC6455097_f2_462813.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organ is being referred to in the caption? \nChoices:\n(A) Heart \n(B) Kidney \n(C) Liver \n(D) Lungs " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.375, "loss": 4.387273788452148 }, { "id": "identity_216978", "image": "A-OKVQA/images/432547.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many rolls of wrapping paper are visible on the table?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 26.203125, "loss": 4.321434497833252 }, { "id": "identity_267713", "image": "PMC-VQA/images/PMC6145912_Fig4_369462.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the yellow arrowheads in the image?\nChoices:\n(A) Labelled sites \n(B) A connection between pre-synaptic and post-synaptic neurons \n(C) Unrelated images \n(D) Noise in the image" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.53125, "loss": 5.111974239349365 }, { "id": "identity_328606", "image": "A-OKVQA/images/513602.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most common type of horse competition?\nChoices:\n(A) riding\n(B) polo\n(C) racing\n(D) jumping" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.015625, "loss": 4.326992988586426 }, { "id": "identity_289738", "image": "A-OKVQA/images/30973.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location should the heavier individual sleep? \nOptions:\n(A) in a different room\n(B) on the floor\n(C) on the bottom\n(D) on the top" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.453125, "loss": 4.214968204498291 }, { "id": "identity_276004", "image": "ScienceQA/images/19463.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Maryland?\nChoices:\n(A) Phoenix\n(B) Manchester\n(C) Annapolis\n(D) Dover" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.390625, "loss": 4.15746545791626 }, { "id": "identity_322930", "image": "IconQA/images/77449.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a moon. What is the third image?\nChoices:\n(A) moon\n(B) rocket\n(C) star" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.125, "loss": 4.02585506439209 }, { "id": "identity_291980", "image": "A-OKVQA/images/31057.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following items is located close to the blue truck: a traffic cone, a carrot, a horse, or a stop sign?\nChoices:\n(A) traffic cone\n(B) carrot\n(C) horse\n(D) stop sign" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.0, "loss": 4.268557548522949 }, { "id": "identity_304000", "image": "AI2D/images/188.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the label C indicate?\nChoices:\n(A) fault plane\n(B) foot wall\n(C) hanging wall\n(D) fault scarp" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.234375, "loss": 4.814967155456543 }, { "id": "identity_331612", "image": "IconQA/images/82333.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this figure open or closed?\nChoices:\n(A) open\n(B) closed" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.546875, "loss": 4.009281635284424 }, { "id": "identity_335042", "image": "A-OKVQA/images/160428.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the orange orbs seen here?\n\nChoices:\n(A) real oranges\n(B) plastic\n(C) lemons\n(D) pinatas" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.640625, "loss": 4.59456729888916 }, { "id": "identity_284956", "image": "TextVQA/images/cd27c26641aad71f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which city is depicted in the photo?" }, { "from": "gpt", "value": "The answer is chicago " } ], "clip_score": 17.625, "loss": 5.332862377166748 }, { "id": "identity_296865", "image": "A-OKVQA/images/231476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of this area? \nChoices:\n(A) mall\n(B) taxi stand\n(C) train depot\n(D) repair shop" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.7734375, "loss": 4.32129430770874 }, { "id": "identity_20355", "image": "FigureQA/images/42051.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Mint the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.203125, "loss": 4.430111885070801 }, { "id": "identity_301506", "image": "ScienceQA/images/20960.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Boston\n(B) Cambridge\n(C) Providence\n(D) Reno" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.34375, "loss": 4.297929286956787 }, { "id": "identity_282600", "image": "PMC-VQA/images/PMC6248249_f1_399487.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which types of maps are depicted in the image? \nOptions:\n(A) Political and climate maps \n(B) Bullseye and treasure maps \n(C) Sea and mountain maps \n(D) Road and street maps." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.5, "loss": 4.132956027984619 }, { "id": "identity_33178", "image": "IconQA/images/68262.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of squares even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.546875, "loss": 4.087044715881348 }, { "id": "identity_266059", "image": "FigureQA/images/97503.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Firebrick the color with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.6875, "loss": 4.030318260192871 }, { "id": "identity_28947", "image": "IconQA/images/36146.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a sub. Which picture is fourth?\nChoices:\n(A) fish\n(B) sub\n(C) turtle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.390625, "loss": 4.367347717285156 }, { "id": "identity_244195", "image": "A-OKVQA/images/245598.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary purpose of the boy in red wearing a glove, and how does it relate to the roles of the other players in the image?" }, { "from": "gpt", "value": "The answer is to catch pitches" } ], "clip_score": 28.140625, "loss": 4.461247444152832 }, { "id": "identity_261993", "image": "FigureQA/images/76903.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following best describes the roughness of Dark Seafoam?\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.25, "loss": 4.154799938201904 }, { "id": "identity_186278", "image": "A-OKVQA/images/211892.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dogs are wearing life jackets?" }, { "from": "gpt", "value": "The answer is Four dogs" } ], "clip_score": 29.15625, "loss": 4.717329978942871 }, { "id": "identity_107837", "image": "PMC-VQA/images/PMC8039310_F3_415512.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many white arrows are present in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 18.015625, "loss": 4.626922130584717 }, { "id": "identity_296401", "image": "ScienceQA/images/18654.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the type of rock basalt is:\n(A) sedimentary\n(B) igneous\n(C) metamorphic" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.671875, "loss": 4.043139457702637 }, { "id": "identity_35162", "image": "A-OKVQA/images/117222.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the man using the bike for?\nChoices:\n(A) racing\n(B) transporting\n(C) leisure\n(D) exercising" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.09375, "loss": 4.736051082611084 }, { "id": "identity_336054", "image": "PMC-VQA/images/PMC5645507_F1_228230.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of maps are depicted in the image?\nChoices:\n(A) Functional \n(B) Perfusion \n(C) Structural \n(D) Diffusion" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.84375, "loss": 4.200669765472412 }, { "id": "identity_38323", "image": "A-OKVQA/images/581183.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the elephants doing?\nChoices:\n(A) foraging\n(B) performing\n(C) mating\n(D) stampeding" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.953125, "loss": 4.310589790344238 }, { "id": "identity_3367", "image": "PMC-VQA/images/PMC3503823_pone-0049958-g004_167684.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the dotted white line refer to in the early pupal stage? \nChoices:\n(A) the wings of the butterfly \n(B) the eyes of the butterfly \n(C) the exoskeleton of the butterfly \n(D) the digestive system of the butterfly " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.1875, "loss": 4.425826072692871 }, { "id": "identity_235992", "image": "A-OKVQA/images/67015.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs are there in the image?" }, { "from": "gpt", "value": "The answer is three" } ], "clip_score": 18.421875, "loss": 4.072648525238037 }, { "id": "identity_192849", "image": "PMC-VQA/images/PMC6692028_pone.0220922.g007_22956.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pellets or BBs can be counted in this X-ray image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 26.921875, "loss": 4.683051586151123 }, { "id": "identity_303740", "image": "FigureQA/images/26650.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.71875, "loss": 4.038623332977295 }, { "id": "identity_12626", "image": "TQA/images/abc_question_images/parts_skeleton_17266.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the ribs in the diagram.\nChoices:\n(A) X\n(B) W\n(C) M\n(D) E" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.671875, "loss": 4.474582672119141 }, { "id": "identity_303072", "image": "IconQA/images/8154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, which color are you less likely to get?\nChoices:\n(A) blue\n(B) black" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.015625, "loss": 4.294898986816406 }, { "id": "identity_290669", "image": "A-OKVQA/images/338064.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which of the following locations can snow be found? \n(A) in apartments\n(B) in trees\n(C) underwater\n(D) mountain peaks" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.3515625, "loss": 4.0532989501953125 }, { "id": "identity_122845", "image": "PMC-VQA/images/PMC6909124_acm212773-fig-0001_82999.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many crosshair markers are present within the green contour?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 19.984375, "loss": 4.587619304656982 }, { "id": "identity_45416", "image": "MapQA/images/map_9533.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states fall within the lowest percentage range of recommendations?" }, { "from": "gpt", "value": "The answer is an integer number" } ], "clip_score": 31.21875, "loss": 4.0644941329956055 }, { "id": "identity_18563", "image": "PMC-VQA/images/PMC8207792_Fig1_469449.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the MRI show in the medial thalami?\nChoices:\n(A) Low signal alteration \n(B) High signal alteration \n(C) No signal alteration \n(D) Only left signal alteration" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.265625, "loss": 4.275792121887207 }, { "id": "identity_30857", "image": "FigureQA/images/90122.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Medium Seafoam have the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.59375, "loss": 4.211592674255371 }, { "id": "identity_6353", "image": "PMC-VQA/images/PMC6059037_F2_345718.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the green arrow represent in the image? \nChoices:\n(A) Column A \n(B) Column B \n(C) Column C \n(D) Column D " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.546875, "loss": 4.320664405822754 }, { "id": "identity_275431", "image": "A-OKVQA/images/497227.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which time of day are these umbrellas typically used? \nChoices:\n(A) noon\n(B) day\n(C) night\n(D) evening" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.5546875, "loss": 4.196343898773193 }, { "id": "identity_127735", "image": "A-OKVQA/images/415458.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.03125, "loss": 4.187984943389893 }, { "id": "identity_264790", "image": "FigureQA/images/63.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Rosy Brown or Web Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.8125, "loss": 4.571560382843018 }, { "id": "identity_309684", "image": "AI2D/images/3671.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the terminal part of the large intestine?\nChoices:\n(A) cecum\n(B) Rectum\n(C) parotid\n(D) anus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.4375, "loss": 4.029282569885254 }, { "id": "identity_20558", "image": "TextVQA/images/31020aab9724657b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what job title is listed in the middle of the board?" }, { "from": "gpt", "value": "The answer is libarian" } ], "clip_score": 19.21875, "loss": 4.349974632263184 }, { "id": "identity_270666", "image": "ScienceQA/images/17953.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Oklahoma?\n(A) Oklahoma City\n(B) Tallahassee\n(C) Jefferson City\n(D) Atlanta" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.25, "loss": 4.175487995147705 }, { "id": "identity_48430", "image": "DocVQA/images/nhcn0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the result of the transmission?" }, { "from": "gpt", "value": "The answer is OK" } ], "clip_score": 20.671875, "loss": 4.386837959289551 }, { "id": "identity_308565", "image": "ScienceQA/images/16717.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Tallahassee\n(B) Richmond\n(C) Orlando\n(D) Columbia" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.96875, "loss": 4.203289985656738 }, { "id": "identity_313839", "image": "A-OKVQA/images/253413.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of setting is this beyond the water?\nChoices:\n(A) suburbs\n(B) city\n(C) beach\n(D) country" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.5703125, "loss": 4.61081075668335 }, { "id": "identity_295583", "image": "PMC-VQA/images/PMC7693123_diagnostics-10-00899-f003_306457.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which radiological imaging modality is utilized to detect the \"crazy paving pattern\" as described in the caption?\nChoices:\n(A) X-ray\n(B) CT scan\n(C) MRI\n(D) Ultrasound." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.328125, "loss": 4.33010196685791 }, { "id": "identity_123526", "image": "PMC-VQA/images/PMC5627796_F3_224139.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the lesion located on the left or right side of the patient's body?" }, { "from": "gpt", "value": "The answer is Right side" } ], "clip_score": 27.4375, "loss": 4.303638458251953 }, { "id": "identity_143639", "image": "DocVQA/images/zkfl0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person to whom the letter is addressed?" }, { "from": "gpt", "value": "The answer is Manolo" } ], "clip_score": 20.359375, "loss": 4.00489616394043 }, { "id": "identity_6943", "image": "A-OKVQA/images/358462.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the horse attached to the wagon?\nChoices:\n(A) by accident\n(B) eats grass\n(C) pulls wagon\n(D) stops horse" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.375, "loss": 4.3098063468933105 }, { "id": "identity_126158", "image": "VQA-RAD/images/synpic52932.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the patient during this X-ray?" }, { "from": "gpt", "value": "The answer is The patient was standing" } ], "clip_score": 31.609375, "loss": 4.005520343780518 }, { "id": "identity_310111", "image": "ScienceQA/images/5534.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Denver\n(B) Provo\n(C) Phoenix\n(D) Salt Lake City" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.15625, "loss": 4.074909210205078 }, { "id": "identity_258776", "image": "A-OKVQA/images/183100.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify three different types of tools shown in the image and explain their primary uses. Based on these uses, determine the most likely place where these tools would be found" }, { "from": "gpt", "value": "The answer is store" } ], "clip_score": 27.765625, "loss": 6.202563285827637 }, { "id": "identity_15339", "image": "PMC-VQA/images/PMC7786482_Fig1_335686.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the radial shaft fracture shown in the X-ray? \nChoices:\n(A) middle third \n(B) proximal third \n(C) distal third \n(D) junction of proximal and middle thirds. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.234375, "loss": 4.4468159675598145 }, { "id": "identity_286450", "image": "FigureQA/images/56039.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gold the highest possible option?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.265625, "loss": 4.0278000831604 }, { "id": "identity_68335", "image": "AI2D/images/2106.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trophic levels are depicted in this food web?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 34.9375, "loss": 4.324152946472168 }, { "id": "identity_104612", "image": "A-OKVQA/images/176925.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are riding on the elephant?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.015625, "loss": 4.088249206542969 }, { "id": "identity_336718", "image": "ScienceQA/images/1922.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Columbus\n(B) Houston\n(C) Lincoln\n(D) Carson City" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.125, "loss": 4.228335380554199 }, { "id": "identity_301507", "image": "A-OKVQA/images/395198.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this person riding?\nChoices:\n(A) bike\n(B) skateboard\n(C) scooter\n(D) motorcycle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.5625, "loss": 4.1690263748168945 }, { "id": "identity_272093", "image": "ScienceQA/images/15517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic is shared by both of these objects?\nChoices:\n(A) bumpy\n(B) bouncy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.296875, "loss": 4.19484806060791 }, { "id": "identity_132116", "image": "TextVQA/images/cf8c94df0f7b7707.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand logo is visible on the pitcher's uniform?" }, { "from": "gpt", "value": "The answer is Total" } ], "clip_score": 24.0, "loss": 4.281412124633789 }, { "id": "identity_308954", "image": "Super-CLEVR/images/superCLEVR_new_000054.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the shape of this big yellow thing the same as this purple metal thing?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 15.375, "loss": 4.94643497467041 }, { "id": "identity_22912", "image": "DocVQA/images/pqll0006_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name mentioned in the Approval Form ?" }, { "from": "gpt", "value": "The answer is Alan B. Norman" } ], "clip_score": 25.859375, "loss": 4.830000400543213 }, { "id": "identity_298100", "image": "ScienceQA/images/8085.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) South Carolina\n(B) Pennsylvania\n(C) Indiana\n(D) Rhode Island" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.09375, "loss": 4.192607402801514 }, { "id": "identity_336012", "image": "VizWiz/images/VizWiz_train_00001811.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this thing?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 16.015625, "loss": 4.205633640289307 }, { "id": "identity_282788", "image": "Super-CLEVR/images/superCLEVR_new_001009.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the colors of the shiny articulated bus and the small aeroplane the same?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 32.0, "loss": 5.189994812011719 }, { "id": "identity_106775", "image": "TextVQA/images/63b6b86f375d00fd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many microphones are visible on the podium?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.09375, "loss": 4.520190238952637 }, { "id": "identity_183303", "image": "A-OKVQA/images/36915.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the object flying in the sky?" }, { "from": "gpt", "value": "The answer is Blue and brown" } ], "clip_score": 23.171875, "loss": 4.206911563873291 }, { "id": "identity_4610", "image": "AI2D/images/3233.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The outermost part of the cell is called _______.\nChoices:\n(A) Cytoplasm\n(B) Ribosome\n(C) Lysosome\n(D) Cell(plasma) membrance" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.734375, "loss": 4.096194267272949 }, { "id": "identity_207072", "image": "VQA2.0/images/203621.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cats are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.0, "loss": 4.136976718902588 }, { "id": "identity_149168", "image": "DocVQA/images/xfhb0228_17.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the Chairman presiding over the event?" }, { "from": "gpt", "value": "The answer is Ewell T. Scott" } ], "clip_score": 22.0, "loss": 4.126271724700928 }, { "id": "identity_305887", "image": "DVQA/images/bar_train_00198239.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of this bar?" }, { "from": "gpt", "value": "The answer is tail" } ], "clip_score": 18.34375, "loss": 4.117609977722168 }, { "id": "identity_262930", "image": "VQA2.0/images/551299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who currently possesses the ball?" }, { "from": "gpt", "value": "The answer is right" } ], "clip_score": 14.8359375, "loss": 4.9424662590026855 }, { "id": "identity_231270", "image": "VQA2.0/images/61729.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day is it approximately, considering the lighting and activity in the image?" }, { "from": "gpt", "value": "The answer is Early evening" } ], "clip_score": 21.296875, "loss": 4.216911792755127 }, { "id": "identity_276934", "image": "VizWiz/images/VizWiz_train_00013353.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you please tell me what this is?" }, { "from": "gpt", "value": "The answer is cup dutch couple dancing on" } ], "clip_score": 31.75, "loss": 4.445500373840332 }, { "id": "identity_312688", "image": "Super-CLEVR/images/superCLEVR_new_000358.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the matte object in front of the blue matte thing and the matte object behind the small matte vehicle the same size?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 23.3125, "loss": 5.238770484924316 }, { "id": "identity_333335", "image": "PMC-VQA/images/PMC7825226_Fig1_349343.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique used in this? \nChoices:\n(A) Computed tomography (CT) scan \n(B) Positron emission tomography (PET) scan \n(C) Magnetic resonance imaging (MRI) \n(D) X-ray scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.328125, "loss": 4.142504692077637 }, { "id": "identity_275702", "image": "A-OKVQA/images/416337.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following characteristics indicates that a fruit is the least sweet? \n(A) biggest \n(B) green \n(C) most freckled \n(D) smallest" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.65625, "loss": 4.115108489990234 }, { "id": "identity_275741", "image": "FigureQA/images/61368.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Firebrick or Maroon?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.0, "loss": 4.7597150802612305 }, { "id": "identity_299730", "image": "PMC-VQA/images/PMC4374855_pgen.1005057.g002_371904.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of images of the femur are depicted in figure A? \nOptions:\n(A) Cross-sectional images of the femur.\n(B) Images of the femur from various angles.\n(C) Longitudinal mid-plane images of the femur.\n(D) 3D images of the femur." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.03125, "loss": 4.250948429107666 }, { "id": "identity_156958", "image": "TQA/images/question_images/tides_136.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different tide types are illustrated in this image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 28.203125, "loss": 4.325867652893066 }, { "id": "identity_2289", "image": "FigureQA/images/61029.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Gold the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.5859375, "loss": 4.255212783813477 }, { "id": "identity_305029", "image": "TQA/images/abc_question_images/skin_cross_section_17583.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the highest concentration of adipose tissue found?\nChoices:\n(A) L\n(B) P\n(C) K\n(D) M" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.625, "loss": 4.0409626960754395 }, { "id": "identity_299871", "image": "TextVQA/images/1a3ff09978212117.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which letter is the initial letter in the title of the book featuring a donkey?" }, { "from": "gpt", "value": "The answer is k" } ], "clip_score": 19.578125, "loss": 4.150153160095215 }, { "id": "identity_109645", "image": "TextVQA/images/fc09bb959a7777b5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What feature is being advertised on the poster behind the laptops?" }, { "from": "gpt", "value": "The answer is Touchscreen sensitivity" } ], "clip_score": 26.328125, "loss": 4.394686698913574 }, { "id": "identity_264788", "image": "ScienceQA/images/7258.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Nevada?\nChoices:\n(A) Reno\n(B) Carson City\n(C) Baton Rouge\n(D) Jefferson City" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.015625, "loss": 4.071601867675781 }, { "id": "identity_295779", "image": "AI2D/images/3820.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, which region corresponds to the endosperm?\nChoices:\n(A) C\n(B) A\n(C) B\n(D) D" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.53125, "loss": 4.318211078643799 }, { "id": "identity_274168", "image": "FigureQA/images/23375.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Tomato the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.7421875, "loss": 4.342474937438965 }, { "id": "identity_281266", "image": "PMC-VQA/images/PMC6766146_fig2_42891.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How would you describe the state of the patient's upper lobes?\nChoices:\n(A) Normal\n(B) Inflamed\n(C) Cancerous\n(D) Blocked" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.359375, "loss": 4.030759334564209 }, { "id": "identity_309351", "image": "ScienceQA/images/11706.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the place shown?\nChoices:\n(A) Pennsylvania\n(B) Connecticut\n(C) New Jersey\n(D) South Carolina" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.890625, "loss": 4.220296382904053 }, { "id": "identity_331846", "image": "PMC-VQA/images/PMC7513650_F1_260644.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of injury is shown in the image?\nChoices:\n(A) Fracture\n(B) Sprain\n(C) Longitudinal tear\n(D) Dislocation" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.0, "loss": 4.405425548553467 }, { "id": "identity_32866", "image": "IconQA/images/48371.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this shape have a triangle as a face?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.75, "loss": 4.335066318511963 }, { "id": "identity_16515", "image": "A-OKVQA/images/360870.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At what stage of preparation are the two nearest plates?\nChoices:\n(A) cleanup\n(B) pickup\n(C) salad\n(D) garnish" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.765625, "loss": 4.38848352432251 }, { "id": "identity_293050", "image": "TQA/images/question_images/convection_of_air_6660.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: When cool air reaches a hot radiator, what is the direction of movement of the cool air?\nChoices:\n(A) Warm air rises\n(B) Cool air falls\n(C) Warm air falls\n(D) Cool air rises" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.484375, "loss": 4.382450103759766 }, { "id": "identity_261661", "image": "ScienceQA/images/19917.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being pointed out in the highlighted area? Is it:\n(A) the Marshall Islands\n(B) Vanuatu\n(C) Nauru\n(D) Kiribati" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.1875, "loss": 4.510087490081787 }, { "id": "identity_227568", "image": "A-OKVQA/images/51021.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify three specific items in the image that indicate the computers are used for gaming, and explain how each item supports this conclusion" }, { "from": "gpt", "value": "The answer is Gaming controllers, game boxes, game screens" } ], "clip_score": 25.515625, "loss": 4.157802104949951 }, { "id": "identity_27942", "image": "PMC-VQA/images/PMC6694561_Fig5_23766.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the brown-stained nucleus represent in the images? \nChoices:\n(A) Dead cells \n(B) PCNA -positive cells \n(C) PCNA-negative cells \n(D) Empty space " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.546875, "loss": 4.419528007507324 }, { "id": "identity_325405", "image": "ChartQA/images/two_col_83134.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this trend going up or down?" }, { "from": "gpt", "value": "The answer is increasing" } ], "clip_score": 24.734375, "loss": 4.2368879318237305 }, { "id": "identity_35695", "image": "PMC-VQA/images/PMC6965131_Fig1_101500.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the nose is being measured? \nChoices:\n(A) Nostril width \n(B) Length of the bridge \n(C) Length of the septum \n(D) Length of the tip " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.4375, "loss": 4.5202531814575195 }, { "id": "identity_274107", "image": "PMC-VQA/images/PMC8303455_cells-10-01578-f002_16104.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the asterisk (*) in the image provided? \nOptions:\n(A) The intensity of the fluorescence.\n(B) The segmentation of somites.\n(C) The location of the midline.\n(D) The quantity of cells." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.46875, "loss": 5.36447811126709 }, { "id": "identity_127547", "image": "VQA2.0/images/428477.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any structures visible in the background of the image?" }, { "from": "gpt", "value": "The answer is State whether there are structures and describe them if present" } ], "clip_score": 18.421875, "loss": 4.423825263977051 }, { "id": "identity_335659", "image": "PMC-VQA/images/PMC6965131_Fig1_101500.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the nose is being measured?\nChoices:\n(A) Nostril width\n(B) Length of the bridge\n(C) Length of the septum\n(D) Length of the tip" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.5, "loss": 4.555470943450928 }, { "id": "identity_267385", "image": "ScienceQA/images/18098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of North Dakota?\n(A) Bismarck\n(B) Denver\n(C) Fargo\n(D) Springfield" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.9375, "loss": 4.061553955078125 }, { "id": "identity_287372", "image": "A-OKVQA/images/129175.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of these boats?\nChoices:\n(A) gondola\n(B) tugboat\n(C) rowboat\n(D) putter" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.03125, "loss": 4.245604038238525 }, { "id": "identity_26470", "image": "PMC-VQA/images/PMC4891826_Fig2_37394.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the pattern of the cartilage formation seen in the images? \nChoices:\n(A) Contiguous \n(B) Noncontiguous \n(C) Regular \n(D) Irregular " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.671875, "loss": 4.421781539916992 }, { "id": "identity_29031", "image": "A-OKVQA/images/144562.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who holds an all-time record in a statistical category of this sport?\nChoices:\n(A) michael jordan\n(B) rickey henderson\n(C) wayne gretzky\n(D) tiger woods" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.328125, "loss": 4.194454669952393 }, { "id": "identity_15108", "image": "DocVQA/images/jggn0226_14.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which hospital is the surgery done?" }, { "from": "gpt", "value": "The answer is SFH" } ], "clip_score": 21.75, "loss": 4.428813457489014 }, { "id": "identity_332430", "image": "TQA/images/question_images/cellular_respiration_6621.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this process require energy directly from the sun?\nChoices:\n(A) occasionally\n(B) yes\n(C) only when it is sunny\n(D) no" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.9375, "loss": 4.260542392730713 }, { "id": "identity_307108", "image": "FigureQA/images/35047.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the low median for Blue Violet?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.328125, "loss": 4.038262844085693 }, { "id": "identity_297096", "image": "ScienceQA/images/12199.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repulsive force towards each other?\nChoices:\n(A) repel\n(B) attract" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.328125, "loss": 4.234793663024902 }, { "id": "identity_288502", "image": "A-OKVQA/images/528713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following events is scheduled to occur? \n(A) horse parade\n(B) horse racing\n(C) horse trading\n(D) horse show" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.828125, "loss": 4.106634140014648 }, { "id": "identity_337224", "image": "TextVQA/images/640dae20e17a25c6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this a flyer for?" }, { "from": "gpt", "value": "The answer is play" } ], "clip_score": 21.671875, "loss": 4.988790035247803 }, { "id": "identity_31299", "image": "PMC-VQA/images/PMC4695478_Fig1_457691.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the white arrow pointing to in the contrast-enhanced CT scan?\nChoices:\n(A) graft \n(B) abcess \n(C) normal tissue \n(D) scar " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.453125, "loss": 5.140091896057129 }, { "id": "identity_25428", "image": "PMC-VQA/images/PMC5217944_pone.0169462.g002_111194.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the position of the patient for the PET/CT imaging?\nChoices:\n(A) Supine \n(B) Prone \n(C) Lateral \n(D) Standing" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.9375, "loss": 4.7519097328186035 }, { "id": "identity_275859", "image": "TabMWP/images/21887.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the recorded weights of the winning dog at recent dog shows, what was the rate of change between 2017 and 2018?" }, { "from": "gpt", "value": "The answer is -5" } ], "clip_score": 37.84375, "loss": 4.129429340362549 }, { "id": "identity_6883", "image": "A-OKVQA/images/136979.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What should the bus use to safely move in front of the bicyclist?\nChoices:\n(A) wiper blades\n(B) sign\n(C) turning signals\n(D) motor" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.8125, "loss": 4.9812211990356445 }, { "id": "identity_214601", "image": "TextVQA/images/81fc54d6ff1b4cf6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background of the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 18.265625, "loss": 4.572996616363525 }, { "id": "identity_303315", "image": "VQA2.0/images/92627.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is this made of?" }, { "from": "gpt", "value": "The answer is wire" } ], "clip_score": 10.8515625, "loss": 4.090124130249023 }, { "id": "identity_311658", "image": "DVQA/images/bar_train_00157197.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second bar from the left in each set?" }, { "from": "gpt", "value": "The answer is key" } ], "clip_score": 17.515625, "loss": 4.082973480224609 }, { "id": "identity_306506", "image": "PMC-VQA/images/PMC9348820_F1_363019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being shown in the confocal images?\nChoices:\n(A) Cell proliferation \n(B) Vascular pruning events \n(C) Leakage \n(D) Morphometric parameters" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.03125, "loss": 4.403702735900879 }, { "id": "identity_277834", "image": "ScienceQA/images/19179.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) Ohio\n(B) Pennsylvania\n(C) New Jersey\n(D) Rhode Island" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.390625, "loss": 4.007072925567627 }, { "id": "identity_262338", "image": "PMC-VQA/images/PMC6333181_acm212498-fig-0001_422859.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which anatomical view is depicted in the sagittal image? \nChoices:\n(A) Anterior view\n(B) Posterior view\n(C) Side view\n(D) Bottom view" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.71875, "loss": 4.478642463684082 }, { "id": "identity_307475", "image": "TabMWP/images/9725.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table presents a function. Is this function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.453125, "loss": 4.192007541656494 }, { "id": "identity_338310", "image": "TextVQA/images/ae02b7e83ca1be6d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the men's room located?" }, { "from": "gpt", "value": "The answer is left" } ], "clip_score": 14.6640625, "loss": 4.401247501373291 }, { "id": "identity_336429", "image": "PMC-VQA/images/PMC6487530_Fig7_472532.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the imaging technique shown here?\nChoices:\n(A) MRI.\n(B) CT scan.\n(C) PET scan.\n(D) X-ray." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.625, "loss": 4.225163459777832 }, { "id": "identity_185009", "image": "A-OKVQA/images/368249.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 17.671875, "loss": 4.3919782638549805 }, { "id": "identity_253826", "image": "Super-CLEVR/images/superCLEVR_new_001076.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image are made of the same material as the small gray car, and what are their shapes?" }, { "from": "gpt", "value": "The answer is Two objects; shapes are car and articulated bus" } ], "clip_score": 24.546875, "loss": 4.1249823570251465 }, { "id": "identity_289053", "image": "A-OKVQA/images/562575.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the cyclist located? \nOptions:\n(A) at home\n(B) in the store\n(C) behind the bus\n(D) on the bus" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.046875, "loss": 4.85205602645874 }, { "id": "identity_331859", "image": "PMC-VQA/images/PMC5550971_RSIF20170203F1_201629.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the colour coding in this represent? \nChoices:\n(A) Age of the patient \n(B) Extent of LV damage \n(C) Type of heart disease \n(D) Race of the patient" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.3125, "loss": 4.127928733825684 }, { "id": "identity_327757", "image": "A-OKVQA/images/343322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of this cantina?\nChoices:\n(A) gary's\n(B) fred's\n(C) george's\n(D) bill's" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.8828125, "loss": 4.091391563415527 }, { "id": "identity_320287", "image": "A-OKVQA/images/490044.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of event is shown here?\nChoices:\n(A) rally\n(B) competition\n(C) farmer's market\n(D) concert" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.203125, "loss": 4.465903282165527 }, { "id": "identity_191402", "image": "PMC-VQA/images/PMC7566610_Fig7_276673.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the condition of the lung parenchyma in the CT scan?" }, { "from": "gpt", "value": "The answer is Normal" } ], "clip_score": 32.5, "loss": 4.745494842529297 }, { "id": "identity_294792", "image": "A-OKVQA/images/348621.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the typical social status of individuals who own these boats?\nChoices:\n(A) poor\n(B) broke\n(C) wealthy\n(D) happy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.296875, "loss": 4.553546905517578 }, { "id": "identity_269201", "image": "FigureQA/images/69102.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is darker, Dark Gold or Olive?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.734375, "loss": 4.687326908111572 }, { "id": "identity_262147", "image": "TextVQA/images/cace6f17ad0f4c31.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organization is the sponsor of the event featured on the women's dresses?" }, { "from": "gpt", "value": "The answer is change" } ], "clip_score": 22.640625, "loss": 5.112378120422363 }, { "id": "identity_3292", "image": "FigureQA/images/40180.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Salmon greater than Medium Purple?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.609375, "loss": 4.14811372756958 }, { "id": "identity_312097", "image": "VizWiz/images/VizWiz_train_00014133.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this thing work with the tablet?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 16.71875, "loss": 4.132983207702637 }, { "id": "identity_332122", "image": "A-OKVQA/images/318783.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are they holding gloves to the ground?\nChoices:\n(A) to dance\n(B) to catch\n(C) to clean\n(D) to trick" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.390625, "loss": 4.020969390869141 }, { "id": "identity_15728", "image": "DocVQA/images/yynk0079_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which company has produced this policy?" }, { "from": "gpt", "value": "The answer is pilot insurance agency" } ], "clip_score": 24.578125, "loss": 4.028367519378662 }, { "id": "identity_334918", "image": "A-OKVQA/images/51396.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this rack on the far left used for?\nChoices:\n(A) skis\n(B) hats\n(C) coats\n(D) goggles" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.875, "loss": 4.389002323150635 }, { "id": "identity_281088", "image": "A-OKVQA/images/432912.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the orange color of the triangular cones?\nChoices:\n(A) for camouflage\n(B) for visibility\n(C) for design\n(D) randomly chosen" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 4.88671875, "loss": 4.5544915199279785 }, { "id": "identity_268027", "image": "FigureQA/images/63021.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Burlywood the area under the curve with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.5625, "loss": 4.547715187072754 }, { "id": "identity_329281", "image": "DocVQA/images/fgjh0227_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the president of this corporation?" }, { "from": "gpt", "value": "The answer is gene rouse" } ], "clip_score": 20.734375, "loss": 4.253537178039551 }, { "id": "identity_3696", "image": "A-OKVQA/images/126770.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people sitting outside the building doing?\nChoices:\n(A) dining\n(B) typing\n(C) arm wrestling\n(D) drawing" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.5625, "loss": 4.149897575378418 }, { "id": "identity_64819", "image": "ScienceQA/images/12069.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the sapphire?" }, { "from": "gpt", "value": "The answer is Faceted geometric shape" } ], "clip_score": 34.28125, "loss": 4.19248104095459 }, { "id": "identity_304432", "image": "ScienceQA/images/2139.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Concord\n(B) Cambridge\n(C) Boston\n(D) Missoula" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.453125, "loss": 4.125628471374512 }, { "id": "identity_300846", "image": "IconQA/images/34320.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image shows a dog. What is the fourth image?\nChoices:\n(A) dog\n(B) paw\n(C) house" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.53125, "loss": 4.695590972900391 }, { "id": "identity_311642", "image": "ChartQA/images/10365.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest part in the chart?" }, { "from": "gpt", "value": "The answer is gray" } ], "clip_score": 19.4375, "loss": 4.58417272567749 }, { "id": "identity_23440", "image": "A-OKVQA/images/506508.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the tall thin things carry?\nChoices:\n(A) graphite\n(B) wind turbine\n(C) fruits\n(D) power lines" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.046875, "loss": 4.253300189971924 }, { "id": "identity_204937", "image": "VQA2.0/images/25353.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.234375, "loss": 4.289158821105957 }, { "id": "identity_17705", "image": "FigureQA/images/84233.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Brown the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.9140625, "loss": 4.431100368499756 }, { "id": "identity_288917", "image": "A-OKVQA/images/574940.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of fabric is recommended for window curtains among the following options: linen, silk, nylon, or cotton?\nChoices:\n(A) linen\n(B) silk\n(C) nylon\n(D) cotton" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.265625, "loss": 4.027891635894775 }, { "id": "identity_280826", "image": "AI2D/images/805.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given food chain diagram, what is the classification of the deer?\nChoices:\n(A) producer\n(B) decomposer\n(C) secondary consumer\n(D) primary consumer" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.9375, "loss": 4.0690412521362305 }, { "id": "identity_215366", "image": "VizWiz/images/VizWiz_train_00012246.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the pants?" }, { "from": "gpt", "value": "The answer is Navy blue" } ], "clip_score": 17.4375, "loss": 4.4990553855896 }, { "id": "identity_291238", "image": "IconQA/images/10415.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on out of white and green? \n(A) neither; white and green are equally likely\n(B) white\n(C) green" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.75, "loss": 4.305894374847412 }, { "id": "identity_285173", "image": "A-OKVQA/images/6753.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fruit is depicted in the image?\nChoices:\n(A) watermelon\n(B) banana\n(C) strawberry\n(D) cantaloupe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.859375, "loss": 4.06320858001709 }, { "id": "identity_114602", "image": "VQA2.0/images/85816.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible tree trunks are there in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 20.0625, "loss": 4.409208297729492 }, { "id": "identity_334843", "image": "A-OKVQA/images/507087.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which music group would be able to use all of these items without sharing?\nChoices:\n(A) cream\n(B) nsync\n(C) backstreet boys\n(D) spice girls" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.734375, "loss": 4.020204067230225 }, { "id": "identity_64143", "image": "ScienceQA/images/1566.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does the object in the image work?" }, { "from": "gpt", "value": "The answer is Hooks catch in loops to hold surfaces together" } ], "clip_score": 23.875, "loss": 5.829738140106201 }, { "id": "identity_309846", "image": "AI2D/images/3246.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What makes the nucleus important for a cell's function?\nChoices:\n(A) it is covered by Rough endoplasmic reticulum\n(B) containing the cell's hereditary material and controlling its metabolism, growth, and reproduction.\n(C) it is a thickest layer of a cell\n(D) it is the inner part of the cell" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.171875, "loss": 4.028690338134766 }, { "id": "identity_321512", "image": "PMC-VQA/images/PMC8610683_fig5_117707.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the intestine is shown in the image?\nChoices:\n(A) Stomach\n(B) Colon\n(C) Duodenum\n(D) Ileum" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.25, "loss": 5.17111349105835 }, { "id": "identity_77841", "image": "AI2D/images/3543.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many types of cytoskeleton filaments are shown in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 26.71875, "loss": 4.259049415588379 }, { "id": "identity_4415", "image": "PMC-VQA/images/PMC4864780_Fig5_29969.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of organisms are shown in the micrographs? \nChoices:\n(A) Bacteria \n(B) Zooplankton \n(C) Phytoplankton \n(D) Fungi " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.328125, "loss": 4.312129020690918 }, { "id": "identity_291624", "image": "PMC-VQA/images/PMC4101852_F1_306828.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which muscle is situated in the upper part of the thigh scan?\nChoices:\n(A) Gluteus Maximus \n(B) Psoas \n(C) Rectus Femoris \n(D) Sartorius" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.0625, "loss": 4.720191955566406 }, { "id": "identity_290299", "image": "DVQA/images/bar_train_00062501.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which element is represented by the color indianred?" }, { "from": "gpt", "value": "The answer is need" } ], "clip_score": 21.03125, "loss": 4.187657356262207 }, { "id": "identity_325719", "image": "A-OKVQA/images/514293.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What items sold here help people stay the driest?\nChoices:\n(A) shirts\n(B) bottles\n(C) umbrellas\n(D) shoes" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.6015625, "loss": 4.1914873123168945 }, { "id": "identity_20061", "image": "A-OKVQA/images/478755.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where will the guy on third base run to next?\nChoices:\n(A) 2nd base\n(B) pitcher's mound\n(C) homeplate\n(D) 1st base" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.140625, "loss": 4.112833499908447 }, { "id": "identity_317034", "image": "AI2D/images/3543.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the outer part of this structure?\nChoices:\n(A) Plasma membrane\n(B) Microvilli\n(C) Lysosome\n(D) Mitochondrion" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.71875, "loss": 4.350649833679199 }, { "id": "identity_328829", "image": "PMC-VQA/images/PMC5799731_F9_272106.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the options shows a side view of a polyp?\nChoices:\n(A) b \n(B) d \n(C) k \n(D) h" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.25, "loss": 4.223079204559326 }, { "id": "identity_273305", "image": "PMC-VQA/images/PMC7335443_Fig2_204990.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the image provided?\nChoices:\n(A) Brain tissue \n(B) Neoplastic cells \n(C) Skin cells \n(D) Liver cells" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.265625, "loss": 4.066140174865723 }, { "id": "identity_29687", "image": "A-OKVQA/images/105647.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of area is shown?\nChoices:\n(A) urban\n(B) rural\n(C) forest\n(D) coastal" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 8.8828125, "loss": 4.465544700622559 }, { "id": "identity_9589", "image": "TQA/images/question_images/human_system_respiratory_3759.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part connects the nasal and oral cavities to the lungs?\nChoices:\n(A) trachea\n(B) alveoli\n(C) bronchi\n(D) nostril" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.078125, "loss": 4.258646488189697 }, { "id": "identity_218677", "image": "VQA2.0/images/398183.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bikers are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.65625, "loss": 4.324556827545166 }, { "id": "identity_269783", "image": "IconQA/images/40848.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture is the fourth in the sequence, following the first picture of a fish? \nChoices:\n(A) sub\n(B) fish\n(C) turtle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.25, "loss": 4.555563449859619 }, { "id": "identity_333611", "image": "ScienceQA/images/13266.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Boise\n(B) Nampa\n(C) New Orleans\n(D) Fargo" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.734375, "loss": 4.416910171508789 }, { "id": "identity_10469", "image": "AI2D/images/3138.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is directly below the cuticle?\nChoices:\n(A) Epidermis\n(B) Guard cells\n(C) Stomata\n(D) Skin" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.34375, "loss": 4.761312007904053 }, { "id": "identity_40560", "image": "DVQA/images/bar_train_00107775.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the category that has the second highest number of people preferring it?" }, { "from": "gpt", "value": "The answer is cold" } ], "clip_score": 26.671875, "loss": 4.523724555969238 }, { "id": "identity_16232", "image": "TQA/images/question_images/em_spectrum_6816.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How long are radio wavelengths?\nChoices:\n(A) 800nm or a single cell\n(B) 1mm or a pin head\n(C) 10cm or a hand width\n(D) 0.1mm or a human hair" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.59375, "loss": 4.241139888763428 }, { "id": "identity_108019", "image": "Super-CLEVR/images/superCLEVR_new_001277.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the vehicle that is located between the green bike and the blue bus?" }, { "from": "gpt", "value": "The answer is The car is silver" } ], "clip_score": 29.71875, "loss": 4.236971855163574 }, { "id": "identity_74254", "image": "TQA/images/abc_question_images/parts_skeleton_17270.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many phalanges are present in one hand as per the diagram?" }, { "from": "gpt", "value": "The answer is Fourteen" } ], "clip_score": 26.21875, "loss": 4.311732292175293 }, { "id": "identity_285066", "image": "AI2D/images/1274.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For which process in this cycle are carbon dioxide and water necessary?\nChoices:\n(A) Cellular respiration\n(B) Photosynthesis\n(C) Mitochondria\n(D) Animal cell" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.25, "loss": 4.11562967300415 }, { "id": "identity_329607", "image": "PMC-VQA/images/PMC4810860_RSPB20152917F3_15682.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the arrow indicating in the image?\nChoices:\n(A) A wedge-shaped tubercle \n(B) A posterior tubercle ridge \n(C) A discrete tubercle \n(D) The dermal skeleton" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.71875, "loss": 4.574818134307861 }, { "id": "identity_312191", "image": "VQA2.0/images/71608.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the backpack of the person in red?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 18.0625, "loss": 4.065018653869629 }, { "id": "identity_266451", "image": "DVQA/images/bar_train_00172021.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the bottommost bar in each group?" }, { "from": "gpt", "value": "The answer is town" } ], "clip_score": 18.921875, "loss": 4.502164363861084 }, { "id": "identity_194987", "image": "A-OKVQA/images/308900.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible on the field?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 20.28125, "loss": 4.433758735656738 }, { "id": "identity_203869", "image": "PMC-VQA/images/PMC7931264_FIG2_383925.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the hand in the image?" }, { "from": "gpt", "value": "The answer is Palmar view" } ], "clip_score": 31.421875, "loss": 5.13574743270874 }, { "id": "identity_210232", "image": "VQA2.0/images/387514.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lights are visible on the rear of the motorcycle?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 18.484375, "loss": 4.124448299407959 }, { "id": "identity_288950", "image": "PMC-VQA/images/PMC2940168_F0001_73732.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the image shown?\nChoices:\n(A) Bilateral abscess \n(B) Abscess deep in the left pectoralis major muscle \n(C) Abscess deep in the right pectoralis major muscle extending into the axilla \n(D) Abscess in the right arm muscles" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.453125, "loss": 4.149864196777344 }, { "id": "identity_281281", "image": "A-OKVQA/images/481563.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of waterway is the river channel? \nChoices:\n(A) sea\n(B) canal\n(C) pond\n(D) river" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.421875, "loss": 4.438098907470703 }, { "id": "identity_330501", "image": "AI2D/images/3459.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does label B indicate?\n\nChoices:\n(A) Spiracle\n(B) Hind Wing\n(C) Ovipositor\n(D) Front Wing" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.859375, "loss": 4.3014092445373535 }, { "id": "identity_283617", "image": "DVQA/images/bar_train_00125842.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars all one solid color with no patterns?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 19.796875, "loss": 4.033171653747559 }, { "id": "identity_306777", "image": "A-OKVQA/images/316194.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this red bus doing?\nChoices:\n(A) being repaired\n(B) food sale\n(C) being abandoned\n(D) carrying passengers" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.109375, "loss": 4.464071750640869 }, { "id": "identity_306018", "image": "PMC-VQA/images/PMC5977127_jcm-07-00088-f003_321235.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the hematoma located?\nChoices:\n(A) Right thigh \n(B) Left thigh \n(C) Right arm \n(D) Left arm" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.140625, "loss": 4.0915327072143555 }, { "id": "identity_264156", "image": "PMC-VQA/images/PMC5636852_Fig1_226248.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following choices best represents image d? \n(A) Preoperative growth \n(B) Postoperative CT scan \n(C) Tumor recurrence/regrowth \n(D) 15 weeks following treatment" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.3125, "loss": 4.278775691986084 }, { "id": "identity_4497", "image": "ScienceQA/images/11729.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of South Carolina?\nChoices:\n(A) Columbia\n(B) Charleston\n(C) Biloxi\n(D) Baton Rouge" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.046875, "loss": 4.272327423095703 }, { "id": "identity_17301", "image": "ScienceQA/images/881.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of North Carolina?\nChoices:\n(A) Little Rock\n(B) Oklahoma City\n(C) Raleigh\n(D) Charlotte" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.609375, "loss": 4.190183639526367 }, { "id": "identity_337373", "image": "PMC-VQA/images/PMC6966983_Fig1_102163.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the X-ray reveal on the day of surgery?\nChoices:\n(A) butterfly shadows\n(B) bilateral pulmonary edema\n(C) normal lung conditions\n(D) bilateral pleural effusion" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.296875, "loss": 4.2450785636901855 }, { "id": "identity_320333", "image": "TQA/images/question_images/ocean_waves_7126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which two points would you measure something to determine the height of the wave?\nChoices:\n(A) trough and still water level\n(B) crest and crest\n(C) Still water level and crest\n(D) trough and crest" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.75, "loss": 4.4519805908203125 }, { "id": "identity_278212", "image": "ScienceQA/images/17361.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Nevada?\nChoices:\n(A) Madison\n(B) Montpelier\n(C) Carson City\n(D) Cheyenne" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.34375, "loss": 4.179440498352051 }, { "id": "identity_7626", "image": "A-OKVQA/images/217517.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the scape most likely?\nChoices:\n(A) tokyo\n(B) cairo\n(C) beijing\n(D) singapore" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.171875, "loss": 4.238137722015381 }, { "id": "identity_301395", "image": "A-OKVQA/images/113097.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the guitar in the painting hung in the middle of the wall on the right?\nChoices:\n(A) purple\n(B) blue\n(C) green\n(D) red" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.671875, "loss": 4.03676700592041 }, { "id": "identity_302772", "image": "Super-CLEVR/images/superCLEVR_new_001955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there an equal number of large gray things that are in front of the cyan bike and big yellow bikes?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 30.21875, "loss": 4.9356913566589355 }, { "id": "identity_320661", "image": "Super-CLEVR/images/superCLEVR_new_000190.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there anything else that has the same size as this red object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 18.5625, "loss": 5.021294593811035 }, { "id": "identity_279535", "image": "DVQA/images/bar_train_00199856.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the bars run horizontally?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 24.65625, "loss": 4.387357234954834 }, { "id": "identity_122365", "image": "VQA2.0/images/553894.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 13.0, "loss": 4.507111072540283 }, { "id": "identity_262191", "image": "ScienceQA/images/1358.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) Iowa\n(B) Delaware\n(C) New Jersey\n(D) New York" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.109375, "loss": 5.08364200592041 }, { "id": "identity_74088", "image": "ScienceQA/images/3615.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many herbivores are depicted in this food web?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 30.3125, "loss": 4.573719024658203 }, { "id": "identity_196423", "image": "TextVQA/images/293f507bd888d00a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many smaller clock faces are there around the main clock face?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.484375, "loss": 4.302156448364258 }, { "id": "identity_38701", "image": "ScienceQA/images/5514.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these three objects have in common?\nChoices:\n(A) salty\n(B) opaque\n(C) soft" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.3046875, "loss": 4.323227405548096 }, { "id": "identity_3839", "image": "ScienceQA/images/8207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Mississippi\n(B) Pennsylvania\n(C) New Jersey\n(D) Delaware" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.1875, "loss": 4.482975959777832 }, { "id": "identity_296628", "image": "DocVQA/images/zxyg0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which paragraph of the report is the correction located?" }, { "from": "gpt", "value": "The answer is first" } ], "clip_score": 17.234375, "loss": 4.044236660003662 }, { "id": "identity_32516", "image": "A-OKVQA/images/536145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What's likely the word between see and now on the person's back?\nChoices:\n(A) mexico\n(B) me\n(C) money\n(D) mom" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.015625, "loss": 4.221127510070801 }, { "id": "identity_314296", "image": "FigureQA/images/96928.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Slate's value the highest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.671875, "loss": 4.007981300354004 }, { "id": "identity_198911", "image": "PMC-VQA/images/PMC5985138_fig1_323495.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can we see any foreign objects in the radiograph?" }, { "from": "gpt", "value": "The answer is No foreign objects detected" } ], "clip_score": 26.703125, "loss": 4.591650485992432 }, { "id": "identity_273896", "image": "Super-CLEVR/images/superCLEVR_new_000164.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do any small purple matte objects have a similar shape to the yellow matte object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 21.515625, "loss": 5.138282775878906 }, { "id": "identity_20936", "image": "PMC-VQA/images/PMC5360669_Fig1_143718.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of fracture is depicted in the X-rays?\nChoices:\n(A) Intraarticular \n(B) Pathological \n(C) Extra-Articular \n(D) Transverse " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.203125, "loss": 4.203460693359375 }, { "id": "identity_284758", "image": "ScienceQA/images/7676.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Arizona?\nChoices:\n(A) Des Moines\n(B) Lansing\n(C) Phoenix\n(D) Tucson" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.765625, "loss": 4.15350866317749 }, { "id": "identity_108005", "image": "TextVQA/images/ee69c6123b29683e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many figurines are displayed in front of the 3D printer?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 29.359375, "loss": 4.558769702911377 }, { "id": "identity_16136", "image": "TQA/images/question_images/layers_of_atmosphere_7070.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What connects the ozone to the exosphere?\nChoices:\n(A) outer space\n(B) troposphere\n(C) ionosphere\n(D) tropopause" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.953125, "loss": 4.035662651062012 }, { "id": "identity_126616", "image": "VQA-AS/images/12912.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cats are in the room?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.953125, "loss": 4.3411784172058105 }, { "id": "identity_277861", "image": "A-OKVQA/images/567142.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what way is the blue vessel transported to this location?\nChoices:\n(A) manpower\n(B) tugged\n(C) steam\n(D) motor" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.40625, "loss": 4.3486127853393555 }, { "id": "identity_246821", "image": "A-OKVQA/images/190670.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary product advertised on the website, and how does it relate to the sport being played in the image?" }, { "from": "gpt", "value": "The answer is team hats" } ], "clip_score": 20.40625, "loss": 4.810492515563965 }, { "id": "identity_250611", "image": "PMC-VQA/images/PMC7302052_f0010_194713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the body contour displayed in on the coronal view, and how many different colors are used to highlight different regions in the image?" }, { "from": "gpt", "value": "The answer is three" } ], "clip_score": 18.375, "loss": 4.830724716186523 }, { "id": "identity_120374", "image": "Super-CLEVR/images/superCLEVR_new_001846.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the vehicle located at the center of the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 21.859375, "loss": 4.051340579986572 }, { "id": "identity_117595", "image": "VQA2.0/images/224414.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are visible outside the building?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 18.625, "loss": 4.395526885986328 }, { "id": "identity_195174", "image": "PMC-VQA/images/PMC4689804_F1_455978.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of pathology in this color-coded image?" }, { "from": "gpt", "value": "The answer is No obvious signs, professional assessment needed" } ], "clip_score": 27.234375, "loss": 4.989747524261475 }, { "id": "identity_215609", "image": "A-OKVQA/images/340922.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trees are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 15.7890625, "loss": 4.379110813140869 }, { "id": "identity_294557", "image": "A-OKVQA/images/225850.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is this person most likely to engage in?\nChoices:\n(A) working\n(B) birdwatching\n(C) gymnastics\n(D) jogging" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.734375, "loss": 4.360358715057373 }, { "id": "identity_1180", "image": "PMC-VQA/images/PMC8772673_F1_168112.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging modality was used to produce image C2? \nChoices:\n(A) X-Ray \n(B) CT scan \n(C) T1-weighted MRI \n(D) T2-weighted MRI " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.203125, "loss": 4.3211588859558105 }, { "id": "identity_288723", "image": "FigureQA/images/34781.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Web Gray the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.78125, "loss": 4.450726509094238 }, { "id": "identity_110367", "image": "A-OKVQA/images/352127.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chefs are visible inside the depicted establishment on the vehicle?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 31.125, "loss": 4.141423225402832 }, { "id": "identity_268839", "image": "Super-CLEVR/images/superCLEVR_new_001116.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there a greater number of rubber airplanes in front of the jet compared to the tiny blue helicopters?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 23.953125, "loss": 5.08928918838501 }, { "id": "identity_39557", "image": "DVQA/images/bar_train_00109553.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest positive value?" }, { "from": "gpt", "value": "The answer is artist" } ], "clip_score": 21.828125, "loss": 4.012326240539551 }, { "id": "identity_12899", "image": "A-OKVQA/images/423025.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of enthusiasts are gathering here?\nChoices:\n(A) gamers\n(B) democrats\n(C) bikers\n(D) liberals" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.0625, "loss": 4.336528778076172 }, { "id": "identity_290194", "image": "PMC-VQA/images/PMC7641295_Fig2_290908.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What could the abnormal regions in the lower lobes potentially indicate?\nChoices:\n(A) Emphysema \n(B) Pneumonia \n(C) Tuberculosis \n(D) Asthma" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.8125, "loss": 4.225939750671387 }, { "id": "identity_19521", "image": "TextVQA/images/5371cd2bc59dab7b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is seen on camera 1?" }, { "from": "gpt", "value": "The answer is layers below" } ], "clip_score": 21.921875, "loss": 6.4140801429748535 }, { "id": "identity_57172", "image": "DVQA/images/bar_train_00101937.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the lowest representation in the 'essay' group?" }, { "from": "gpt", "value": "The answer is score" } ], "clip_score": 30.328125, "loss": 4.40402889251709 }, { "id": "identity_319615", "image": "DocVQA/images/qpvw0217_19.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the author mentioned in this tracking report?" }, { "from": "gpt", "value": "The answer is F Stanczyk" } ], "clip_score": 23.34375, "loss": 4.818869590759277 }, { "id": "identity_282281", "image": "PMC-VQA/images/PMC6294250_Fig1_411683.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What phenomenon is being observed in the image provided?\nChoices:\n(A) Lack of globular structures \n(B) Partial retention in intracellular structures \n(C) Complete retention in extracellular structures \n(D) Complete loss of intracellular structures." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.84375, "loss": 4.08358097076416 }, { "id": "identity_332924", "image": "VizWiz/images/VizWiz_train_00007607.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of plant is this?" }, { "from": "gpt", "value": "The answer is babys breath" } ], "clip_score": 24.140625, "loss": 4.20750617980957 }, { "id": "identity_128531", "image": "A-OKVQA/images/567142.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the water?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 18.84375, "loss": 4.41864013671875 }, { "id": "identity_269174", "image": "DVQA/images/bar_train_00105722.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In how many groups of bars is there at least one bar with a value greater than -1?" }, { "from": "gpt", "value": "The answer is seven" } ], "clip_score": 20.4375, "loss": 4.058351993560791 }, { "id": "identity_199726", "image": "A-OKVQA/images/3528.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the object in the air?" }, { "from": "gpt", "value": "The answer is A ball" } ], "clip_score": 11.953125, "loss": 4.255312442779541 }, { "id": "identity_155052", "image": "AI2D/images/3884.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the \"Leaf Scar\" in the diagram?" }, { "from": "gpt", "value": "The answer is Mark left after a leaf falls off" } ], "clip_score": 18.4375, "loss": 4.202940940856934 }, { "id": "identity_319710", "image": "ScienceQA/images/2894.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the place shown?\nChoices:\n(A) Rhode Island\n(B) Ohio\n(C) Connecticut\n(D) Maine" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.671875, "loss": 4.700509071350098 }, { "id": "identity_252183", "image": "TextVQA/images/ea263375ec83dcd1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the full text written on the blue part of the sign?" }, { "from": "gpt", "value": "The answer is CLARK A-2 COUNTY" } ], "clip_score": 26.4375, "loss": 4.065816402435303 }, { "id": "identity_127138", "image": "VQA2.0/images/146098.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many miniature figurines are visible on the desk?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.265625, "loss": 4.4988789558410645 }, { "id": "identity_220173", "image": "TextVQA/images/b3fa1b123ce4a692.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bottles have a purple content?" }, { "from": "gpt", "value": "The answer is Four bottles" } ], "clip_score": 28.390625, "loss": 4.1061882972717285 }, { "id": "identity_288259", "image": "Super-CLEVR/images/superCLEVR_new_000824.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other objects that share the same shape as the small green metallic object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 18.859375, "loss": 5.190622806549072 }, { "id": "identity_300929", "image": "FigureQA/images/22206.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Mint the high value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.140625, "loss": 4.013261318206787 }, { "id": "identity_276691", "image": "TQA/images/question_images/human_system_respiratory_2863.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the diagram, which structure is directly below the adenoids among the following choices: Larynx, Trachea, Tonsils, or Pleura?\nChoices:\n(A) Larynx\n(B) Trachea\n(C) Tonsils\n(D) Pleura" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.46875, "loss": 4.087617874145508 }, { "id": "identity_105008", "image": "VQA2.0/images/67116.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs are around the table?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.90625, "loss": 4.175574779510498 }, { "id": "identity_219925", "image": "A-OKVQA/images/148395.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the green car in the image?" }, { "from": "gpt", "value": "The answer is Gutbrod" } ], "clip_score": 22.140625, "loss": 4.000360012054443 }, { "id": "identity_31498", "image": "PMC-VQA/images/PMC7770742_Fig1_331507.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the initial radiological finding in the chest CT scan? \nChoices:\n(A) Consolidation \n(B) Cavitation \n(C) Ground Glass Opacities \n(D) Cavitary nodules " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.4375, "loss": 4.1795148849487305 }, { "id": "identity_33700", "image": "ScienceQA/images/4882.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these cities is marked on the map?\nChoices:\n(A) Houston\n(B) Boston\n(C) Washington, D.C.\n(D) Detroit" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.171875, "loss": 4.063899040222168 }, { "id": "identity_104492", "image": "A-OKVQA/images/114288.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are waiting to cross the street?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 27.1875, "loss": 4.208071708679199 }, { "id": "identity_261767", "image": "AI2D/images/3313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What number does label B represent?\nChoices:\n(A) 5\n(B) 3\n(C) 6\n(D) 7" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.953125, "loss": 4.085375785827637 }, { "id": "identity_8106", "image": "FigureQA/images/20804.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Slate the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.9375, "loss": 4.015561580657959 }, { "id": "identity_4280", "image": "ScienceQA/images/8548.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these two objects have in common?\nChoices:\n(A) scratchy\n(B) colorful" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.0, "loss": 4.549978256225586 }, { "id": "identity_325923", "image": "ScienceQA/images/15624.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Nampa\n(B) Salem\n(C) Denver\n(D) Boise" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.796875, "loss": 4.057297229766846 }, { "id": "identity_33179", "image": "PMC-VQA/images/PMC5922503_FIG3_306980.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the chest shows the pleural effusion?\nChoices:\n(A) Right side. \n(B) Front side. \n(C) Left side. \n(D) Back side. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.515625, "loss": 4.246711730957031 }, { "id": "identity_291974", "image": "PMC-VQA/images/PMC7939202_F1_385981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the hypointensities observed on MRI located?\nChoices:\n(A) Basal ganglia\n(B) Hippocampus\n(C) Corticospinal tracts\n(D) Corpus callosum" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.59375, "loss": 4.229985237121582 }, { "id": "identity_321467", "image": "IconQA/images/7897.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image shows a moon. What is the second image?\nChoices:\n(A) rocket\n(B) star\n(C) moon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.5625, "loss": 4.070858955383301 }, { "id": "identity_75903", "image": "AI2D/images/2107.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the berries stop growing, which animal will be directly affected in this food web?" }, { "from": "gpt", "value": "The answer is Lemming" } ], "clip_score": 26.765625, "loss": 4.486704349517822 }, { "id": "identity_35809", "image": "Super-CLEVR/images/superCLEVR_new_000985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any bicycles of the same color as the rubber wagon?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 26.046875, "loss": 5.294127464294434 }, { "id": "identity_61589", "image": "DocVQA/images/khcn0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who has forwarded the meeting request?" }, { "from": "gpt", "value": "The answer is Inge, Carolyn" } ], "clip_score": 19.53125, "loss": 4.585515975952148 }, { "id": "identity_272925", "image": "AI2D/images/2007.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which of the following sources do deer obtain energy?\nChoices:\n(A) Snake\n(B) Plants\n(C) Beetle\n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.03125, "loss": 4.017544746398926 }, { "id": "identity_227896", "image": "TextVQA/images/05bd3c543fbfe4af.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main theme of the event, and who are the key speakers listed for the event?" }, { "from": "gpt", "value": "The answer is Demilitarization and debt relief; Sandra Quintela, Camille Chalmers, Nora Cortiñas, Nahum Ebrahim Lalin Guty" } ], "clip_score": 27.359375, "loss": 4.628963947296143 }, { "id": "identity_188063", "image": "A-OKVQA/images/3538.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 15.390625, "loss": 4.256765842437744 }, { "id": "identity_229142", "image": "PMC-VQA/images/PMC6682679_F1_19873.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the functions of the two structures labeled in different colors on the image, and how do they contribute to neuronal activity?" }, { "from": "gpt", "value": "The answer is NeuN marks neurons; βIV-spectrin maintains AIS integrity" } ], "clip_score": 27.609375, "loss": 4.137819766998291 }, { "id": "identity_136023", "image": "DocVQA/images/xgcf0004_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the project?" }, { "from": "gpt", "value": "The answer is ASR Mass Add Names - May" } ], "clip_score": 21.46875, "loss": 6.061455249786377 }, { "id": "identity_268196", "image": "TextVQA/images/13ec950b0f7f07db.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which brand does this watch belong to?" }, { "from": "gpt", "value": "The answer is nomos " } ], "clip_score": 30.484375, "loss": 4.784231662750244 }, { "id": "identity_318713", "image": "Super-CLEVR/images/superCLEVR_new_000789.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any small trucks that have the same color as this small rubber double bus?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 19.359375, "loss": 5.466989040374756 }, { "id": "identity_329614", "image": "PMC-VQA/images/PMC5058823_F2_82761.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the lungs shows issues in the images?\nChoices:\n(A) Alveoli\n(B) Bronchial tree\n(C) Diaphragm\n(D) Pleura" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.390625, "loss": 5.043768405914307 }, { "id": "identity_334537", "image": "FigureQA/images/62130.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the highest value associated with Salmon?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.75, "loss": 4.145980358123779 }, { "id": "identity_332061", "image": "FigureQA/images/30592.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Orange Red?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.296875, "loss": 4.068260669708252 }, { "id": "identity_318698", "image": "AI2D/images/3279.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is labeled as A in the diagram?\nChoices:\n(A) mitochondrion\n(B) cell wall\n(C) cell membrane\n(D) cytoplasm" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.328125, "loss": 4.361835479736328 }, { "id": "identity_36958", "image": "PMC-VQA/images/PMC6500019_Fig1_475204.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the characteristic of the lesions?\nChoices:\n(A) Solid lesions with thick wall \n(B) Solid lesions with thin wall \n(C) Cystic lesions with thick wall \n(D) Cystic lesions with thin wall" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.203125, "loss": 4.261786460876465 }, { "id": "identity_313383", "image": "FigureQA/images/71519.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Orchid the one with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.015625, "loss": 4.033592224121094 }, { "id": "identity_22831", "image": "DocVQA/images/fpjw0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the brand mentioned in the confirmation?" }, { "from": "gpt", "value": "The answer is WINSTON" } ], "clip_score": 16.78125, "loss": 4.005120277404785 }, { "id": "identity_312853", "image": "PMC-VQA/images/PMC6923963_Fig6_88003.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the orientation of the imaging plane in this study?\nChoices:\n(A) Axial\n(B) Coronal\n(C) Sagittal\n(D) Oblique" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.984375, "loss": 4.748723030090332 }, { "id": "identity_192973", "image": "PMC-VQA/images/PMC3320613_pgen-1002631-g004_132962.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the structure in the center of the image?" }, { "from": "gpt", "value": "The answer is Irregular" } ], "clip_score": 15.9609375, "loss": 4.366204738616943 }, { "id": "identity_193538", "image": "A-OKVQA/images/251777.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visibly wearing glasses?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 11.5, "loss": 4.986705303192139 }, { "id": "identity_32049", "image": "DocVQA/images/fnvj0223_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the designation of Rita Kumari?" }, { "from": "gpt", "value": "The answer is director" } ], "clip_score": 21.015625, "loss": 4.348242282867432 }, { "id": "identity_19555", "image": "AI2D/images/4847.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of object on Earth is represented by the diagram?\nChoices:\n(A) River\n(B) Mountain/Volcano\n(C) Hill\n(D) Tree" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.59375, "loss": 4.27509069442749 }, { "id": "identity_129910", "image": "A-OKVQA/images/469728.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of establishment is in the background?" }, { "from": "gpt", "value": "The answer is Pub" } ], "clip_score": 17.703125, "loss": 4.091132640838623 }, { "id": "identity_25734", "image": "AI2D/images/2866.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which region in the diagram represents the middle ear?\nChoices:\n(A) b\n(B) l\n(C) r\n(D) i" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.03125, "loss": 5.004539489746094 }, { "id": "identity_25334", "image": "A-OKVQA/images/468340.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the man farthest to the right have on his neck?\nChoices:\n(A) scarf\n(B) hands\n(C) goggles\n(D) bowtie" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.203125, "loss": 4.237442970275879 }, { "id": "identity_307708", "image": "AI2D/images/4010.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the state of matter at the innermost layer?\nChoices:\n(A) gas\n(B) liquid\n(C) rigid\n(D) none of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.75, "loss": 5.0293402671813965 }, { "id": "identity_284664", "image": "ScienceQA/images/9384.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following correctly identifies the direction of thermal energy transfer during this time: from the surroundings to each vial or from each vial to the surroundings?\nChoices:\n(A) the surroundings . . . each vial\n(B) each vial . . . the surroundings" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.140625, "loss": 4.000481605529785 }, { "id": "identity_221886", "image": "VQA2.0/images/411111.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible on the sidewalk?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 16.3125, "loss": 4.776089191436768 }, { "id": "identity_269776", "image": "IconQA/images/20194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) turn\n(B) flip\n(C) slide" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.890625, "loss": 4.433332920074463 }, { "id": "identity_338619", "image": "DocVQA/images/xgmf0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who published the American Heart Association Cookbook?" }, { "from": "gpt", "value": "The answer is David McKay" } ], "clip_score": 22.1875, "loss": 4.034225940704346 }, { "id": "identity_168770", "image": "IconQA/images/89237.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the bottom row of squares was to continue, what would be the size of the 11th square?" }, { "from": "gpt", "value": "The answer is Small square" } ], "clip_score": 29.078125, "loss": 4.380585193634033 }, { "id": "identity_296501", "image": "PMC-VQA/images/PMC9157057_rbac030-F6_298596.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of samples were utilized in the images provided? \nOptions:\n(A) Plants \n(B) Animals \n(C) Rocks \n(D) Unknown" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.078125, "loss": 5.55202579498291 }, { "id": "identity_332201", "image": "TextVQA/images/7f6c3823509de616.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of food does this place serve?" }, { "from": "gpt", "value": "The answer is italian " } ], "clip_score": 16.890625, "loss": 4.700893402099609 }, { "id": "identity_10000", "image": "PMC-VQA/images/PMC4767903_F8_5955.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the subject of the histopathology shown in the image? \nChoices:\n(A) Heart \n(B) Brain \n(C) Lung \n(D) Liver" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.828125, "loss": 4.3984832763671875 }, { "id": "identity_295902", "image": "PMC-VQA/images/PMC7824157_medicina-57-00018-f002_348821.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging modality was utilized to capture the image? \nOptions:\n(A) MRI\n(B) CT scan\n(C) X-ray\n(D) PET scan." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.296875, "loss": 4.247450828552246 }, { "id": "identity_281854", "image": "A-OKVQA/images/68657.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most probable content of the blurry boxes in the background?\nChoices:\n(A) seeds\n(B) action figures\n(C) video games\n(D) raisins" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.5078125, "loss": 4.823163032531738 }, { "id": "identity_298069", "image": "PMC-VQA/images/PMC8413751_brb32180-fig-0001_50741.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of brain tumor is depicted in the image provided?\nChoices:\n(A) Meningioma \n(B) Glioblastoma \n(C) Schwannoma \n(D) Pituitary adenoma" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.65625, "loss": 4.701528549194336 }, { "id": "identity_334368", "image": "A-OKVQA/images/557721.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of these objects?\nChoices:\n(A) discourage children\n(B) for sale\n(C) obstruction\n(D) decorative" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 6.734375, "loss": 4.484349250793457 }, { "id": "identity_62409", "image": "DVQA/images/bar_train_00099723.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object has the highest preference percentage in the \"ocean\" category?" }, { "from": "gpt", "value": "The answer is ground" } ], "clip_score": 30.859375, "loss": 4.205559730529785 }, { "id": "identity_320413", "image": "PMC-VQA/images/PMC6994141_pntd.0007952.g001_109273.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color of the stained receptor in the images? \nChoices:\n(A) Blue; \n(B) Red; \n(C) Green; \n(D) Purple" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.265625, "loss": 4.172316551208496 }, { "id": "identity_233875", "image": "TextVQA/images/383c8211b1fe1a92.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the name of the store located directly above the \"Lider Express\" banner?" }, { "from": "gpt", "value": "The answer is D-FIS" } ], "clip_score": 19.640625, "loss": 5.113206386566162 }, { "id": "identity_310934", "image": "ScienceQA/images/1032.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Tucson\n(B) Cheyenne\n(C) Carson City\n(D) Phoenix" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.234375, "loss": 4.80491304397583 }, { "id": "identity_120556", "image": "VQA2.0/images/275793.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many personal care products are visible on the sink counter?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 29.109375, "loss": 4.684933662414551 }, { "id": "identity_6710", "image": "ScienceQA/images/15709.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of South Carolina?\nChoices:\n(A) Columbia\n(B) Nampa\n(C) Charleston\n(D) Phoenix" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.71875, "loss": 4.261928081512451 }, { "id": "identity_308923", "image": "DVQA/images/bar_train_00068424.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are these bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.03125, "loss": 4.221421241760254 }, { "id": "identity_291227", "image": "IconQA/images/60943.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there a circle as a face in this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.171875, "loss": 4.030154228210449 }, { "id": "identity_280508", "image": "IconQA/images/18214.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture is the fourth in the sequence, given that the first picture is a rocket?\nChoices:\n(A) moon\n(B) star\n(C) rocket" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.546875, "loss": 4.509746074676514 }, { "id": "identity_194536", "image": "PMC-VQA/images/PMC4412507_Fig1_381542.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the highlighted area in the image?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 20.984375, "loss": 5.236836910247803 }, { "id": "identity_26538", "image": "IconQA/images/44783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) past eight." }, { "from": "gpt", "value": "The answer is half" } ], "clip_score": 25.734375, "loss": 5.413320064544678 }, { "id": "identity_139902", "image": "DocVQA/images/fsph0227_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first task listed for the AD AGENCY?" }, { "from": "gpt", "value": "The answer is Contact Miss America of Kansas for possible role in dedication ceremonies" } ], "clip_score": 24.90625, "loss": 4.55422306060791 }, { "id": "identity_31573", "image": "PMC-VQA/images/PMC7310451_Fig2_197442.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which vein was not affected by the tumor thrombus?\nChoices:\n(A) Left hepatic vein \n(B) Superior vena cava \n(C) Right phrenic vein \n(D) None of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.8125, "loss": 4.620840549468994 }, { "id": "identity_110220", "image": "VizWiz/images/VizWiz_train_00023808.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the object that is covering part of the computer screen?" }, { "from": "gpt", "value": "The answer is Arrow" } ], "clip_score": 26.1875, "loss": 4.162034034729004 }, { "id": "identity_334133", "image": "PMC-VQA/images/PMC3905103_F3_262232.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the primary symptom of the condition depicted in the image?\nChoices:\n(A) Greenish spots on the retina\n(B) Bilateral hyperfluorescent lesions\n(C) Miosis\n(D) Diplopia" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.90625, "loss": 4.45529317855835 }, { "id": "identity_3509", "image": "Super-CLEVR/images/superCLEVR_new_001641.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a big road bike that is to the left of the brown thing that is behind the brown thing that is on the left side of the large brown thing; what is its material?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 22.09375, "loss": 4.3402509689331055 }, { "id": "identity_125645", "image": "VQA-AS/images/11085.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many items are in the picnic basket?" }, { "from": "gpt", "value": "The answer is Two items" } ], "clip_score": 28.609375, "loss": 4.224193096160889 }, { "id": "identity_304956", "image": "IconQA/images/91694.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown on this clock?\nChoices:\n(A) 10:00\n(B) 2:00" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.03125, "loss": 4.245792865753174 }, { "id": "identity_320690", "image": "Super-CLEVR/images/superCLEVR_new_001249.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the small yellow vehicle?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 21.921875, "loss": 4.07907772064209 }, { "id": "identity_16794", "image": "A-OKVQA/images/182005.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The flag of which country is flying on the boat?\nChoices:\n(A) austria\n(B) canada\n(C) lebanon\n(D) peru" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.59375, "loss": 4.279819488525391 }, { "id": "identity_286677", "image": "PMC-VQA/images/PMC7773299_FIG5_332416.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What findings are visible in the CT angiogram of the patient's abdomen and pelvis? \nChoices:\n(A) Dilated blood vessels\n(B) Peripheral ground-glass opacities bilaterally\n(C) Blood clots\n(D) Cysts" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.484375, "loss": 4.229127407073975 }, { "id": "identity_221039", "image": "VQA2.0/images/76414.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lifebuoys can be seen on the boat to the right?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 26.328125, "loss": 4.340559005737305 }, { "id": "identity_148727", "image": "DocVQA/images/rnjf0226_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the most commonly mentioned reason not to prescribe TZDs?" }, { "from": "gpt", "value": "The answer is Cost" } ], "clip_score": 26.421875, "loss": 4.484768867492676 }, { "id": "identity_35104", "image": "PMC-VQA/images/PMC7017548_Fig5_116604.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the subject of the X-ray? \nChoices:\n(A) Chest X-ray \n(B) Abdominal X-ray \n(C) Lumbar X-ray \n(D) Pelvic X-ray " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.0, "loss": 5.097908020019531 }, { "id": "identity_370", "image": "A-OKVQA/images/114288.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what reason is the buses door open here?\nChoices:\n(A) loading passengers\n(B) stop light\n(C) driving\n(D) collecting donations" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.203125, "loss": 4.375439643859863 }, { "id": "identity_30108", "image": "A-OKVQA/images/23660.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which giraffe left to right has the best chance of getting petted?\nChoices:\n(A) second\n(B) very back\n(C) first\n(D) third one" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.03125, "loss": 4.063317775726318 }, { "id": "identity_204839", "image": "A-OKVQA/images/324082.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of equipment is attached to the belt of the figure on the left?" }, { "from": "gpt", "value": "The answer is Radio and pouches" } ], "clip_score": 14.8671875, "loss": 4.142911911010742 }, { "id": "identity_336562", "image": "PMC-VQA/images/PMC7921924_diagnostics-11-00331-f005_380805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the kidney is enlarged here?\nChoices:\n(A) Left kidney \n(B) Both kidneys \n(C) Renal sinus \n(D) Right kidney" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.796875, "loss": 4.034148693084717 }, { "id": "identity_29368", "image": "A-OKVQA/images/71563.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What video game genre is seen on the computer monitor?\nChoices:\n(A) real-time strategy\n(B) shooter\n(C) platform\n(D) role-playing" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.71875, "loss": 4.035175800323486 }, { "id": "identity_321443", "image": "A-OKVQA/images/480005.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the person taking the items on the boat?\nChoices:\n(A) throwing away\n(B) to market\n(C) home\n(D) church" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.65625, "loss": 4.595548152923584 }, { "id": "identity_65175", "image": "ScienceQA/images/9684.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the stuffed puppy have a visible tail?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 16.265625, "loss": 4.113455772399902 }, { "id": "identity_145710", "image": "DocVQA/images/gtxb0228_19.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different positions has William J. Darby held at Vanderbilt University School of Medicine?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.546875, "loss": 4.393916130065918 }, { "id": "identity_314229", "image": "FigureQA/images/54815.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.71875, "loss": 4.577096462249756 }, { "id": "identity_123260", "image": "PMC-VQA/images/PMC6426548_F9_454292.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can we identify any signs of ligament injury in this MRI image?" }, { "from": "gpt", "value": "The answer is No signs of injury visible" } ], "clip_score": 35.875, "loss": 4.124532699584961 }, { "id": "identity_332262", "image": "PMC-VQA/images/PMC4920187_BIO017384F2_44740.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrowhead point to in these images?\nChoices:\n(A) thio-T-positive SC35-negative particle \n(B) a thio-T and SC35-positive particle \n(C) nucleus \n(D) nucleolus" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.453125, "loss": 4.689105033874512 }, { "id": "identity_278620", "image": "PMC-VQA/images/PMC4349652_pone.0118849.g003_364003.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which staining method was applied to the nasal mucosa sections? \nOptions:\n(A) Congo Red and Alcian Blue\n(B) Hematoxylin and Eosin\n(C) Periodic Acid-Schiff\n(D) Toluidine Blue" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.953125, "loss": 4.121710777282715 }, { "id": "identity_329499", "image": "A-OKVQA/images/233119.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would you most likely do with a card in this place?\nChoices:\n(A) magic trick\n(B) open door\n(C) play cards\n(D) banking" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 11.6484375, "loss": 4.553847312927246 }, { "id": "identity_275936", "image": "A-OKVQA/images/259186.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of support sleeves or braces is the player wearing? Ankle, knee, elbow, or wrist?\nChoices:\n(A) ankle\n(B) knee\n(C) elbow\n(D) wrist" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.875, "loss": 4.213666915893555 }, { "id": "identity_318459", "image": "VizWiz/images/VizWiz_train_00014569.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What vocab word is in the first blank?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 25.734375, "loss": 4.037246227264404 }, { "id": "identity_325229", "image": "TQA/images/question_images/human_system_nervous_6175.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the frontal cortex located in relation to?\n\nChoices:\n(A) Striatum\n(B) VTA\n(C) Nucleus accumbens\n(D) Substantia nigra" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.46875, "loss": 4.159518241882324 }, { "id": "identity_158620", "image": "ScienceQA/images/13118.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the geographical feature that forms the southern border of the highlighted state?" }, { "from": "gpt", "value": "The answer is Connecticut River" } ], "clip_score": 29.390625, "loss": 4.240673065185547 }, { "id": "identity_331195", "image": "A-OKVQA/images/18375.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the role of Bank of America in this context?\nChoices:\n(A) loan provider\n(B) site provider\n(C) sponsor\n(D) fund provider" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.7890625, "loss": 4.415440559387207 }, { "id": "identity_328545", "image": "ScienceQA/images/9859.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Billings\n(B) Burlington\n(C) Montpelier\n(D) New York City" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.328125, "loss": 4.195522785186768 }, { "id": "identity_206320", "image": "TextVQA/images/0e3dbba8d9235257.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many buildings have a pyramid-shaped top?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 15.171875, "loss": 4.397341728210449 }, { "id": "identity_106121", "image": "A-OKVQA/images/461378.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many total incense sticks can be counted in the display?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 27.03125, "loss": 4.357609272003174 }, { "id": "identity_206503", "image": "PMC-VQA/images/PMC4097124_F3_306001.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the central panel with the green cross lines indicating?" }, { "from": "gpt", "value": "The answer is Reference point for orientation" } ], "clip_score": 22.125, "loss": 4.891367435455322 }, { "id": "identity_320811", "image": "PMC-VQA/images/PMC4074584_F4_302022.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be observed in this image?\nChoices:\n(A) Fibrillar surface\n(B) Mineral-like structures\n(C) Root canal fillings\n(D) None of the above." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.890625, "loss": 4.114642143249512 }, { "id": "identity_295692", "image": "A-OKVQA/images/498259.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the large number of people on the boat?\nChoices:\n(A) taking tour\n(B) fishing expedition\n(C) rowing team\n(D) life boat" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.140625, "loss": 4.328383445739746 }, { "id": "identity_243403", "image": "PMC-VQA/images/PMC6904451_Fig2_81853.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the image, which organs are indicated by \"L\" and \"R\", and what is the primary function of each organ?" }, { "from": "gpt", "value": "The answer is Filter blood and produce urine" } ], "clip_score": 25.921875, "loss": 4.021384239196777 }, { "id": "identity_320396", "image": "ScienceQA/images/15674.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Columbus\n(B) Indianapolis\n(C) Cincinnati\n(D) Cleveland" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.59375, "loss": 4.2961106300354 }, { "id": "identity_77634", "image": "AI2D/images/3258.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many ribosomes are depicted in the Prokaryotic Cell?" }, { "from": "gpt", "value": "The answer is An integer number (count the dots)" } ], "clip_score": 28.265625, "loss": 4.225618839263916 }, { "id": "identity_254415", "image": "VQA2.0/images/397192.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the colors of the top and legs of the bench, and how do they contrast with each other?" }, { "from": "gpt", "value": "The answer is Brown top, black legs, high contrast" } ], "clip_score": 27.609375, "loss": 4.092834949493408 }, { "id": "identity_268327", "image": "A-OKVQA/images/349822.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following multiple person sports is currently being played? \n(A) cricket\n(B) frisbee\n(C) tennis\n(D) badminton" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.03125, "loss": 4.1503705978393555 }, { "id": "identity_288111", "image": "A-OKVQA/images/483316.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the next action that the train will take from the following choices: (A) evacuate passengers, (B) move, (C) close doors, or (D) power off?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.140625, "loss": 4.827120304107666 }, { "id": "identity_261405", "image": "PMC-VQA/images/PMC8769554_F3_167335.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which technique was employed to capture the images?\nChoices:\n(A) Scanning electron microscopy\n(B) Transmission electron microscopy\n(C) Fluorescence microscopy\n(D) X-ray microscopy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.8125, "loss": 4.088863372802734 }, { "id": "identity_255263", "image": "DocVQA/images/jzbn0226_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the steps the EPA should take if the information gathered is insufficient to designate the priority of a batched chemical?" }, { "from": "gpt", "value": "The answer is Issue notice, voluntary collection, determine necessity, accountability, iterate process" } ], "clip_score": 24.875, "loss": 5.238209247589111 }, { "id": "identity_7990", "image": "TQA/images/question_images/cycle_rock_6724.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What rocks are below the sedimentary rocks?\nChoices:\n(A) Sediments\n(B) Metamorphic rocks\n(C) Solidification\n(D) Igneous rocks" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.1875, "loss": 4.338024139404297 }, { "id": "identity_310498", "image": "AI2D/images/3727.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which region in the diagram represents this part?\nChoices:\n(A) h\n(B) e\n(C) g\n(D) k" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.265625, "loss": 4.212681770324707 }, { "id": "identity_285061", "image": "A-OKVQA/images/388179.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the men wearing orange vests?\nChoices:\n(A) camouflage\n(B) visibility\n(C) fashion\n(D) costume" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.078125, "loss": 4.909483909606934 }, { "id": "identity_63820", "image": "ScienceQA/images/458.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the temperature range of -5 to 5 degrees Celsius?" }, { "from": "gpt", "value": "The answer is Light blue to white gradient" } ], "clip_score": 19.875, "loss": 4.629275798797607 }, { "id": "identity_59762", "image": "DocVQA/images/gzmd0079_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the department that reviewed the request on March 12, 1997?" }, { "from": "gpt", "value": "The answer is Law (LAW)" } ], "clip_score": 23.640625, "loss": 4.136033535003662 }, { "id": "identity_109889", "image": "TextVQA/images/4db74e15923dd476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the iPhone in the image have any visible physical buttons?" }, { "from": "gpt", "value": "The answer is No visible buttons" } ], "clip_score": 25.890625, "loss": 4.4656662940979 }, { "id": "identity_302591", "image": "DVQA/images/bar_train_00119478.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which thing sold the most units summed across all places?" }, { "from": "gpt", "value": "The answer is smile" } ], "clip_score": 21.734375, "loss": 4.034922122955322 }, { "id": "identity_300319", "image": "PMC-VQA/images/PMC3078894_F6_92691.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the main subject of the image?\nChoices:\n(A) Microorganisms interactions \n(B) Cellular organelles \n(C) Molecular interactions \n(D) Tissue samples" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.40625, "loss": 5.343953609466553 }, { "id": "identity_291643", "image": "TQA/images/question_images/solar_system_4317.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which planet in the solar system is known to be the hottest?\nChoices:\n(A) Mercury\n(B) Mars\n(C) Pluto\n(D) Venus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.53125, "loss": 4.34984827041626 }, { "id": "identity_191491", "image": "PMC-VQA/images/PMC4906968_Fig8_41159.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the marking with the asterisk?" }, { "from": "gpt", "value": "The answer is Irregular shape" } ], "clip_score": 16.1875, "loss": 4.08188533782959 }, { "id": "identity_49226", "image": "DVQA/images/bar_train_00103255.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest value for 'music'?" }, { "from": "gpt", "value": "The answer is mark" } ], "clip_score": 22.359375, "loss": 4.2212629318237305 }, { "id": "identity_233009", "image": "VQA2.0/images/309319.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the graphic on the man's shirt, and how many people are visible in the background?" }, { "from": "gpt", "value": "The answer is red, five" } ], "clip_score": 16.375, "loss": 5.28142786026001 }, { "id": "identity_303614", "image": "PMC-VQA/images/PMC4632949_fig02_440920.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the bottom row of the figure?\nChoices:\n(A) Whole heart staining.\n(B) Cell staining.\n(C) Injured tissue.\n(D) Untreated mice." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.046875, "loss": 4.4093852043151855 }, { "id": "identity_186416", "image": "VQA2.0/images/474069.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the building directly behind the clock tower?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 26.515625, "loss": 4.31405782699585 }, { "id": "identity_293676", "image": "PMC-VQA/images/PMC6365311_Fig3_432950.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging feature distinguishes a mass from a meningioma among the following options: (A) CSF cleft, (B) Dural tail, (C) Wide dural base, or (D) Poorly outlined cortical interface?" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 34.71875, "loss": 4.1720051765441895 }, { "id": "identity_157284", "image": "AI2D/images/262.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary source of energy for the great tit in this food web?" }, { "from": "gpt", "value": "The answer is living oak leaf" } ], "clip_score": 31.671875, "loss": 4.208135604858398 }, { "id": "identity_193777", "image": "VQA2.0/images/470685.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pots or pans are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.46875, "loss": 4.468797206878662 }, { "id": "identity_129894", "image": "VQA2.0/images/491579.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bread slots does the toaster have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.796875, "loss": 4.17197322845459 }, { "id": "identity_4616", "image": "A-OKVQA/images/190018.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A small vessel for travelling over water propelled by oars sails or an engine is?\nChoices:\n(A) flight\n(B) boat\n(C) ship\n(D) floater" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.640625, "loss": 4.075401306152344 }, { "id": "identity_206200", "image": "VQA-AS/images/8988.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many panes are in the small window?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 16.921875, "loss": 4.5061211585998535 }, { "id": "identity_34994", "image": "PMC-VQA/images/PMC8607350_FIG3_116842.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is seen within the fat in bilateral feet? \nChoices:\n(A) Blood vessels \n(B) Fibrous strands \n(C) Cartilage \n(D) Muscles " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.921875, "loss": 4.859872341156006 }, { "id": "identity_326692", "image": "TextVQA/images/102be9f03369e1b4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who brewed this beer?" }, { "from": "gpt", "value": "The answer is duclaw " } ], "clip_score": 23.59375, "loss": 5.294536113739014 }, { "id": "identity_314544", "image": "A-OKVQA/images/575280.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the young individual here being taught to do?\nChoices:\n(A) ski\n(B) boogie board\n(C) sail\n(D) paddle board" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.671875, "loss": 4.185344219207764 }, { "id": "identity_326668", "image": "IconQA/images/80229.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image shows a moon. What is the ninth image?\nChoices:\n(A) moon\n(B) star\n(C) rocket" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.46875, "loss": 4.331917762756348 }, { "id": "identity_265402", "image": "AI2D/images/3116.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being depicted in the diagram above?\nChoices:\n(A) Papaya leaf\n(B) Plant species\n(C) Animal cell\n(D) Star formation" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.8125, "loss": 4.2264299392700195 }, { "id": "identity_326698", "image": "PMC-VQA/images/PMC4424572_Fig2_384530.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the image?\nChoices:\n(A) A 2D cartoon\n(B) A building structure\n(C) A human anatomy model\n(D) A landscape painting" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.6796875, "loss": 4.120053291320801 }, { "id": "identity_38239", "image": "DVQA/images/bar_train_00193333.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second group of bars from the bottom?" }, { "from": "gpt", "value": "The answer is prize" } ], "clip_score": 18.78125, "loss": 4.009669303894043 }, { "id": "identity_28866", "image": "VizWiz/images/VizWiz_train_00017189.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Tell me what this dog looks like" }, { "from": "gpt", "value": "The answer is lab" } ], "clip_score": 26.578125, "loss": 4.246025085449219 }, { "id": "identity_333318", "image": "A-OKVQA/images/50927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which team is currently batting?\nChoices:\n(A) brooklyn dodgers\n(B) seattle mariners\n(C) boston beaneaters\n(D) cleveland spiders" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.90625, "loss": 4.034174919128418 }, { "id": "identity_131548", "image": "A-OKVQA/images/52661.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the large yacht in the image?" }, { "from": "gpt", "value": "The answer is Mega Zatara" } ], "clip_score": 24.28125, "loss": 4.319949150085449 }, { "id": "identity_187447", "image": "A-OKVQA/images/181836.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of pattern is predominant on the ties the person is wearing?" }, { "from": "gpt", "value": "The answer is Striped" } ], "clip_score": 18.515625, "loss": 4.038506031036377 }, { "id": "identity_336188", "image": "IconQA/images/19853.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, how likely is it that you will get a black one?\nChoices:\n(A) unlikely\n(B) probable\n(C) impossible\n(D) certain" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.78125, "loss": 4.083284854888916 }, { "id": "identity_220943", "image": "VQA-AS/images/18633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the woman doing with her hands?" }, { "from": "gpt", "value": "The answer is Gesturing or explaining" } ], "clip_score": 17.84375, "loss": 4.1840972900390625 }, { "id": "identity_195793", "image": "A-OKVQA/images/368148.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background, excluding the blurred face?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 18.171875, "loss": 4.9015655517578125 }, { "id": "identity_297365", "image": "IconQA/images/78279.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the probability of selecting a black marble without looking?\nChoices:\n(A) certain\n(B) unlikely\n(C) probable\n(D) impossible" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.828125, "loss": 4.319338798522949 }, { "id": "identity_20300", "image": "A-OKVQA/images/292730.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is there a candle in the woman's dessert?\nChoices:\n(A) to trick\n(B) to celebrate\n(C) as joke\n(D) for light" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.015625, "loss": 4.328037261962891 }, { "id": "identity_298732", "image": "IconQA/images/37468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the time shown by filling in the blank with a time word, not a number. It is (_) to one." }, { "from": "gpt", "value": "The answer is quarter" } ], "clip_score": 24.71875, "loss": 4.430002689361572 }, { "id": "identity_207665", "image": "PMC-VQA/images/PMC7802110_FIG2_340735.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image?" }, { "from": "gpt", "value": "The answer is Coronal" } ], "clip_score": 27.8125, "loss": 4.316164016723633 }, { "id": "identity_30044", "image": "A-OKVQA/images/147681.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where did the skateboarder begin this move from?\nChoices:\n(A) up above\n(B) mid level\n(C) grassy area\n(D) bottom step" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.796875, "loss": 4.03614616394043 }, { "id": "identity_337076", "image": "A-OKVQA/images/287305.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of place is this?\nChoices:\n(A) tropical\n(B) residential\n(C) country\n(D) city" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.3671875, "loss": 4.033114433288574 }, { "id": "identity_78721", "image": "TQA/images/question_images/parts_skeleton_7273.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bones are labeled in the arm region?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 31.90625, "loss": 4.681647300720215 }, { "id": "identity_321463", "image": "DVQA/images/bar_train_00031006.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the smallest total value?" }, { "from": "gpt", "value": "The answer is test" } ], "clip_score": 18.984375, "loss": 4.062359809875488 }, { "id": "identity_326728", "image": "DVQA/images/bar_train_00167366.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sets of bars have at least one bar with a value less than 9?" }, { "from": "gpt", "value": "The answer is five" } ], "clip_score": 20.015625, "loss": 4.132403373718262 }, { "id": "identity_248060", "image": "DVQA/images/bar_train_00090075.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold for each item across all shops, and which item has the highest total units sold?" }, { "from": "gpt", "value": "The answer is random" } ], "clip_score": 35.0, "loss": 4.650875568389893 }, { "id": "identity_327719", "image": "TQA/images/abc_question_images/sun_layers_16310.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is between the convection zone and the outer layer?\nChoices:\n(A) M\n(B) P\n(C) J\n(D) V" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.25, "loss": 4.221480369567871 }, { "id": "identity_286845", "image": "PMC-VQA/images/PMC5303254_Fig3_126212.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the panel shown in the image? Is it:\n(A) Blue\n(B) Green\n(C) Yellow\n(D) Red" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 7.23828125, "loss": 4.245682239532471 }, { "id": "identity_221577", "image": "TextVQA/images/859746175448ab2e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first item on the list under the \"Lists\" section?" }, { "from": "gpt", "value": "The answer is Most active, new lists, older, other - empty lists" } ], "clip_score": 17.78125, "loss": 5.132645606994629 }, { "id": "identity_329829", "image": "PMC-VQA/images/PMC5537248_Fig2_197740.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was observed in the occipital lesion under non-enhanced CT?\nChoices:\n(A) Enlargement \n(B) Calcification \n(C) Inflammation \n(D) Bleeding" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.09375, "loss": 4.4768757820129395 }, { "id": "identity_31687", "image": "IconQA/images/94422.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of hearts even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.5, "loss": 4.554657936096191 }, { "id": "identity_303642", "image": "PMC-VQA/images/PMC8274059_Fig1_7808.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the specific feature of the lesion mentioned in the CT scan caption?\nChoices:\n(A) Bony overgrowth\n(B) Cortical bone resorption\n(C) Enlarged pulp chamber\n(D) Tooth mobility" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.125, "loss": 5.081847190856934 }, { "id": "identity_335236", "image": "DocVQA/images/xrcy0227_96.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the organization?" }, { "from": "gpt", "value": "The answer is Citizens Comm on Science Law and Food Supply" } ], "clip_score": 18.1875, "loss": 4.631060600280762 }, { "id": "identity_274710", "image": "FigureQA/images/80376.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the roughest: Light Salmon? \nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.234375, "loss": 4.044886589050293 }, { "id": "identity_276569", "image": "A-OKVQA/images/321026.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of byway is depicted in the image provided?\nChoices:\n(A) freeway\n(B) raceway\n(C) nature path\n(D) railway" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.1171875, "loss": 4.443594932556152 }, { "id": "identity_196642", "image": "A-OKVQA/images/234717.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the roof of the building on the hill?" }, { "from": "gpt", "value": "The answer is Dark gray or black" } ], "clip_score": 16.734375, "loss": 4.0642266273498535 }, { "id": "identity_276449", "image": "FigureQA/images/40536.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Crimson or Blue?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.0625, "loss": 4.6001129150390625 }, { "id": "identity_188046", "image": "PMC-VQA/images/PMC8628426_Fig11_125868.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many star markers are present in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.59375, "loss": 4.60903787612915 }, { "id": "identity_32347", "image": "A-OKVQA/images/435579.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who watches these people while they board on snow?\nChoices:\n(A) judges\n(B) coal miners\n(C) no one\n(D) enemies" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.09375, "loss": 4.685574531555176 }, { "id": "identity_131658", "image": "Super-CLEVR/images/superCLEVR_new_000624.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have wheels?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 24.21875, "loss": 4.742441654205322 }, { "id": "identity_289329", "image": "PMC-VQA/images/PMC6745377_jmrs348-fig-0005_37887.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, what is represented by the solid arrow?\nChoices:\n(A) Biopsy marker \n(B) Calcifications \n(C) Seed \n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.640625, "loss": 5.251730442047119 }, { "id": "identity_147595", "image": "DocVQA/images/qghv0228_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the first presentation in the Product Approval Process session?" }, { "from": "gpt", "value": "The answer is The System Is Working" } ], "clip_score": 23.0625, "loss": 4.642753601074219 }, { "id": "identity_70505", "image": "TQA/images/abc_question_images/ocean_waves_17119.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label P represent in the waveform diagram?" }, { "from": "gpt", "value": "The answer is Equilibrium position" } ], "clip_score": 31.8125, "loss": 4.030004024505615 }, { "id": "identity_156866", "image": "AI2D/images/4496.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term for the leaf margin that has deep, sharp indentations?" }, { "from": "gpt", "value": "The answer is Cleft" } ], "clip_score": 26.5, "loss": 4.402270793914795 }, { "id": "identity_13339", "image": "PMC-VQA/images/PMC8102820_f1_435049.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the brain showed patchy lesions? \nChoices:\n(A) Temporal lobe \n(B) Cerebellum \n(C) Medulla oblongata \n(D) Occipital lobe " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.984375, "loss": 4.559832572937012 }, { "id": "identity_283948", "image": "PMC-VQA/images/PMC6920455_Fig3_86881.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which feature can be observed in heart tissue among the following options: \n(A) Lesions \n(B) Contrast uptake \n(C) Airways \n(D) Tissue preservation." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.265625, "loss": 4.574057579040527 }, { "id": "identity_134506", "image": "DVQA/images/bar_train_00051537.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the Y-axis?" }, { "from": "gpt", "value": "The answer is kind" } ], "clip_score": 26.90625, "loss": 4.012770652770996 }, { "id": "identity_225326", "image": "ScienceQA/images/1374.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the state highlighted in the image and then name its capital" }, { "from": "gpt", "value": "The answer is Boise" } ], "clip_score": 27.96875, "loss": 4.114108085632324 }, { "id": "identity_143834", "image": "DocVQA/images/fpjw0023_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the marketing person responsible for the project?" }, { "from": "gpt", "value": "The answer is Cressida Lozano" } ], "clip_score": 17.609375, "loss": 4.027449131011963 }, { "id": "identity_132055", "image": "VQA-RAD/images/synpic25821.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the patient in the image?" }, { "from": "gpt", "value": "The answer is KAYA LIN" } ], "clip_score": 23.671875, "loss": 5.07970666885376 }, { "id": "identity_291181", "image": "ScienceQA/images/2545.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What property does this object have: sticky or rough?\nChoices:\n(A) sticky\n(B) rough" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.90625, "loss": 4.286445617675781 }, { "id": "identity_269326", "image": "FigureQA/images/15963.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Salmon the color with the highest median value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.15625, "loss": 4.663814544677734 }, { "id": "identity_34676", "image": "PMC-VQA/images/PMC7926624_children-08-00163-f001_382775.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the black arrow pointing to in the left panel? \nChoices:\n(A) Left-sided diaphragmatic defect, \n(B) Right-sided diaphragm, \n(C) Intrathoracic intestine, \n(D) None of the above options. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.234375, "loss": 4.831719398498535 }, { "id": "identity_279016", "image": "FigureQA/images/32299.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Coral the median with the highest value?\nChoices:\n(A) No\n(B) Yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.5, "loss": 4.356814384460449 }, { "id": "identity_271353", "image": "FigureQA/images/28364.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Cyan or Olive?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.140625, "loss": 4.005204200744629 }, { "id": "identity_265619", "image": "VizWiz/images/VizWiz_train_00019106.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the type of frozen dinner this is?" }, { "from": "gpt", "value": "The answer is some pasta dinner" } ], "clip_score": 26.21875, "loss": 4.451539516448975 }, { "id": "identity_300779", "image": "ScienceQA/images/5955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Concord\n(B) Providence\n(C) Annapolis\n(D) Montpelier" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.5, "loss": 4.088967800140381 }, { "id": "identity_326416", "image": "A-OKVQA/images/485822.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What event is happening here?\nChoices:\n(A) car speeding\n(B) car show\n(C) motorcycle parade\n(D) motorcycle sale" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.6875, "loss": 4.150026798248291 }, { "id": "identity_325801", "image": "A-OKVQA/images/448648.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this place?\nChoices:\n(A) underground\n(B) city\n(C) forest\n(D) sky" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.9765625, "loss": 4.353348731994629 }, { "id": "identity_287148", "image": "FigureQA/images/56296.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gold considered the highest median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.484375, "loss": 4.3111653327941895 }, { "id": "identity_292565", "image": "PMC-VQA/images/PMC3996258_F3_283260.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color represents the surface charge in the image of PvD1r? \nChoices:\n(A) Positive surface charge\n(B) Negative surface charge\n(C) Neutral surface charge\n(D) No surface charge is depicted" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.9375, "loss": 4.246694564819336 }, { "id": "identity_31060", "image": "ScienceQA/images/14571.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Hawaii?\nChoices:\n(A) Baton Rouge\n(B) Phoenix\n(C) Honolulu\n(D) Hilo" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.25, "loss": 4.2227959632873535 }, { "id": "identity_285072", "image": "VQA2.0/images/426500.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the ground dry in appearance?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 11.984375, "loss": 4.233165264129639 }, { "id": "identity_66816", "image": "TQA/images/question_images/human_system_respiratory_2858.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lobes are present in the right lung according to the diagram?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 34.125, "loss": 4.161038398742676 }, { "id": "identity_266970", "image": "Super-CLEVR/images/superCLEVR_new_000695.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there a smaller number of small green matte minivans compared to sedans?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 28.421875, "loss": 5.039587497711182 }, { "id": "identity_259142", "image": "ScienceQA/images/7901.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the capital of the state highlighted in the image, and then name the capital of the state directly south of it" }, { "from": "gpt", "value": "The answer is Boston" } ], "clip_score": 26.71875, "loss": 4.21312952041626 }, { "id": "identity_289282", "image": "A-OKVQA/images/524325.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material are the items that people are looking at most likely made of?\nChoices:\n(A) mud\n(B) cloth\n(C) brick\n(D) stone" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 3.720703125, "loss": 4.468670845031738 }, { "id": "identity_23717", "image": "PMC-VQA/images/PMC7073646_jcm-09-00303-f002_134954.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is represented on the fused image? \nChoices:\n(A) The position of the implant placement \n(B) The region where a biopsy was taken \n(C) The location of tooth decay \n(D) The position of a dental filling " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.328125, "loss": 4.5650482177734375 }, { "id": "identity_131066", "image": "Super-CLEVR/images/superCLEVR_new_000112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly behind the blue bicycle?" }, { "from": "gpt", "value": "The answer is A white bus" } ], "clip_score": 18.09375, "loss": 4.345505237579346 }, { "id": "identity_331409", "image": "PMC-VQA/images/PMC8069140_ijms-22-03867-f007_425738.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of imaging was used to observe this activity?\nChoices:\n(A) CT scan \n(B) MRI \n(C) X-ray \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.3125, "loss": 4.491372108459473 }, { "id": "identity_325506", "image": "IconQA/images/63953.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a paw. What is the sixth image?\nChoices:\n(A) dog\n(B) paw\n(C) house" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.125, "loss": 4.042490482330322 }, { "id": "identity_31743", "image": "A-OKVQA/images/21036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the blue float look like?\nChoices:\n(A) lobster\n(B) horse\n(C) cow\n(D) egg" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.28125, "loss": 4.191547393798828 }, { "id": "identity_305805", "image": "Super-CLEVR/images/superCLEVR_new_000949.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a tiny brown aeroplane made of the same material as the tiny brown object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 21.59375, "loss": 5.482856273651123 }, { "id": "identity_287447", "image": "AI2D/images/3431.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the ovary part of the stamen? \nChoices:\n(A) No, the ovary is part of the pistil.\n(B) No, the ovary is found in women.\n(C) No, the ovary is what doctors examine.\n(D) Yes, the ovary is part of the stamen." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.859375, "loss": 4.108293056488037 }, { "id": "identity_113239", "image": "VQA-AS/images/5789.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many toy cars are visible on the rug?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 24.109375, "loss": 5.1659626960754395 }, { "id": "identity_317596", "image": "PMC-VQA/images/PMC8886146_F3_210422.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the arcuate fascicle seen wrapping around here?\nChoices:\n(A) Thalamus\n(B) Hypothalamus\n(C) Posterior insular point\n(D) Cerebellum" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.3125, "loss": 4.095215797424316 }, { "id": "identity_270622", "image": "PMC-VQA/images/PMC8166577_Fig2_456921.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was revealed in the sonogram regarding the adnexal mass?\nChoices:\n(A) Single-layer echogenic mass \n(B) Three-layered mass \n(C) Four-layer echogenic mass \n(D) Hypoechoic mass" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.828125, "loss": 4.275426387786865 }, { "id": "identity_272506", "image": "DocVQA/images/ttng0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is Russell M. Wilder located?" }, { "from": "gpt", "value": "The answer is ex officio" } ], "clip_score": 17.765625, "loss": 4.079533100128174 }, { "id": "identity_302235", "image": "A-OKVQA/images/296233.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which wrestler would be most likely to wear the garb the people on the left have on?\nChoices:\n(A) john cena\n(B) drew mcintyre\n(C) jinder mahal\n(D) kofi kingston" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.3203125, "loss": 4.282503128051758 }, { "id": "identity_191551", "image": "A-OKVQA/images/205486.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are visibly holding hats?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.90625, "loss": 4.341605186462402 }, { "id": "identity_211071", "image": "VQA2.0/images/331780.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dogs are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.859375, "loss": 4.032259941101074 }, { "id": "identity_249740", "image": "DVQA/images/bar_train_00104756.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset has the highest accuracy for the 'noise' category, and what is its accuracy value?" }, { "from": "gpt", "value": "The answer is nature, 9" } ], "clip_score": 32.71875, "loss": 4.292975902557373 }, { "id": "identity_272599", "image": "PMC-VQA/images/PMC5004802_pone.0162271.g001_67944.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What information is displayed in the middle and bottom rows of the data?\nChoices:\n(A) Visual acuity tests \n(B) The participant's brain activity \n(C) The participant's affected areas \n(D) Different sections of the brain" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.734375, "loss": 4.043672561645508 }, { "id": "identity_62013", "image": "DocVQA/images/mmfm0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person to whom the telegram is addressed?" }, { "from": "gpt", "value": "The answer is William Darby MD" } ], "clip_score": 27.328125, "loss": 5.429904460906982 }, { "id": "identity_37535", "image": "A-OKVQA/images/514904.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What surrounds the sausages here?\nChoices:\n(A) buns\n(B) barbeque sauce\n(C) smiles\n(D) chips" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.71875, "loss": 4.176183223724365 }, { "id": "identity_62651", "image": "DocVQA/images/nhxj0037_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of using mono potassium phosphate as mentioned in the document?" }, { "from": "gpt", "value": "The answer is as the fluxing agent" } ], "clip_score": 19.5625, "loss": 4.173216819763184 }, { "id": "identity_319652", "image": "A-OKVQA/images/80543.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the person in black crouched?\nChoices:\n(A) to dive\n(B) to jump\n(C) gain speed\n(D) to sit" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.1484375, "loss": 4.074098110198975 }, { "id": "identity_129011", "image": "A-OKVQA/images/573784.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the object to the left of the fireplace?" }, { "from": "gpt", "value": "The answer is A lamp" } ], "clip_score": 26.625, "loss": 4.863862037658691 }, { "id": "identity_151050", "image": "ScienceQA/images/14436.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lemons can be seen in the glass of lemonade?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 30.453125, "loss": 4.0808186531066895 }, { "id": "identity_231403", "image": "DocVQA/images/ykdb0228_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the arbitrary grading scale used for the changes observed in the liver, lung, and brain, and what is the contention about the histologic changes in the treated groups compared to the control?" }, { "from": "gpt", "value": "The answer is Grading scale: 0 to ++, changes similar to control" } ], "clip_score": 13.515625, "loss": 4.173802375793457 }, { "id": "identity_128630", "image": "Super-CLEVR/images/superCLEVR_new_001689.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the motorcycle that is to the right of the green object?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 26.109375, "loss": 4.9707770347595215 }, { "id": "identity_264967", "image": "TQA/images/abc_question_images/muscle_fiber_17088.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which structure connects the muscle to the bone?\nChoices:\n(A) MITOCHONDRION\n(B) TENDON\n(C) FIBER\n(D) TISSUE" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.1875, "loss": 4.130894660949707 }, { "id": "identity_44842", "image": "MapQA/images/map_1471.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states fall into the lowest percentage range category?" }, { "from": "gpt", "value": "The answer is an integer number" } ], "clip_score": 27.203125, "loss": 4.021089553833008 }, { "id": "identity_329368", "image": "A-OKVQA/images/58143.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What sound will the people in the apartments hear through their windows?\nChoices:\n(A) people talking\n(B) bikers\n(C) man walking\n(D) train" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.859375, "loss": 4.519326686859131 }, { "id": "identity_4522", "image": "DVQA/images/bar_train_00174539.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the chart contain stacked bars?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 27.21875, "loss": 4.609152793884277 }, { "id": "identity_281352", "image": "DocVQA/images/sthd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which month of the year is referenced on the page?" }, { "from": "gpt", "value": "The answer is november" } ], "clip_score": 19.34375, "loss": 5.011080741882324 }, { "id": "identity_332404", "image": "A-OKVQA/images/554636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of business is advertised in white letters on this building?\nChoices:\n(A) electronics store\n(B) sports retailer\n(C) food chain\n(D) repair shop" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.3671875, "loss": 4.023526191711426 }, { "id": "identity_136980", "image": "DocVQA/images/rlvh0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the result of the transmission?" }, { "from": "gpt", "value": "The answer is OK" } ], "clip_score": 20.3125, "loss": 4.106753349304199 }, { "id": "identity_302359", "image": "FigureQA/images/28272.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Brown the high value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.59375, "loss": 4.02961540222168 }, { "id": "identity_278711", "image": "A-OKVQA/images/52305.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which mode of transportation shown in the image would be the quickest way to reach a destination?\nChoices:\n(A) walking\n(B) airplane\n(C) boat\n(D) pickup" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.5625, "loss": 4.245955944061279 }, { "id": "identity_316306", "image": "A-OKVQA/images/350303.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What word would best describe this place?\nChoices:\n(A) fancy\n(B) miniature\n(C) dilapidated\n(D) messy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.25, "loss": 4.558743476867676 }, { "id": "identity_206714", "image": "A-OKVQA/images/293505.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many animals are visible on the road?" }, { "from": "gpt", "value": "The answer is One animal" } ], "clip_score": 19.515625, "loss": 4.670228481292725 }, { "id": "identity_283215", "image": "A-OKVQA/images/291140.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the frequent need to repair beach houses?\nChoices:\n(A) law\n(B) nosy neighbors\n(C) environmental wear\n(D) beauty" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.34375, "loss": 4.3561835289001465 }, { "id": "identity_65154", "image": "TQA/images/abc_question_images/parts_worm_17313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled U?" }, { "from": "gpt", "value": "The answer is It allows passage of sperm and a baby" } ], "clip_score": 26.734375, "loss": 4.448300361633301 }, { "id": "identity_290382", "image": "PMC-VQA/images/PMC6492342_Fig1_473846.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging modality displays a solitary hyperintense signal in the splenium?\nChoices:\n(A) T1-weighted imaging\n(B) T2-weighted imaging\n(C) FLAIR imaging\n(D) DWI" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.03125, "loss": 4.154355525970459 }, { "id": "identity_224796", "image": "VQA-AS/images/626.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many legs of the dog are visible, and what is the person holding?" }, { "from": "gpt", "value": "The answer is Two legs, a plate" } ], "clip_score": 25.828125, "loss": 4.523472309112549 }, { "id": "identity_247025", "image": "ScienceQA/images/11619.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the state highlighted in the image and then name its capital" }, { "from": "gpt", "value": "The answer is Saint Paul" } ], "clip_score": 28.953125, "loss": 4.174565315246582 }, { "id": "identity_311528", "image": "IconQA/images/5498.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this shape open or not?\nChoices:\n(A) closed\n(B) open" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.171875, "loss": 4.112781524658203 }, { "id": "identity_286064", "image": "Super-CLEVR/images/superCLEVR_new_001446.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more yellow objects to the left of the matte sedan than brown cruisers behind the large brown object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 30.4375, "loss": 5.059237480163574 }, { "id": "identity_326000", "image": "PMC-VQA/images/PMC5946363_F1_313145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the postoperative CT scan indicate?\nChoices:\n(A) Improvement in the infarcted tissue \n(B) No change in the infarcted tissue \n(C) Swelling of the infarcted tissue \n(D) Decreased mass effect on adjacent structures" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.265625, "loss": 4.182109832763672 }, { "id": "identity_3107", "image": "VQA2.0/images/336077.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the stadium crowded?" }, { "from": "gpt", "value": "The answer is baseball game" } ], "clip_score": 24.765625, "loss": 5.278944492340088 }, { "id": "identity_77862", "image": "AI2D/images/1823.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary source of energy for the food web depicted in the image?" }, { "from": "gpt", "value": "The answer is Leaves" } ], "clip_score": 32.375, "loss": 4.124300003051758 }, { "id": "identity_306522", "image": "A-OKVQA/images/166864.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these boats made out of?\nChoices:\n(A) metal\n(B) legos\n(C) plasma\n(D) ceramic" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.46875, "loss": 4.168745994567871 }, { "id": "identity_336275", "image": "A-OKVQA/images/390524.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What hobby does the person looking up enjoy?\nChoices:\n(A) kites\n(B) painting\n(C) football\n(D) acting" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.375, "loss": 4.1666178703308105 }, { "id": "identity_301530", "image": "AI2D/images/160.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is labeled as D in the diagram?\nChoices:\n(A) moon\n(B) earth\n(C) sun\n(D) tides" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.859375, "loss": 4.441590309143066 }, { "id": "identity_263936", "image": "DVQA/images/bar_train_00037100.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the second bar from the left in every group?" }, { "from": "gpt", "value": "The answer is code" } ], "clip_score": 17.015625, "loss": 4.390048027038574 }, { "id": "identity_337569", "image": "PMC-VQA/images/PMC7035667_Fig1_122778.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of imaging was used to visualize this condition?\nChoices:\n(A) CT\n(B) MRI\n(C) X-Ray\n(D) PET" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.171875, "loss": 4.1406049728393555 }, { "id": "identity_214224", "image": "A-OKVQA/images/313948.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible wheels are there in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 16.875, "loss": 5.311230659484863 }, { "id": "identity_268205", "image": "A-OKVQA/images/358462.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the horse being connected to the wagon?\nChoices:\n(A) by accident\n(B) eats grass\n(C) pulls wagon\n(D) stops horse" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.875, "loss": 4.458822727203369 }, { "id": "identity_199403", "image": "A-OKVQA/images/66166.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the vendor's apron?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 17.3125, "loss": 4.213639259338379 }, { "id": "identity_774", "image": "DVQA/images/bar_train_00146454.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second group of bars from the left?" }, { "from": "gpt", "value": "The answer is death" } ], "clip_score": 25.3125, "loss": 4.074061393737793 }, { "id": "identity_305686", "image": "FigureQA/images/83338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this have the minimum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.1875, "loss": 4.427180290222168 }, { "id": "identity_277925", "image": "DVQA/images/bar_train_00117344.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the bars run horizontally?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 21.03125, "loss": 4.318546295166016 }, { "id": "identity_208769", "image": "PMC-VQA/images/PMC6626925_F2_7538.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image?" }, { "from": "gpt", "value": "The answer is Coronal view" } ], "clip_score": 20.875, "loss": 4.0140604972839355 }, { "id": "identity_334373", "image": "PMC-VQA/images/PMC7841286_F1_354972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which eye is depicted in the image?\nChoices:\n(A) Left eye\n(B) Right eye\n(C) Both eyes\n(D) Cannot be determined" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.78125, "loss": 4.210751533508301 }, { "id": "identity_34431", "image": "A-OKVQA/images/442040.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the patrons unable to ski or snowboard?\nChoices:\n(A) ski lift\n(B) grass\n(C) snow\n(D) lodge" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.9375, "loss": 4.310849189758301 }, { "id": "identity_111133", "image": "VQA2.0/images/561950.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dunes are visible in the background?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 29.203125, "loss": 4.424977779388428 }, { "id": "identity_5379", "image": "VQA2.0/images/546940.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the woman's umbrella?" }, { "from": "gpt", "value": "The answer is green" } ], "clip_score": 20.4375, "loss": 4.039152145385742 }, { "id": "identity_305100", "image": "IconQA/images/9735.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this shape?\nChoices:\n(A) slide\n(B) turn\n(C) flip" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.40625, "loss": 4.065956115722656 }, { "id": "identity_35927", "image": "A-OKVQA/images/9226.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the white poster near the woman used for?\nChoices:\n(A) mailing\n(B) painting\n(C) selling\n(D) advertising" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.90625, "loss": 4.2055583000183105 }, { "id": "identity_319974", "image": "TextVQA/images/bfcdcb3bf1013ebe.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of this book by the author?" }, { "from": "gpt", "value": "The answer is recklessness" } ], "clip_score": 29.359375, "loss": 4.738242149353027 }, { "id": "identity_262686", "image": "A-OKVQA/images/516414.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which yellow item is located on the boat among the following choices: stockings, bananas, net, or mustard?\nChoices:\n(A) stockings\n(B) bananas\n(C) net\n(D) mustard" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.1875, "loss": 4.387899398803711 }, { "id": "identity_7290", "image": "TextVQA/images/20e5bcf73f20bdb6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what single letter do you see at the top right of the box?" }, { "from": "gpt", "value": "The answer is sinful color" } ], "clip_score": 10.9375, "loss": 5.838161945343018 }, { "id": "identity_276295", "image": "A-OKVQA/images/359563.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which item on the counter is pink? Is it:\n(A) scissor handle\n(B) spoon\n(C) napkin\n(D) fork" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.453125, "loss": 4.1726975440979 }, { "id": "identity_292138", "image": "PMC-VQA/images/PMC4109997_pone-0103283-g004_308561.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the dotted line in the diagram represent?\nChoices:\n(A) The position of the brain,\n(B) The position of the axons,\n(C) The position of the ganglia,\n(D) The position of the larvae." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.609375, "loss": 4.178384780883789 }, { "id": "identity_274238", "image": "DVQA/images/bar_train_00168334.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store is represented by the lime color?" }, { "from": "gpt", "value": "The answer is work" } ], "clip_score": 23.09375, "loss": 4.017224311828613 }, { "id": "identity_311611", "image": "IconQA/images/12219.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner more likely to stop on?\nChoices:\n(A) yellow\n(B) white\n(C) neither; white and yellow are equally likely" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.109375, "loss": 4.061638832092285 }, { "id": "identity_5828", "image": "DVQA/images/bar_train_00151746.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of grain smaller than cup?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 21.84375, "loss": 4.182896137237549 }, { "id": "identity_222398", "image": "A-OKVQA/images/55425.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorbikes are parked on the sidewalk?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 30.78125, "loss": 4.221779823303223 }, { "id": "identity_9984", "image": "ScienceQA/images/16621.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these three objects have in common?\nChoices:\n(A) fuzzy\n(B) shiny\n(C) soft" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.75, "loss": 4.303179740905762 }, { "id": "identity_302409", "image": "TextVQA/images/5cc2be2963e03cff.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the second step?" }, { "from": "gpt", "value": "The answer is indexing" } ], "clip_score": 20.71875, "loss": 4.721585750579834 }, { "id": "identity_56683", "image": "DocVQA/images/tjxg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first word of the second paragraph?" }, { "from": "gpt", "value": "The answer is greatest" } ], "clip_score": 15.5859375, "loss": 6.131613731384277 }, { "id": "identity_33637", "image": "IconQA/images/71880.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) turn\n(B) slide\n(C) flip" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.640625, "loss": 4.373134613037109 }, { "id": "identity_194738", "image": "PMC-VQA/images/PMC3744490_pone-0072265-g004_224985.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the object located in the center of the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 20.046875, "loss": 4.008429527282715 }, { "id": "identity_321302", "image": "A-OKVQA/images/564830.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What happens when the items in these vertical stacks are used?\nChoices:\n(A) games happen\n(B) nothing\n(C) music plays\n(D) complaining" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.9453125, "loss": 4.296771049499512 }, { "id": "identity_11723", "image": "TQA/images/question_images/human_system_eye_6130.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the pupil?\nChoices:\n(A) Behind the cornea\n(B) Before the Choroid\n(C) Over the iris\n(D) Below optic nerve" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.65625, "loss": 4.16538667678833 }, { "id": "identity_332690", "image": "PMC-VQA/images/PMC9117747_F1_285061.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristics of the vascular access can be examined by visualizing or removing different layers?\nChoices:\n(A) Color \n(B) Size \n(C) Smell \n(D) Taste" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.21875, "loss": 5.014854907989502 }, { "id": "identity_294998", "image": "A-OKVQA/images/495568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Whose information can be found on the tent that can be listened to?\nChoices:\n(A) politician\n(B) symphony\n(C) radio\n(D) indie mixtape" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.203125, "loss": 4.240169525146484 }, { "id": "identity_299029", "image": "PMC-VQA/images/PMC7564854_F0003_275748.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which muscle is adjacent to the right acromion process? \nChoices:\n(A) Infraspinatus\n(B) Subscapularis\n(C) Supraspinatus\n(D) None of the above." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.71875, "loss": 4.322504997253418 }, { "id": "identity_329454", "image": "ScienceQA/images/11329.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Columbia\n(B) Frankfort\n(C) Huntington\n(D) Charleston" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.390625, "loss": 4.918841361999512 }, { "id": "identity_1162", "image": "A-OKVQA/images/184431.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the vehicle on the right called?\nChoices:\n(A) van\n(B) train\n(C) bike\n(D) car" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.015625, "loss": 4.3968825340271 }, { "id": "identity_22995", "image": "A-OKVQA/images/37514.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the oval object on the bike tire used for?\nChoices:\n(A) protection\n(B) esthetics\n(C) carrying things\n(D) mud flap" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.328125, "loss": 4.579538822174072 }, { "id": "identity_24061", "image": "PlotQA/images/49906.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label or title of the Y-axis ?" }, { "from": "gpt", "value": "The answer is Country" } ], "clip_score": 26.140625, "loss": 4.097661972045898 }, { "id": "identity_14992", "image": "FigureQA/images/53222.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange Red the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.5, "loss": 4.086550712585449 }, { "id": "identity_123728", "image": "PMC-VQA/images/PMC9002522_polymers-14-01462-f001_251261.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label \"RC\" refer to in the image?" }, { "from": "gpt", "value": "The answer is Root Cell or Resin Composite (context-dependent)" } ], "clip_score": 27.0625, "loss": 4.098848819732666 }, { "id": "identity_293703", "image": "Super-CLEVR/images/superCLEVR_new_001143.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the size of the motorbike in front of the cyan rubber scooter the same as the size of the cyan rubber object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 22.53125, "loss": 4.999724388122559 }, { "id": "identity_300337", "image": "PMC-VQA/images/PMC7398228_Fig2_224914.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the triangle indicate in the 3D RCA development image?\nChoices:\n(A) normal PComA origin\n(B) aneurysm\n(C) normal PCA origin\n(D) RCA system origin" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.296875, "loss": 4.679562568664551 }, { "id": "identity_216749", "image": "A-OKVQA/images/172886.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many light fixtures are visible in the bathroom?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 26.0625, "loss": 4.436649322509766 }, { "id": "identity_317073", "image": "ScienceQA/images/12155.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Louisville\n(B) Lexington\n(C) Oklahoma City\n(D) Frankfort" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.75, "loss": 4.059879779815674 }, { "id": "identity_269455", "image": "Super-CLEVR/images/superCLEVR_new_000663.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other objects made of the same material as the red object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 17.546875, "loss": 5.120847702026367 }, { "id": "identity_152129", "image": "TQA/images/question_images/tides_149.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the areas labeled as low tide experiencing?" }, { "from": "gpt", "value": "The answer is The lowest level of water" } ], "clip_score": 31.328125, "loss": 4.000951290130615 }, { "id": "identity_189164", "image": "PMC-VQA/images/PMC7646758_f1_292786.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiological sign indicated by the arrow on the chest X-ray?" }, { "from": "gpt", "value": "The answer is Consolidation" } ], "clip_score": 32.90625, "loss": 4.003218650817871 }, { "id": "identity_195175", "image": "PMC-VQA/images/PMC4689804_F1_455978.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the letter 'G' in the corner of the image?" }, { "from": "gpt", "value": "The answer is Likely denotes position in a series or specific orientation" } ], "clip_score": 14.9921875, "loss": 4.018771648406982 }, { "id": "identity_6487", "image": "VizWiz/images/VizWiz_train_00012404.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What's this a picture of?" }, { "from": "gpt", "value": "The answer is paper" } ], "clip_score": 19.0, "loss": 4.7256269454956055 }, { "id": "identity_298688", "image": "ScienceQA/images/19409.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Hawaii?\nChoices:\n(A) Laramie\n(B) Honolulu\n(C) Hilo\n(D) Boston" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.828125, "loss": 4.289621353149414 }, { "id": "identity_266604", "image": "TextVQA/images/798f13861d60dbcd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the researchers' feelings when this model is implemented and followed?" }, { "from": "gpt", "value": "The answer is happy" } ], "clip_score": 26.046875, "loss": 4.236814975738525 }, { "id": "identity_3501", "image": "Super-CLEVR/images/superCLEVR_new_001400.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a dirtbike on the left side of the red rubber biplane; are there any gray buss that are on the left side of it?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 25.15625, "loss": 5.209317207336426 }, { "id": "identity_114041", "image": "VizWiz/images/VizWiz_train_00006737.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many white spherical objects are in the glass bowl?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 14.9375, "loss": 4.897104740142822 }, { "id": "identity_330624", "image": "A-OKVQA/images/312182.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people doing?\nChoices:\n(A) singing\n(B) marching\n(C) swimming\n(D) protesting" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.59375, "loss": 4.099231719970703 }, { "id": "identity_111124", "image": "Super-CLEVR/images/superCLEVR_new_001820.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is in front of the blue bike?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 27.25, "loss": 5.250471115112305 }, { "id": "identity_284910", "image": "PMC-VQA/images/PMC4140120_fig1_314517.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which tissue type is depicted in the image provided?\nOptions:\n(A) Kidney tissue\n(B) Brain tissue\n(C) Lung tissue\n(D) Heart tissue" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.796875, "loss": 4.683169364929199 }, { "id": "identity_292421", "image": "A-OKVQA/images/477919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what type of environment are these fruits cultivated?\nChoices:\n(A) tropical\n(B) desert\n(C) tundra\n(D) temperate" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.84375, "loss": 4.2082109451293945 }, { "id": "identity_28352", "image": "A-OKVQA/images/534734.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who is playing tennis with the boy wearing yellow pants?\nChoices:\n(A) woman\n(B) young man\n(C) boy\n(D) old man" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.484375, "loss": 4.140645980834961 }, { "id": "identity_38367", "image": "A-OKVQA/images/77953.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What place looks most similar to this?\nChoices:\n(A) siberia\n(B) egypt\n(C) missouri\n(D) venice" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 11.7734375, "loss": 4.031005859375 }, { "id": "identity_30024", "image": "PMC-VQA/images/PMC9153154_Fig5_298031.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the black circle indicate in the image? \nChoices:\n(A) Exos injection site \n(B) iRGD injection site \n(C) Tumor site \n(D) SPECT/CT machine location " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.921875, "loss": 4.524021625518799 }, { "id": "identity_300454", "image": "A-OKVQA/images/171950.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this game happening?\nChoices:\n(A) mets\n(B) astros\n(C) yankee\n(D) cubs" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.046875, "loss": 4.226387023925781 }, { "id": "identity_18488", "image": "VizWiz/images/VizWiz_train_00014628.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of speaker is this and how many speakers are there? " }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 23.3125, "loss": 4.465932846069336 }, { "id": "identity_266476", "image": "A-OKVQA/images/218482.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which item is located in the far back right corner?\nChoices:\n(A) baseball stadium\n(B) statue\n(C) elephant\n(D) van" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 11.203125, "loss": 4.063565254211426 }, { "id": "identity_261308", "image": "TQA/images/abc_question_images/earth_eclipses_11627.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter corresponds to the Zone of Totality?\nChoices:\n(A) H\n(B) N\n(C) M\n(D) U" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.40625, "loss": 4.330316543579102 }, { "id": "identity_28157", "image": "PMC-VQA/images/PMC7018895_Fig2_117014.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How does the lesion appear on T2-weighted imaging? \nChoices:\n(A) Hyperintense \n(B) Hypointense \n(C) Isointense \n(D) Heterogeneous " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.578125, "loss": 4.056469917297363 }, { "id": "identity_258111", "image": "TextVQA/images/01ed9f00b2d3265e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the third word in the second line on the phone screen?" }, { "from": "gpt", "value": "The answer is the" } ], "clip_score": 14.65625, "loss": 4.417015075683594 }, { "id": "identity_28236", "image": "ScienceQA/images/5688.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Select the bird below.\nChoices:\n(A) mandarinfish\n(B) American bullfrog\n(C) emerald hummingbird\n(D) California toad" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.0703125, "loss": 4.2103400230407715 }, { "id": "identity_132075", "image": "VQA-AS/images/7110.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.5, "loss": 4.266882419586182 }, { "id": "identity_105660", "image": "A-OKVQA/images/114025.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many colors are visible on the kite?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 24.765625, "loss": 4.080215930938721 }, { "id": "identity_334112", "image": "IconQA/images/97113.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a paw. What is the ninth image?\nChoices:\n(A) house\n(B) dog\n(C) paw" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.453125, "loss": 4.178523540496826 }, { "id": "identity_206606", "image": "A-OKVQA/images/147568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many baseball bats can be seen in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.5, "loss": 4.303194046020508 }, { "id": "identity_202578", "image": "A-OKVQA/images/27777.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the red boat docked at the pier?" }, { "from": "gpt", "value": "The answer is KALIYUGS" } ], "clip_score": 28.875, "loss": 4.279667854309082 }, { "id": "identity_70770", "image": "AI2D/images/3128.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What do the spaces labeled as \"INTERCELLULAR SPACES\" in the diagram indicate?" }, { "from": "gpt", "value": "The answer is Gaps for gas diffusion" } ], "clip_score": 30.1875, "loss": 4.621171951293945 }, { "id": "identity_143067", "image": "MapQA/images/map_4031.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the highest value range on the map?" }, { "from": "gpt", "value": "The answer is Dark Red" } ], "clip_score": 28.203125, "loss": 4.14597749710083 }, { "id": "identity_337721", "image": "A-OKVQA/images/24721.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the wooden thing here?\nChoices:\n(A) paddle\n(B) spoon\n(C) pirate leg\n(D) desk" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.90625, "loss": 4.521097183227539 }, { "id": "identity_216293", "image": "A-OKVQA/images/513053.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the seats in the stadium?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 15.2421875, "loss": 4.450230598449707 }, { "id": "identity_235253", "image": "AI2D/images/1767.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify all organisms in the food web that get energy from both producers and consumers, and then determine which of these organisms is also preyed upon by the owl" }, { "from": "gpt", "value": "The answer is Wood mouse and shrew" } ], "clip_score": 34.34375, "loss": 4.210329055786133 }, { "id": "identity_297608", "image": "A-OKVQA/images/580414.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the distant large triangular object most likely to be?\nChoices:\n(A) sculpture\n(B) cloud\n(C) mountain\n(D) mall" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.1328125, "loss": 4.40925931930542 }, { "id": "identity_105062", "image": "TextVQA/images/cd32056f48c8387f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the author of the book?" }, { "from": "gpt", "value": "The answer is Peter Morville" } ], "clip_score": 15.109375, "loss": 4.114779472351074 }, { "id": "identity_44199", "image": "DVQA/images/bar_train_00078290.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest number of people preferring the \"degree\" subcategory?" }, { "from": "gpt", "value": "The answer is holder" } ], "clip_score": 28.296875, "loss": 5.187947750091553 }, { "id": "identity_329973", "image": "TextVQA/images/3a6d2b256ab581fe.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand of bottled water is shown?" }, { "from": "gpt", "value": "The answer is minere" } ], "clip_score": 17.546875, "loss": 4.057972431182861 }, { "id": "identity_40623", "image": "DVQA/images/bar_train_00165552.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item has the highest number of units sold in the \"job\" store?" }, { "from": "gpt", "value": "The answer is chief" } ], "clip_score": 25.78125, "loss": 4.185755252838135 }, { "id": "identity_228657", "image": "ScienceQA/images/4973.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the state highlighted in the image and then name its capital. Additionally, list one neighboring state and its capital" }, { "from": "gpt", "value": "The answer is Montpelier, Concord" } ], "clip_score": 28.515625, "loss": 4.015647888183594 }, { "id": "identity_333730", "image": "PMC-VQA/images/PMC6696785_F1_24646.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the contrast enhancement focus indicate?\nChoices:\n(A) Normal growth\n(B) Abnormal growth\n(C) Tumor\n(D) None of the above." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.21875, "loss": 4.1955766677856445 }, { "id": "identity_264103", "image": "A-OKVQA/images/270198.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the circular patterns on the ground represent?\nChoices:\n(A) abstract drawing\n(B) paint\n(C) tire tracks\n(D) cracks" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.7109375, "loss": 4.576452255249023 }, { "id": "identity_15775", "image": "AI2D/images/2940.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is the most flexible and hardest muscle inside the mouth?\nChoices:\n(A) Tonsils\n(B) Soft palate\n(C) Lip\n(D) Tongue" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.984375, "loss": 4.130936622619629 }, { "id": "identity_271989", "image": "PMC-VQA/images/PMC6352546_f0050_429212.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of view was utilized to examine the tissue integration of the scaffolds? \nOptions:\n(A) Top-down perspective \n(B) Middle in-plane perspective \n(C) Bottom-up perspective \n(D) Oblique perspective" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.484375, "loss": 4.056708335876465 }, { "id": "identity_141559", "image": "DVQA/images/bar_train_00025614.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset shows the highest accuracy for the 'excess' algorithm?" }, { "from": "gpt", "value": "The answer is help" } ], "clip_score": 32.84375, "loss": 4.057684898376465 }, { "id": "identity_113519", "image": "VQA-AS/images/15288.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many curtains are visible in the room?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 24.796875, "loss": 4.720879554748535 }, { "id": "identity_316874", "image": "FigureQA/images/97581.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Seafoam?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.5, "loss": 4.075408935546875 }, { "id": "identity_278496", "image": "DVQA/images/bar_train_00106175.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of bars with values exceeding 2?" }, { "from": "gpt", "value": "The answer is one" } ], "clip_score": 18.453125, "loss": 4.376490592956543 }, { "id": "identity_29397", "image": "A-OKVQA/images/58143.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What sound will the occupants on the apartments hear through their windows?\nChoices:\n(A) people talking\n(B) bikers\n(C) man walking\n(D) train" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.109375, "loss": 4.459392547607422 }, { "id": "identity_309843", "image": "ScienceQA/images/14953.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Des Moines\n(B) Jefferson City\n(C) Minneapolis\n(D) Davenport" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.859375, "loss": 4.399328708648682 }, { "id": "identity_26582", "image": "A-OKVQA/images/412094.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the train near?\nChoices:\n(A) cow\n(B) building\n(C) wheelbarrow\n(D) freeway" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.484375, "loss": 4.814980983734131 }, { "id": "identity_17676", "image": "IconQA/images/41540.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of fish even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.40625, "loss": 4.040859222412109 }, { "id": "identity_154893", "image": "ScienceQA/images/12568.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the geographical feature to the west of the highlighted state?" }, { "from": "gpt", "value": "The answer is Missouri River" } ], "clip_score": 27.28125, "loss": 4.167238235473633 }, { "id": "identity_320608", "image": "TextVQA/images/d3bdf740a9533be9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the green shirt on the person to the right?" }, { "from": "gpt", "value": "The answer is wifa" } ], "clip_score": 11.796875, "loss": 4.998007297515869 }, { "id": "identity_261768", "image": "FigureQA/images/67925.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options correctly states whether Light Coral is the roughest?\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.625, "loss": 4.003156661987305 }, { "id": "identity_252126", "image": "TextVQA/images/14860b06ba90af45.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the text and symbols on the back of the watch?" }, { "from": "gpt", "value": "The answer is Commemorates the Mille Miglia race route and key locations" } ], "clip_score": 28.9375, "loss": 4.1045098304748535 }, { "id": "identity_302361", "image": "PMC-VQA/images/PMC9042937_Fig3_265881.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is marked in green in this image?\nChoices:\n(A) Nuclei \n(B) Vascular endothelium \n(C) Synapses \n(D) Axons" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.453125, "loss": 4.419508457183838 }, { "id": "identity_30023", "image": "PMC-VQA/images/PMC6373364_Fig1_435169.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What types of images are shown in this representation?\nChoices:\n(A) T1-weighted images; \n(B) CT scans; \n(C) T2-weighted images; \n(D) X-rays." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.5, "loss": 4.466150760650635 }, { "id": "identity_17571", "image": "Super-CLEVR/images/superCLEVR_new_000436.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The yellow metal scooter has what size?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 22.1875, "loss": 4.1227827072143555 }, { "id": "identity_301812", "image": "ScienceQA/images/531.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following organisms is a secondary consumer in this food web?\nChoices:\n(A) zooplankton\n(B) phytoplankton\n(C) kelp\n(D) plainfin midshipman" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.890625, "loss": 4.17957878112793 }, { "id": "identity_328638", "image": "PMC-VQA/images/PMC6905104_Fig5_82262.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What structure is indicated by the arrowheads in this MRI sequence?\nChoices:\n(A) Pulp \n(B) Cortical alveolar \n(C) Extra-alveolar part of dental hard tissues \n(D) Soft tissue inside the infra-orbital canal." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.203125, "loss": 4.796952247619629 }, { "id": "identity_128863", "image": "PMC-VQA/images/PMC8117624_Fig2_439345.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant shade of the image?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 23.546875, "loss": 4.730657577514648 }, { "id": "identity_33706", "image": "A-OKVQA/images/407413.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the camera man jumped over the railing closest to them where would they land?\nChoices:\n(A) table\n(B) grass\n(C) stairs\n(D) kitchen" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.265625, "loss": 4.284125328063965 }, { "id": "identity_262001", "image": "VQA2.0/images/522708.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the issue with the passenger seated at the back of the motorcycle?" }, { "from": "gpt", "value": "The answer is nothing" } ], "clip_score": 23.421875, "loss": 4.535772800445557 }, { "id": "identity_7777", "image": "TQA/images/abc_question_images/human_system_eye_16132.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label refers to the lens?\nChoices:\n(A) D\n(B) W\n(C) S\n(D) V" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.546875, "loss": 4.53159236907959 }, { "id": "identity_284027", "image": "ScienceQA/images/8270.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which question is most suitable for Edgar's experiment to answer:\n(A) Can pennies hold more drops of pure water or water mixed with hand soap?\n(B) Can pennies hold more drops of water mixed with dish soap or water mixed with hand soap?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.421875, "loss": 4.161902904510498 }, { "id": "identity_336519", "image": "IconQA/images/66868.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, how likely is it that you will get a black one?\nChoices:\n(A) probable\n(B) impossible\n(C) certain\n(D) unlikely" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.453125, "loss": 4.034758567810059 }, { "id": "identity_268745", "image": "DocVQA/images/grcv0079_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does \"Dept.\" stand for?" }, { "from": "gpt", "value": "The answer is Oper. Dev." } ], "clip_score": 14.8984375, "loss": 4.370468616485596 }, { "id": "identity_117656", "image": "Super-CLEVR/images/superCLEVR_new_001286.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the largest vehicle in the image?" }, { "from": "gpt", "value": "The answer is The train is red" } ], "clip_score": 24.78125, "loss": 4.427675247192383 }, { "id": "identity_334389", "image": "PMC-VQA/images/PMC6232946_rcr2385-fig-0001_395611.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of imaging was used to capture this? \nChoices:\n(A) X-ray \n(B) MRI \n(C) CT scan \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.453125, "loss": 4.536991119384766 }, { "id": "identity_125921", "image": "PMC-VQA/images/PMC6571909_jcm-08-00701-f001_494621.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the color scales on the left side of each colored image?" }, { "from": "gpt", "value": "The answer is To represent value ranges" } ], "clip_score": 19.71875, "loss": 4.729760646820068 }, { "id": "identity_295432", "image": "PMC-VQA/images/PMC9175008_F4_304069.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of vessels are indicated by the yellow arrowheads in the illustration? \n(A) Arteries \n(B) Veins \n(C) Capillaries \n(D) Lymphatics" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.34375, "loss": 4.400464057922363 }, { "id": "identity_21558", "image": "A-OKVQA/images/179153.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the woman in the white jacket wearing a helmet?\nChoices:\n(A) dress code\n(B) for fun\n(C) style\n(D) protection" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.90625, "loss": 4.0353007316589355 }, { "id": "identity_201091", "image": "PMC-VQA/images/PMC3504249_fig2_167977.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical region is primarily displayed in this image?" }, { "from": "gpt", "value": "The answer is Thigh region" } ], "clip_score": 20.796875, "loss": 4.9761810302734375 }, { "id": "identity_333268", "image": "A-OKVQA/images/225850.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this individual most likely preparing to do?\nChoices:\n(A) working\n(B) birdwatching\n(C) gymnastics\n(D) jogging" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.109375, "loss": 4.364039897918701 }, { "id": "identity_36965", "image": "PMC-VQA/images/PMC8099429_fig5s3_434013.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the condition in which the images were taken? \nChoices:\n(A) Fixed conditions \n(B) Unfixed conditions \n(C) GTA conditions \n(D) Micron conditions " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.890625, "loss": 4.132754325866699 }, { "id": "identity_305716", "image": "FigureQA/images/8255.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.53125, "loss": 4.669821262359619 }, { "id": "identity_33054", "image": "A-OKVQA/images/150410.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the man behind?\nChoices:\n(A) cardboard box\n(B) wrestling ring\n(C) basketball\n(D) replica airplane" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.28125, "loss": 4.535233020782471 }, { "id": "identity_23529", "image": "A-OKVQA/images/362005.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the area the man is skating in made of?\nChoices:\n(A) metal\n(B) plastic\n(C) concrete\n(D) wood" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.1875, "loss": 4.26662015914917 }, { "id": "identity_270507", "image": "PMC-VQA/images/PMC7149879_Fig5_149719.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the image?\nChoices:\n(A) An image of the tumor under a microscope\n(B) An image of the surgical area after the tumor removal\n(C) An image of the surgical area before the tumor removal\n(D) An image of the tumor under a CT scan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.84375, "loss": 4.095689296722412 }, { "id": "identity_307979", "image": "FigureQA/images/41709.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.890625, "loss": 4.8031816482543945 }, { "id": "identity_288745", "image": "A-OKVQA/images/292990.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the dominant hand of the batter in this scenario? Is it the left hand, right hand, neither, or the left foot?\nChoices:\n(A) left\n(B) neither\n(C) left foot\n(D) right" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.078125, "loss": 4.649714469909668 }, { "id": "identity_58883", "image": "DVQA/images/bar_train_00157624.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store has the highest sales for the 'duty' item?" }, { "from": "gpt", "value": "The answer is walk" } ], "clip_score": 29.140625, "loss": 4.156048774719238 }, { "id": "identity_218440", "image": "A-OKVQA/images/526892.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many fluorescent tubes are visible in the light fixture?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.96875, "loss": 4.332328796386719 }, { "id": "identity_33805", "image": "DVQA/images/bar_train_00050305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second bar from the bottom in each group?" }, { "from": "gpt", "value": "The answer is damage" } ], "clip_score": 20.578125, "loss": 4.057826995849609 }, { "id": "identity_209896", "image": "TextVQA/images/0d843d885da424e6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of closure does the side pocket of the backpack have?" }, { "from": "gpt", "value": "The answer is Mesh with elastic" } ], "clip_score": 26.8125, "loss": 4.081902503967285 }, { "id": "identity_335325", "image": "A-OKVQA/images/349059.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What will the substance in the tube be squeezed onto?\nChoices:\n(A) onto washcloth\n(B) toothbrush\n(C) into sink\n(D) onto soap" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.484375, "loss": 4.545443534851074 }, { "id": "identity_235613", "image": "FigureQA/images/81567.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the highest value at x = 50, and what is the difference in value between this color and Dark Orchid at the same x-coordinate?" }, { "from": "gpt", "value": "The answer is Red, difference is 1" } ], "clip_score": 29.578125, "loss": 4.213835716247559 }, { "id": "identity_275110", "image": "FigureQA/images/54826.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Orchid or Light Gold?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.75, "loss": 4.4620513916015625 }, { "id": "identity_127956", "image": "VQA2.0/images/151577.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many construction cranes are visible in the background?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.75, "loss": 4.569828987121582 }, { "id": "identity_281074", "image": "FigureQA/images/40266.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Orchid the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.734375, "loss": 4.253407001495361 }, { "id": "identity_325398", "image": "PMC-VQA/images/PMC7773299_FIG5_332416.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is observed in the CT angiogram of this area?\nChoices:\n(A) Dilated blood vessels\n(B) Peripheral ground-glass opacities bilaterally\n(C) Blood clots\n(D) Cysts" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.40625, "loss": 4.34014892578125 }, { "id": "identity_9196", "image": "PMC-VQA/images/PMC7286949_Fig4_189570.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being shown in image a? \nChoices:\n(A) Scarred tissue \n(B) Left-side pelvic tumor \n(C) Right-side pelvic tumor \n(D) Lung metastasis " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.703125, "loss": 4.562355995178223 }, { "id": "identity_336719", "image": "PMC-VQA/images/PMC8083891_f1_430536.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the red * in the image indicate? \nChoices:\n(A) Esophageal hernia \n(B) Lung calcification \n(C) Pericardial effusion \n(D) Pulmonary nodule" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.546875, "loss": 4.608921051025391 }, { "id": "identity_56240", "image": "DocVQA/images/phpj0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person who wrote the letter?" }, { "from": "gpt", "value": "The answer is Fideles Klocekler" } ], "clip_score": 16.953125, "loss": 5.1706414222717285 }, { "id": "identity_11892", "image": "DVQA/images/bar_train_00072216.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the third group of bars from the left?" }, { "from": "gpt", "value": "The answer is shock" } ], "clip_score": 18.953125, "loss": 4.028085708618164 }, { "id": "identity_5994", "image": "PMC-VQA/images/PMC3708829_pgen-1003641-g010_217436.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is present in all images, except B and F? \nChoices:\n(A) Nucleic acids \n(B) F-actin \n(C) Pendrin \n(D) None of the above. " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.859375, "loss": 4.080942153930664 }, { "id": "identity_302508", "image": "FigureQA/images/58856.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Aqua the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.125, "loss": 4.323126792907715 }, { "id": "identity_130108", "image": "TextVQA/images/0b71ba450dc201c5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the four-letter word inscribed at the bottom of the left seal?" }, { "from": "gpt", "value": "The answer is DOMI" } ], "clip_score": 18.328125, "loss": 4.092040538787842 }, { "id": "identity_197703", "image": "TextVQA/images/64bc09d5e06d9a2d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the denomination of the coin?" }, { "from": "gpt", "value": "The answer is Shilling" } ], "clip_score": 31.390625, "loss": 4.012078762054443 }, { "id": "identity_321926", "image": "A-OKVQA/images/174188.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which famous movie character could use these floating objects for transportation?\nChoices:\n(A) ariel\n(B) bugs bunny\n(C) mary poppins\n(D) tupac shakur" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.7265625, "loss": 4.3104119300842285 }, { "id": "identity_249971", "image": "Super-CLEVR/images/superCLEVR_new_001325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image are made of the same material as the red thing, and what are their colors?" }, { "from": "gpt", "value": "The answer is zero" } ], "clip_score": 23.328125, "loss": 4.208395957946777 }, { "id": "identity_35220", "image": "PMC-VQA/images/PMC6269335_Fig20_404805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the MRI image show? \nChoices:\n(A) A spinal cord injury. \n(B) A brain tumor. \n(C) A hearing loss. \n(D) A broken bone. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.09375, "loss": 4.177742958068848 }, { "id": "identity_330706", "image": "PMC-VQA/images/PMC7514353_entropy-21-01049-f006_260703.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the red contour lines indicate in each figure?\nChoices:\n(A) Position of vortices\n(B) Streamwise velocity values\n(C) Time interval\n(D) Normalized drift velocity values" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.46875, "loss": 4.440464019775391 }, { "id": "identity_336449", "image": "PMC-VQA/images/PMC7201798_Fig1_164065.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was observed in the CT scan of the individual?\nChoices:\n(A) A small mass in the anterior mediastinum\n(B) A large mass in the posterior mediastinum\n(C) A large mass in the anterior mediastinum\n(D) No mass was found in the mediastinum." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.015625, "loss": 4.138904571533203 }, { "id": "identity_32219", "image": "PMC-VQA/images/PMC4729705_Fig19_466499.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the lesion? \nChoices:\n(A) intraconal \n(B) extraconal \n(C) lateral \n(D) inferior " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.28125, "loss": 4.577528476715088 }, { "id": "identity_31513", "image": "AI2D/images/4506.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: To which category does leaf A belong?\nChoices:\n(A) lacerate\n(B) undulate\n(C) entire\n(D) crenate" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.765625, "loss": 4.248306751251221 }, { "id": "identity_217264", "image": "PMC-VQA/images/PMC6359681_F2_431312.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique is depicted in the image?" }, { "from": "gpt", "value": "The answer is MR perfusion" } ], "clip_score": 28.34375, "loss": 4.877077579498291 }, { "id": "identity_317751", "image": "MapQA/images/map_8958.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which places have the lowest value in the USA?" }, { "from": "gpt", "value": "The answer is Massachusetts" } ], "clip_score": 29.984375, "loss": 4.239905834197998 }, { "id": "identity_133662", "image": "Super-CLEVR/images/superCLEVR_new_001199.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly behind the yellow bicycle?" }, { "from": "gpt", "value": "The answer is Motorcycle" } ], "clip_score": 26.859375, "loss": 4.117164134979248 }, { "id": "identity_16914", "image": "IconQA/images/39296.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a fish. Which picture is second?\nChoices:\n(A) fish\n(B) sub\n(C) turtle" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.125, "loss": 4.3436784744262695 }, { "id": "identity_60264", "image": "DocVQA/images/ysbw0217_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the source of the presentation titled \"Self-treatment patterns among adolescent girls with moderate or severe primary dysmenorrhea\"?" }, { "from": "gpt", "value": "The answer is GMA" } ], "clip_score": 17.875, "loss": 4.765964031219482 }, { "id": "identity_13352", "image": "AI2D/images/3452.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What dual structures protrude from its head?\nChoices:\n(A) antennae\n(B) legs\n(C) none of the above\n(D) wing pads" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.984375, "loss": 4.245057106018066 }, { "id": "identity_322970", "image": "A-OKVQA/images/182715.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How is this food being handled?\nChoices:\n(A) scooped\n(B) sliced\n(C) boiled\n(D) poured" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.2734375, "loss": 4.3643798828125 }, { "id": "identity_323856", "image": "ScienceQA/images/20654.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Pittsburgh\n(B) Philadelphia\n(C) Harrisburg\n(D) Montpelier" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.984375, "loss": 4.162033557891846 }, { "id": "identity_286448", "image": "ScienceQA/images/4325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What state of matter does a hammer belong to: solid, liquid, or gas?\nChoices:\n(A) a solid\n(B) a gas\n(C) a liquid" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.53125, "loss": 4.005589962005615 }, { "id": "identity_16517", "image": "ChartQA/images/1030.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What's the color of smallest slice in chart?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 15.34375, "loss": 4.83447790145874 }, { "id": "identity_285165", "image": "TextVQA/images/54bd5cfdda94d534.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the word that appears on the title of the red box on the water bottle?" }, { "from": "gpt", "value": "The answer is tavina" } ], "clip_score": 14.9453125, "loss": 4.134511470794678 }, { "id": "identity_238345", "image": "Super-CLEVR/images/superCLEVR_new_001819.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image are the same shape as the large blue object, and what are their sizes?" }, { "from": "gpt", "value": "The answer is Two objects, both large" } ], "clip_score": 21.34375, "loss": 4.5284810066223145 }, { "id": "identity_67943", "image": "ScienceQA/images/4234.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries are there in the immediate vicinity of the highlighted country?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.0625, "loss": 4.813887596130371 }, { "id": "identity_14440", "image": "ScienceQA/images/6106.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of North Carolina?\nChoices:\n(A) Charlotte\n(B) Hartford\n(C) Raleigh\n(D) Oklahoma City" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.53125, "loss": 4.2266621589660645 }, { "id": "identity_324927", "image": "PMC-VQA/images/PMC4575171_pone.0138226.g003_424927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is highlighted in this section?\nChoices:\n(A) Muscle \n(B) Cartilage \n(C) Bone \n(D) Tendon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.078125, "loss": 4.205259799957275 }, { "id": "identity_295368", "image": "ScienceQA/images/15825.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repelling effect on each other?\nChoices:\n(A) repel\n(B) attract" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.5625, "loss": 4.100600242614746 }, { "id": "identity_48692", "image": "DocVQA/images/zxnf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the \"dummy\" program mentioned in the memorandum?" }, { "from": "gpt", "value": "The answer is For your information" } ], "clip_score": 17.640625, "loss": 4.238478183746338 }, { "id": "identity_282373", "image": "PMC-VQA/images/PMC7581219_FIG3_280601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which perspectives were utilized to evaluate the avidity? \nOptions:\n(A) Transverse, frontal, and oblique\n(B) Axial, sagittal, and coronal\n(C) Anterior, posterior, and lateral\n(D) Base projection, lateral projection, and AP projection" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.796875, "loss": 4.1579742431640625 }, { "id": "identity_28015", "image": "FigureQA/images/50212.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Blue the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.015625, "loss": 4.098831653594971 }, { "id": "identity_275022", "image": "AI2D/images/3268.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Between which labeled parts, G and F, can you find the following structures: Chromatin, Intermediate filaments, Lysosome, and Peroxisome?\nChoices:\n(A) Chromatin\n(B) Intermediate filaments\n(C) Lysosome\n(D) Peroxisome" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.453125, "loss": 4.232987403869629 }, { "id": "identity_44337", "image": "DVQA/images/bar_train_00173544.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the bar with the second highest value?" }, { "from": "gpt", "value": "The answer is corner" } ], "clip_score": 22.34375, "loss": 4.37958288192749 }, { "id": "identity_287391", "image": "PMC-VQA/images/PMC4466257_pone.0129912.g013_396379.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which direction does the external carotid artery run in the upper third of the mouse neck?\nChoices:\n(A) Posteriorly\n(B) Anteriorly\n(C) Laterally\n(D) Medially" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.578125, "loss": 4.379349231719971 }, { "id": "identity_317636", "image": "FigureQA/images/44776.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the minimum amount of Forest Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.015625, "loss": 4.569812297821045 }, { "id": "identity_262496", "image": "IconQA/images/22054.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner more likely to land on: white or yellow?\nChoices:\n(A) white\n(B) yellow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.515625, "loss": 4.174410343170166 }, { "id": "identity_333294", "image": "PMC-VQA/images/PMC6414139_acm212503-fig-0004_449322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this panel show?\nChoices:\n(A) soft tissue in a maxillary sinus\n(B) increase in dose to a retina\n(C) surgical flap shrinkage\n(D) a left maxillary sinus condition" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.90625, "loss": 4.087577819824219 }, { "id": "identity_246454", "image": "FigureQA/images/20889.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined percentage of Medium Aqua and Light Gold, and how does it compare to the percentage of Brown?" }, { "from": "gpt", "value": "The answer is They are equal" } ], "clip_score": 42.4375, "loss": 5.0820231437683105 }, { "id": "identity_204440", "image": "TextVQA/images/9d0d88bbbbf4572e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different newspaper titles can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.53125, "loss": 4.29026985168457 }, { "id": "identity_301170", "image": "IconQA/images/66567.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is indicated? Provide a time word, not a number. It is (_) to ten." }, { "from": "gpt", "value": "The answer is quarter" } ], "clip_score": 27.65625, "loss": 4.877413272857666 }, { "id": "identity_280423", "image": "IconQA/images/42027.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) rotate\n(B) shift\n(C) invert" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.53125, "loss": 4.770606994628906 }, { "id": "identity_133786", "image": "VizWiz/images/VizWiz_train_00014646.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many books can be seen in the background?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 13.75, "loss": 4.8452277183532715 }, { "id": "identity_16583", "image": "TextVQA/images/61408ab56f2fb616.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what program will help you take dreams further?" }, { "from": "gpt", "value": "The answer is quickmemo" } ], "clip_score": 19.71875, "loss": 4.207717418670654 }, { "id": "identity_270690", "image": "PMC-VQA/images/PMC5964484_fig5_317749.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the black arrows in image (d) represent?\nChoices:\n(A) Irrelevant information \n(B) ADC map abnormalities \n(C) FLAIR abnormalities \n(D) Increased attenuation." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.90625, "loss": 4.807461738586426 }, { "id": "identity_74352", "image": "ScienceQA/images/15217.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many finger holes does the second object have?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.5625, "loss": 4.367009162902832 }, { "id": "identity_281712", "image": "AI2D/images/4085.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: To which label does the mantle point?\nChoices:\n(A) C\n(B) G\n(C) B\n(D) A" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.421875, "loss": 4.140299320220947 }, { "id": "identity_189141", "image": "PMC-VQA/images/PMC8796580_Fig2_179495.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is being highlighted by the green fluorescence in the image?" }, { "from": "gpt", "value": "The answer is Liver" } ], "clip_score": 30.140625, "loss": 4.14240026473999 }, { "id": "identity_252338", "image": "ScienceQA/images/16558.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which property do these two objects have in common, and how does this property contribute to their primary function?" }, { "from": "gpt", "value": "The answer is bendable property contributes to portability and comfort" } ], "clip_score": 25.796875, "loss": 4.792129039764404 }, { "id": "identity_52742", "image": "DocVQA/images/lklg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the blank space provided after section 5?" }, { "from": "gpt", "value": "The answer is To provide a reason for impracticability" } ], "clip_score": 20.796875, "loss": 4.045156478881836 }, { "id": "identity_263771", "image": "VQA-AS/images/11190.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me the location of the flower?" }, { "from": "gpt", "value": "The answer is on table" } ], "clip_score": 19.28125, "loss": 4.0019378662109375 }, { "id": "identity_267457", "image": "PMC-VQA/images/PMC6286579_Fig2_409531.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of imaging technique was utilized to observe the nodule? \nOptions:\n(A) CT scan\n(B) PET scan\n(C) MRI scan\n(D) X-ray scan." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.5625, "loss": 4.491135597229004 }, { "id": "identity_20783", "image": "A-OKVQA/images/452775.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most likely continent for this setting?\nChoices:\n(A) africa\n(B) australia\n(C) south america\n(D) antarctica" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.859375, "loss": 4.9011335372924805 }, { "id": "identity_8043", "image": "ScienceQA/images/6754.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Arizona?\nChoices:\n(A) Carson City\n(B) Tucson\n(C) Frankfort\n(D) Phoenix" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.171875, "loss": 4.473461151123047 }, { "id": "identity_203093", "image": "TextVQA/images/38aeecdb55de56ac.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What architectural style is the Colonial Bank building noted for?" }, { "from": "gpt", "value": "The answer is Boom classicism" } ], "clip_score": 36.78125, "loss": 4.287087917327881 }, { "id": "identity_200267", "image": "PMC-VQA/images/PMC9236663_FIG1_325682.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color in the background of the OCT image?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 24.0625, "loss": 4.413957595825195 }, { "id": "identity_273602", "image": "PMC-VQA/images/PMC7450087_Fig3_240709.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the red line in (a) represent?\nChoices:\n(A) Lower power images\n(B) Higher power images\n(C) Areas of interest\n(D) Nothing" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.2265625, "loss": 4.840811729431152 }, { "id": "identity_231882", "image": "Super-CLEVR/images/superCLEVR_new_000628.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image share the same shape as the large cyan rubber thing, and what are their colors?" }, { "from": "gpt", "value": "The answer is One object, cyan" } ], "clip_score": 21.84375, "loss": 4.664837837219238 }, { "id": "identity_281158", "image": "ScienceQA/images/3741.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do cells of Hevea brasiliensis contain a nucleus?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 5.7578125, "loss": 4.149899959564209 }, { "id": "identity_327374", "image": "PMC-VQA/images/PMC7005084_Fig1_112725.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which lobe is showing unusual signals in the image?\nChoices:\n(A) Frontal lobe\n(B) Occipital lobe\n(C) Temporal lobe\n(D) Parietal lobe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.71875, "loss": 4.306408882141113 }, { "id": "identity_126538", "image": "PMC-VQA/images/PMC3386976_pone-0040352-g002_143406.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many primary blood vessels are emanating from the optic nerve head?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 34.125, "loss": 4.079907417297363 }, { "id": "identity_315975", "image": "AI2D/images/3612.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the two large structures on either side?\nChoices:\n(A) sinuses\n(B) lungs\n(C) trachea\n(D) none of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.9375, "loss": 4.081322193145752 }, { "id": "identity_214022", "image": "A-OKVQA/images/412760.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 15.875, "loss": 4.166904449462891 }, { "id": "identity_19878", "image": "A-OKVQA/images/393630.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What could offer protection from the sun?\nChoices:\n(A) bench\n(B) boat\n(C) tree shade\n(D) jackets" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.46875, "loss": 4.538235187530518 }, { "id": "identity_258831", "image": "A-OKVQA/images/362090.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the steps the person will likely take to stand up and grab the cane, and what will they do immediately after grabbing the cane?" }, { "from": "gpt", "value": "The answer is Adjust posture to ensure stability" } ], "clip_score": 23.1875, "loss": 4.619905471801758 }, { "id": "identity_2512", "image": "VQA2.0/images/448609.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the man wearing?" }, { "from": "gpt", "value": "The answer is clothes" } ], "clip_score": 10.0859375, "loss": 4.76143741607666 }, { "id": "identity_8057", "image": "AI2D/images/2339.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the end stage of this cycle?\nChoices:\n(A) Medusa Bud\n(B) Frustule bud\n(C) Mature Medusa\n(D) Colony" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.0, "loss": 4.22870397567749 }, { "id": "identity_307614", "image": "AI2D/images/4223.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, what type of rock is indicated at point B?\nChoices:\n(A) surface rock\n(B) metamorphic\n(C) igneous\n(D) sediment" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.3125, "loss": 4.866153717041016 }, { "id": "identity_311416", "image": "A-OKVQA/images/147970.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What body part is the frisbee being thrown under by the person?\n\nChoices:\n(A) left arm\n(B) right leg\n(C) right arm\n(D) left leg" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.125, "loss": 4.239161968231201 }, { "id": "identity_275105", "image": "Super-CLEVR/images/superCLEVR_new_000843.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the tiny minivan have the same color as the shiny road bike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 24.78125, "loss": 5.459359169006348 }, { "id": "identity_29478", "image": "FigureQA/images/96921.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Orchid have the minimum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.609375, "loss": 4.1674981117248535 }, { "id": "identity_297094", "image": "PMC-VQA/images/PMC5859155_Fig2_291105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is represented by the black dots on the stereographic projections?\nChoices:\n(A) Main poles \n(B) Secondary poles \n(C) Impurities \n(D) Defects" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.90625, "loss": 4.86763858795166 }, { "id": "identity_302923", "image": "Super-CLEVR/images/superCLEVR_new_001251.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the rubber vehicle the same color as the matte vehicle?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 22.484375, "loss": 5.788564682006836 }, { "id": "identity_37490", "image": "DocVQA/images/jsbf0004_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the response code?" }, { "from": "gpt", "value": "The answer is U70" } ], "clip_score": 17.203125, "loss": 4.319365978240967 }, { "id": "identity_314555", "image": "A-OKVQA/images/285096.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is one action the owner of the yellow boat could take to maintain its value?\nChoices:\n(A) junk it\n(B) sink it\n(C) paint\n(D) add flag" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.96875, "loss": 4.215649604797363 }, { "id": "identity_280196", "image": "PMC-VQA/images/PMC7828418_life-11-00047-f008_350918.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the white squares represent in the first photo?\nChoices:\n(A) the experimental lice \n(B) the intestine of the lice \n(C) the arrowheads \n(D) the midgut" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.078125, "loss": 4.5321478843688965 }, { "id": "identity_279981", "image": "VizWiz/images/VizWiz_train_00017851.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The ceremony and speech are over. Is it time to leave now?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 14.3515625, "loss": 4.485901355743408 }, { "id": "identity_24974", "image": "TextVQA/images/7f191d4262b7b28a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what sport are they playing?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 22.0, "loss": 4.579920768737793 }, { "id": "identity_313879", "image": "ScienceQA/images/14761.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What state of matter is a chocolate bar?\nChoices:\n(A) a solid\n(B) a liquid\n(C) a gas" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.390625, "loss": 4.0665483474731445 }, { "id": "identity_294894", "image": "PMC-VQA/images/PMC4062408_pone-0097368-g005_299287.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is represented by the small rectangle on the axial localizer image?\nChoices:\n(A) Position of a voxel from the brain \n(B) Position of a voxel from the liver \n(C) Position of a voxel from the thorax muscles \n(D) Position of a voxel from the legs" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.90625, "loss": 4.787411689758301 }, { "id": "identity_261683", "image": "PMC-VQA/images/PMC6115612_F8_361014.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the four views of the brain from the following options:\n(A) Anterior, inferior, dorsal, ventral \n(B) Posterior, superior, left, right \n(C) Medial, lateral, posterior, superior \n(D) Superior, inferior, left, right" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.484375, "loss": 4.041078090667725 }, { "id": "identity_281336", "image": "PMC-VQA/images/PMC7849607_F2_357213.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following conditions is indicated by the black arrows in the image? \n(A) Gyral atrophy \n(B) Gyral hyperplasia \n(C) Gyral edema \n(D) Gyral ossification" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.828125, "loss": 4.463095664978027 }, { "id": "identity_331322", "image": "A-OKVQA/images/535692.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is surfing originally from?\nChoices:\n(A) mexico\n(B) hawaii\n(C) polynesia\n(D) argentina" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.0, "loss": 4.052487373352051 }, { "id": "identity_18836", "image": "TQA/images/question_images/periodic_table_8158.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which element is tetravalent?\nChoices:\n(A) Oxygen\n(B) Carbon\n(C) Boron\n(D) Nitrogen" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.890625, "loss": 4.499370574951172 }, { "id": "identity_313271", "image": "A-OKVQA/images/550287.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of animals are depicted?\nChoices:\n(A) stuffed\n(B) aquatic\n(C) wild\n(D) domestic" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.21875, "loss": 4.010251045227051 }, { "id": "identity_108346", "image": "VizWiz/images/VizWiz_train_00005558.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the objects in the image?" }, { "from": "gpt", "value": "The answer is Conical" } ], "clip_score": 16.375, "loss": 5.014382839202881 }, { "id": "identity_270397", "image": "FigureQA/images/94701.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Maroon the color with the largest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.03125, "loss": 4.170075416564941 }, { "id": "identity_186756", "image": "PMC-VQA/images/PMC8106577_Fig1_436021.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the texture of the surface surrounding the circular structures?" }, { "from": "gpt", "value": "The answer is Description of the surface texture" } ], "clip_score": 24.40625, "loss": 4.070580959320068 }, { "id": "identity_319868", "image": "AI2D/images/286.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is an omnivore according to this? \nChoices:\n(A) Zebra\n(B) Lion\n(C) Baboon\n(D) Hyena" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.34375, "loss": 4.15441370010376 }, { "id": "identity_36232", "image": "TQA/images/question_images/simple_machines_7556.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In a pulley, what happens to the load when force is applied?\nChoices:\n(A) The load goes down\n(B) The load doesn't move\n(C) The load goes up\n(D) The load falls off" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.421875, "loss": 4.133457183837891 }, { "id": "identity_335837", "image": "PMC-VQA/images/PMC5472191_f0030_179546.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was C5 located?\nChoices:\n(A) Frontal lobe \n(B) Parietal lobe \n(C) Occipital lobe \n(D) Temporal lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.171875, "loss": 4.650892734527588 }, { "id": "identity_319760", "image": "A-OKVQA/images/488014.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where does the animal appear to be standing?\nChoices:\n(A) grass\n(B) parking lot\n(C) street\n(D) sidewalk" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.390625, "loss": 4.421992778778076 }, { "id": "identity_210495", "image": "A-OKVQA/images/425945.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many suitcases are stacked in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 29.21875, "loss": 4.399418830871582 }, { "id": "identity_301120", "image": "TabMWP/images/788.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is this function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.1875, "loss": 4.232630729675293 }, { "id": "identity_30280", "image": "PMC-VQA/images/PMC4222226_F1_333338.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of MRI slice was used to examine the patient? \nChoices:\n(A) T1-weighted axial slice \n(B) T2-weighted coronal slice \n(C) T1-weighted sagittal slice \n(D) FLAIR coronal slice " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.1875, "loss": 4.3470001220703125 }, { "id": "identity_298525", "image": "PMC-VQA/images/PMC7198717_F1_162893.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On the CT axial images, where were the two skull lesions located?\nChoices:\n(A) Temporal lobe\n(B) Pituitary gland\n(C) Frontal lobe\n(D) Occipital lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.4375, "loss": 4.424759864807129 }, { "id": "identity_227321", "image": "Super-CLEVR/images/superCLEVR_new_001442.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image are made of rubber, and what are their colors?" }, { "from": "gpt", "value": "The answer is Three objects; colors are purple" } ], "clip_score": 26.71875, "loss": 4.19472074508667 }, { "id": "identity_23118", "image": "PMC-VQA/images/PMC9374607_Fig1_371743.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the patient's brain is affected by the occlusion?\nChoices:\n(A) Right hemisphere \n(B) Left hemisphere \n(C) Cerebellum \n(D) Brainstem" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.078125, "loss": 4.133039474487305 }, { "id": "identity_251311", "image": "DocVQA/images/hgxv0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last name of the person listed fifth in the distribution list?" }, { "from": "gpt", "value": "The answer is Brenimer" } ], "clip_score": 23.0, "loss": 4.422181129455566 }, { "id": "identity_299683", "image": "TQA/images/abc_question_images/tides_12643.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label that represents the sun from the given choices: (A) X, (B) R, (C) K, (D) O." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.890625, "loss": 4.10731840133667 }, { "id": "identity_309150", "image": "FigureQA/images/51383.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.8359375, "loss": 4.868494987487793 }, { "id": "identity_121284", "image": "A-OKVQA/images/474390.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many jet bridges are visible in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 20.6875, "loss": 5.0980329513549805 }, { "id": "identity_8776", "image": "AI2D/images/3042.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this diagram, which which label stands for Centrioles ?\nChoices:\n(A) D\n(B) F\n(C) L\n(D) A" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.15625, "loss": 4.090195178985596 }, { "id": "identity_306261", "image": "FigureQA/images/91233.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.234375, "loss": 4.24342679977417 }, { "id": "identity_286662", "image": "A-OKVQA/images/5620.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On what type of roadway are these buses currently traveling?\nChoices:\n(A) intersection\n(B) highway\n(C) rural road\n(D) traffic circle" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.90625, "loss": 4.2138872146606445 }, { "id": "identity_316463", "image": "TextVQA/images/67ccc0a32e26b4b6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is this situated?" }, { "from": "gpt", "value": "The answer is that" } ], "clip_score": 11.6875, "loss": 4.58113956451416 }, { "id": "identity_327209", "image": "TextVQA/images/007ce03e56abd3f6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of this phone?" }, { "from": "gpt", "value": "The answer is htc" } ], "clip_score": 18.0625, "loss": 4.037164211273193 }, { "id": "identity_26164", "image": "AI2D/images/118.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What term best describes B in the diagram?\nChoices:\n(A) Autumn\n(B) Winter\n(C) Spring\n(D) Summer" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.828125, "loss": 4.350788593292236 }, { "id": "identity_270041", "image": "A-OKVQA/images/204181.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of kite is it? Is it:\n(A) polygonal\n(B) symmetrical\n(C) rectangle\n(D) rhombus" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.796875, "loss": 4.019659996032715 }, { "id": "identity_188651", "image": "TextVQA/images/5e9e94a96d262585.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the app located between the Photos and YouTube app on the iPhone's home screen?" }, { "from": "gpt", "value": "The answer is SoilWeb" } ], "clip_score": 23.40625, "loss": 4.6632771492004395 }, { "id": "identity_312376", "image": "DVQA/images/bar_train_00179493.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What dataset does this color represent?" }, { "from": "gpt", "value": "The answer is length" } ], "clip_score": 24.25, "loss": 4.0896430015563965 }, { "id": "identity_235126", "image": "PMC-VQA/images/PMC7450087_Fig3_240709.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the red line in (a) in relation to the areas of interest indicated by the arrows in the Hoechst image?" }, { "from": "gpt", "value": "The answer is It helps identify general areas of interest" } ], "clip_score": 18.859375, "loss": 4.325109481811523 }, { "id": "identity_285509", "image": "VizWiz/images/VizWiz_train_00019559.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 17.703125, "loss": 4.114475250244141 }, { "id": "identity_74807", "image": "AI2D/images/1800.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many levels of consumers are depicted in the food chain?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 30.625, "loss": 4.036353588104248 }, { "id": "identity_296419", "image": "A-OKVQA/images/39846.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose is the vent located on top of the stove used?\nChoices:\n(A) smoke\n(B) soda\n(C) coffee\n(D) water" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.75, "loss": 4.111763954162598 }, { "id": "identity_53145", "image": "DocVQA/images/mpcw0217_23.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which paper was submitted to J Women's Health Gender-Based Med.?" }, { "from": "gpt", "value": "The answer is Ke (BB #3)" } ], "clip_score": 18.78125, "loss": 4.660710334777832 }, { "id": "identity_269718", "image": "PMC-VQA/images/PMC8380352_Fig5_38840.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, what is represented by the light blue circles?\nChoices:\n(A) Blood cells\n(B) Iron-positive cells\n(C) Green-colored cells\n(D) Normal cells" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.015625, "loss": 4.575699806213379 }, { "id": "identity_334796", "image": "A-OKVQA/images/308892.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What pattern is on the bike seat?\nChoices:\n(A) leopard print\n(B) stripes\n(C) zebra print\n(D) plaid" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.34375, "loss": 4.132850170135498 }, { "id": "identity_286885", "image": "A-OKVQA/images/498786.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is stored on the rack above the glasses? Is it:\n(A) alcoholic beverages\n(B) spices\n(C) sparkling water\n(D) sodas" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.328125, "loss": 4.390653610229492 }, { "id": "identity_38494", "image": "A-OKVQA/images/326236.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the group on the horses doing?\nChoices:\n(A) conquering\n(B) racing\n(C) touring\n(D) fighting" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.203125, "loss": 4.300065517425537 }, { "id": "identity_5037", "image": "ScienceQA/images/1919.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the question that Evan's experiment can best answer.\nChoices:\n(A) Do steel nails rust in fewer days when submerged in a large volume of liquid compared to a small volume?\n(B) Do steel nails take fewer days to rust in water compared to vinegar?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.359375, "loss": 4.151533603668213 }, { "id": "identity_242590", "image": "TabMWP/images/29629.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which movie starts immediately after the movie that ends at 8:25 P.M.?" }, { "from": "gpt", "value": "The answer is none" } ], "clip_score": 28.9375, "loss": 4.882750034332275 }, { "id": "identity_269559", "image": "A-OKVQA/images/486449.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options best describes the location? \n(A) car dealer\n(B) subway\n(C) biker bar\n(D) walmart" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.609375, "loss": 4.751247406005859 }, { "id": "identity_2669", "image": "DVQA/images/bar_train_00037100.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second bar from the left in each group?" }, { "from": "gpt", "value": "The answer is code" } ], "clip_score": 17.953125, "loss": 4.171520709991455 }, { "id": "identity_324964", "image": "A-OKVQA/images/434990.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be used for identification in this situation?\nChoices:\n(A) sign\n(B) license plate\n(C) snow\n(D) passport" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.015625, "loss": 4.875609397888184 }, { "id": "identity_330988", "image": "TabMWP/images/855.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table displays a function. Is this function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.078125, "loss": 4.234372138977051 }, { "id": "identity_322685", "image": "PMC-VQA/images/PMC6920455_Fig3_86881.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the feature visible in this tissue?\nChoices:\n(A) Lesions \n(B) Contrast uptake \n(C) Airways \n(D) Tissue preservation." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.125, "loss": 4.630964756011963 }, { "id": "identity_158087", "image": "AI2D/images/1878.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many producers are shown in this food web?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 31.25, "loss": 4.84433650970459 }, { "id": "identity_212452", "image": "VQA2.0/images/542027.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many electronic devices are visible on the table?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.0, "loss": 4.463770866394043 }, { "id": "identity_273126", "image": "FigureQA/images/97287.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Violet the curve with the smallest area underneath it?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.9375, "loss": 4.064274787902832 }, { "id": "identity_298520", "image": "ScienceQA/images/8186.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the primary persuasive appeal utilized in this advertisement? Is it:\n(A) ethos (character)\n(B) logos (reason)\n(C) pathos (emotion)" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.59375, "loss": 4.716997146606445 }, { "id": "identity_30184", "image": "PMC-VQA/images/PMC4416891_pone.0121838.g015_382732.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the remaining images? \nChoices:\n(A) To confuse the user \n(B) To provide a visual distraction \n(C) To showcase the different types of sagittal class \n(D) To showcase the different types of frontal class " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.59375, "loss": 4.086709022521973 }, { "id": "identity_287079", "image": "PMC-VQA/images/PMC4376809_Fig22_372382.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which ventricle is the lesion located?\nChoices:\n(A) Lateral ventricle\n(B) Fourth ventricle\n(C) Third ventricle\n(D) Interventricular foramen" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.40625, "loss": 4.0612311363220215 }, { "id": "identity_187665", "image": "VQA2.0/images/223454.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are riding in the cart?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 26.4375, "loss": 4.322998523712158 }, { "id": "identity_266968", "image": "TextVQA/images/070d47f646990950.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which brand of alcohol is that?" }, { "from": "gpt", "value": "The answer is barrique" } ], "clip_score": 20.359375, "loss": 4.840548992156982 }, { "id": "identity_136449", "image": "DocVQA/images/hycv0228_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is presenting on the topic of hazardous drinking?" }, { "from": "gpt", "value": "The answer is Dr. Wolfgang Schmidt" } ], "clip_score": 27.84375, "loss": 4.23906946182251 }, { "id": "identity_317212", "image": "FigureQA/images/56175.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.25, "loss": 4.332474231719971 }, { "id": "identity_197826", "image": "VizWiz/images/VizWiz_train_00016957.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many antennas can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 26.984375, "loss": 4.497211456298828 }, { "id": "identity_272745", "image": "AI2D/images/1197.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the body does the letter C represent?\nChoices:\n(A) Abdomen\n(B) Legs\n(C) Head\n(D) Eyes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.140625, "loss": 4.449717044830322 }, { "id": "identity_289699", "image": "PMC-VQA/images/PMC6855517_F1_68279.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging result was derived from the radiological examination?\nOptions:\n(A) 2D reconstruction of CT images\n(B) 3D reconstruction of PET images\n(C) 3D reconstruction of MRI images\n(D) 2D reconstruction of X-ray images" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.875, "loss": 4.415541648864746 }, { "id": "identity_135604", "image": "MapQA/images/map_4860.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a percentage of hundreds between 26.1% and 29.1%?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 30.390625, "loss": 4.0860724449157715 }, { "id": "identity_268278", "image": "ScienceQA/images/12945.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What state of matter does a pair of scissors belong to?\nChoices:\n(A) solid\n(B) liquid" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.921875, "loss": 4.124314308166504 }, { "id": "identity_187174", "image": "A-OKVQA/images/552054.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many harness straps are visible on the horse?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 27.15625, "loss": 4.596551418304443 }, { "id": "identity_292483", "image": "AI2D/images/4717.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term used to describe an egg-shaped structure with the narrow end attached to the stalk?\nChoices:\n(A) rhomboidal\n(B) lanceolate\n(C) spatulate\n(D) obovate" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.203125, "loss": 4.038147926330566 }, { "id": "identity_44750", "image": "DocVQA/images/pfgn0226_145.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the investigator mentioned in the telephone correspondence?" }, { "from": "gpt", "value": "The answer is Brian Haas" } ], "clip_score": 28.859375, "loss": 4.374606609344482 }, { "id": "identity_307319", "image": "PMC-VQA/images/PMC2915987_F1_70688.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the appearance of the bronchus in the CT image?\nChoices:\n(A) Contracted \n(B) Dilated \n(C) Collapsed \n(D) Clear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.90625, "loss": 4.425446510314941 }, { "id": "identity_334644", "image": "DocVQA/images/xfhb0228_16.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the initial heading in the document?" }, { "from": "gpt", "value": "The answer is clerical section" } ], "clip_score": 20.90625, "loss": 4.1244988441467285 }, { "id": "identity_106763", "image": "A-OKVQA/images/485413.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 12.078125, "loss": 4.337457656860352 }, { "id": "identity_103371", "image": "A-OKVQA/images/208511.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the team uniform of the batter?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 21.28125, "loss": 4.383247375488281 }, { "id": "identity_307028", "image": "FigureQA/images/53633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the maximum value for Dodger Blue?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.046875, "loss": 4.843508243560791 }, { "id": "identity_338029", "image": "PMC-VQA/images/PMC8864031_Fig8_199970.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the image?\nChoices:\n(A) Clusters of amorphous silicon\n(B) Clusters of amorphous diamond\n(C) Clusters of amorphous copper\n(D) Clusters of amorphous iron" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.125, "loss": 4.313641548156738 }, { "id": "identity_117560", "image": "A-OKVQA/images/260901.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the roof of the building with the red front wall?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 16.59375, "loss": 5.1866841316223145 }, { "id": "identity_284616", "image": "TextVQA/images/03b10f45eb118bd5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of camera is this?" }, { "from": "gpt", "value": "The answer is canon" } ], "clip_score": 19.625, "loss": 4.315764427185059 }, { "id": "identity_330228", "image": "PMC-VQA/images/PMC6521358_Fig10_481108.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which view best shows the position of the needle?\nChoices:\n(A) Sagittal view \n(B) Coronal view \n(C) Transverse view \n(D) Axial view" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.109375, "loss": 4.294280529022217 }, { "id": "identity_282811", "image": "ScienceQA/images/11541.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being pointed out in the highlighted area? Is it:\n(A) Saint Vincent and the Grenadines\n(B) Barbados\n(C) Saint Lucia\n(D) Cuba" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.703125, "loss": 4.3893723487854 }, { "id": "identity_265099", "image": "Super-CLEVR/images/superCLEVR_new_000751.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the metallic bicycle the same size as the red metal object in front of the tiny red thing?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 29.4375, "loss": 5.139275074005127 }, { "id": "identity_292193", "image": "VQA2.0/images/29189.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On what type of leash is this dog?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 23.0625, "loss": 4.007240295410156 }, { "id": "identity_187572", "image": "VQA-AS/images/8993.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many logs are burning in the fireplace?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.1875, "loss": 4.113786220550537 }, { "id": "identity_205997", "image": "A-OKVQA/images/314297.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many skateboarders are visible in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 27.859375, "loss": 4.158196449279785 }, { "id": "identity_274427", "image": "VQA2.0/images/366812.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By whom is this performer sponsored?" }, { "from": "gpt", "value": "The answer is olde nj" } ], "clip_score": 16.375, "loss": 4.895732402801514 }, { "id": "identity_112930", "image": "PMC-VQA/images/PMC9023451_Fig1_257887.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical part is being focused on in the cross-sectional view on the right side of the image?" }, { "from": "gpt", "value": "The answer is Forearm" } ], "clip_score": 26.109375, "loss": 4.94732141494751 }, { "id": "identity_121487", "image": "VQA2.0/images/268306.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.078125, "loss": 4.710782527923584 }, { "id": "identity_218235", "image": "TextVQA/images/00672399e1e9e074.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the city mentioned on the scale?" }, { "from": "gpt", "value": "The answer is Webb City" } ], "clip_score": 19.3125, "loss": 4.390097618103027 }, { "id": "identity_230563", "image": "TextVQA/images/d1037638d96a3764.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the color of the building directly behind the mytaxi cab, and how many windows does it have on the visible side?" }, { "from": "gpt", "value": "The answer is white, eight" } ], "clip_score": 31.734375, "loss": 4.649505615234375 }, { "id": "identity_1441", "image": "A-OKVQA/images/339019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the person next to the black banner rent?\nChoices:\n(A) umbrellas\n(B) surfing equipment\n(C) swimsuits\n(D) floaties" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.546875, "loss": 4.370268821716309 }, { "id": "identity_260545", "image": "TextVQA/images/2ed4c1e66632b105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the street where the big truck on the left is located, and what is the color of the traffic light directly above the truck?" }, { "from": "gpt", "value": "The answer is Essex St, red" } ], "clip_score": 24.109375, "loss": 4.228206157684326 }, { "id": "identity_294304", "image": "PMC-VQA/images/PMC6555215_F1_490264.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of fracture is visible in the CT scan? \nOptions:\n(A) Compression fracture \n(B) Burst fracture \n(C) Avulsion fracture \n(D) Stress fracture" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.640625, "loss": 4.049798011779785 }, { "id": "identity_23061", "image": "A-OKVQA/images/106635.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is there a bright circle?\nChoices:\n(A) sun light\n(B) edited in\n(C) bright lamp\n(D) laser light" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 6.58203125, "loss": 4.214599132537842 }, { "id": "identity_312317", "image": "FigureQA/images/30738.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Green larger than Light Gold?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 37.28125, "loss": 4.182000160217285 }, { "id": "identity_6405", "image": "IconQA/images/23748.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can you trace a circle with this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.0625, "loss": 4.3717427253723145 }, { "id": "identity_204868", "image": "TextVQA/images/6af70be36bbe7cca.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many characters in costumes are visible on the field?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.75, "loss": 4.699518203735352 }, { "id": "identity_309041", "image": "IconQA/images/9181.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a crab. Which one is sixth?\nChoices:\n(A) boat\n(B) crab\n(C) bucket" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.8125, "loss": 4.195959568023682 }, { "id": "identity_202062", "image": "PMC-VQA/images/PMC4645184_f6_443921.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any structures in the image that are stained green?" }, { "from": "gpt", "value": "The answer is Yes, thin thread-like structures" } ], "clip_score": 19.125, "loss": 4.249642848968506 }, { "id": "identity_14183", "image": "PMC-VQA/images/PMC7862981_Fig1_360720.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What information is provided by the Figure? \nChoices:\n(A) Fluorescent color coding \n(B) Scan interpretation \n(C) Dates of scan performance \n(D) Animal behavior " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.921875, "loss": 4.285906791687012 }, { "id": "identity_310407", "image": "ScienceQA/images/19865.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What could the test show?\nChoices:\n(A) how much the drone weighed with the blade guards\n(B) if adding the blade guards made the drone fly poorly\n(C) if the blade guards would break in a crash" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.671875, "loss": 4.074243068695068 }, { "id": "identity_14437", "image": "IconQA/images/97431.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a boat. Which picture is tenth?\nChoices:\n(A) bucket\n(B) crab\n(C) boat" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.40625, "loss": 4.167160987854004 }, { "id": "identity_21956", "image": "A-OKVQA/images/85706.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What sort of skill is required at the slope in the foreground here?\nChoices:\n(A) olympic\n(B) professional\n(C) beginner\n(D) hot dog" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.1875, "loss": 4.297958850860596 }, { "id": "identity_265737", "image": "VQA2.0/images/575590.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of buses are those?" }, { "from": "gpt", "value": "The answer is school" } ], "clip_score": 26.015625, "loss": 4.5576887130737305 }, { "id": "identity_298618", "image": "ScienceQA/images/19773.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the bubble map, which sport has been included in both the Summer and Winter Olympics out of the following options:\n(A) ice hockey\n(B) biathlon" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 36.1875, "loss": 4.122919082641602 }, { "id": "identity_321441", "image": "DVQA/images/bar_train_00171447.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest total value?" }, { "from": "gpt", "value": "The answer is desire" } ], "clip_score": 23.28125, "loss": 4.0100507736206055 }, { "id": "identity_323032", "image": "PMC-VQA/images/PMC7876301_F2_365894.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the focus of the study mentioned here?\nChoices:\n(A) Blood circulation\n(B) White matter structures\n(C) Gray matter structures\n(D) Muscle fatigue" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.53125, "loss": 4.803285598754883 }, { "id": "identity_303574", "image": "PMC-VQA/images/PMC7140673_cells-09-00730-f002_147347.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated in red?\nChoices:\n(A) Macrophages\n(B) CD163 staining\n(C) IRF5 expression\n(D) None of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.765625, "loss": 4.628863334655762 }, { "id": "identity_295921", "image": "A-OKVQA/images/356384.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the objective that the individual on the board is attempting to uphold?\nChoices:\n(A) dryness\n(B) height\n(C) speed\n(D) balance" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 8.5625, "loss": 4.42924690246582 }, { "id": "identity_196572", "image": "A-OKVQA/images/449463.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of body of water are the elephants walking through?" }, { "from": "gpt", "value": "The answer is Stream" } ], "clip_score": 26.765625, "loss": 4.296705722808838 }, { "id": "identity_324520", "image": "IconQA/images/21394.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, how likely is it that you will get a black one?\nChoices:\n(A) impossible\n(B) certain\n(C) probable\n(D) unlikely" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.1875, "loss": 4.363246440887451 }, { "id": "identity_317273", "image": "VizWiz/images/VizWiz_train_00018502.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any dosage information visible on this? Thank you." }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 17.140625, "loss": 4.355288505554199 }, { "id": "identity_312490", "image": "PMC-VQA/images/PMC6258168_Fig1_401285.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the numbers above the images indicate?\nChoices:\n(A) Range of T values \n(B) MDD group coordinates \n(C) HCs group coordinates \n(D) Range of MNI z coordinates" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 12.953125, "loss": 4.572901248931885 }, { "id": "identity_296273", "image": "A-OKVQA/images/320875.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for her extending her hands outward?\nChoices:\n(A) threatening woman\n(B) is gesturing\n(C) eating tacos\n(D) drying out" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.203125, "loss": 4.765916347503662 }, { "id": "identity_269595", "image": "TextVQA/images/b9964d4008c1c828.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In what way does the graffiti change the interpretation of the sign by people?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 23.0, "loss": 4.615386962890625 }, { "id": "identity_299781", "image": "A-OKVQA/images/251427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason preventing them from traveling among the following options: (A) no wind, (B) too rainy, (C) no gas, or (D) no water?" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 12.9453125, "loss": 4.398199081420898 }, { "id": "identity_337876", "image": "ScienceQA/images/19000.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Minneapolis\n(B) Springfield\n(C) Milwaukee\n(D) Madison" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.953125, "loss": 4.124119281768799 }, { "id": "identity_310857", "image": "PMC-VQA/images/PMC5874961_acm20315-fig-0012_295162.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the brown line indicate in this image?\nChoices:\n(A) Two beam directions.\n(B) Isocenter plane.\n(C) Outline of PTV.\n(D) Outlines of right and left lungs." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.703125, "loss": 4.395108222961426 }, { "id": "identity_299298", "image": "PMC-VQA/images/PMC5707219_Fig6_245867.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following medical conditions is part of the patient's medical history: Diverticulitis, Appendicitis, Crohn's Disease, or Pancreatitis?\nChoices:\n(A) Diverticulitis \n(B) Appendicitis \n(C) Crohn's Disease \n(D) Pancreatitis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.046875, "loss": 4.545736789703369 }, { "id": "identity_33004", "image": "DVQA/images/bar_train_00099345.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first bar from the left in each group?" }, { "from": "gpt", "value": "The answer is task" } ], "clip_score": 15.765625, "loss": 4.05004358291626 }, { "id": "identity_312623", "image": "FigureQA/images/15088.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.9453125, "loss": 4.46558952331543 }, { "id": "identity_305792", "image": "TQA/images/question_images/erosion_8068.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is beneath the hard rock?\nChoices:\n(A) Water\n(B) Soft rock\n(C) Nothing\n(D) Stream" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.4375, "loss": 4.172089099884033 }, { "id": "identity_34756", "image": "PMC-VQA/images/PMC6020486_fig1_333820.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the abnormality seen in the left eye? \nChoices:\n(A) Optic nerve atrophy \n(B) Swelling of the inferior rectus muscle \n(C) Corneal ulcers \n(D) Subretinal hemorrhage " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.15625, "loss": 4.164093017578125 }, { "id": "identity_311971", "image": "TabMWP/images/38321.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Some individuals played golf and compared their scores. Who achieved the highest score?\nChoices:\n(A) Ellen\n(B) Farid\n(C) Kaylee\n(D) Jon" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 41.90625, "loss": 4.045917510986328 }, { "id": "identity_289098", "image": "TextVQA/images/17d2127854340e4f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of machine is that?" }, { "from": "gpt", "value": "The answer is dell" } ], "clip_score": 20.640625, "loss": 4.991816520690918 }, { "id": "identity_36067", "image": "VQA2.0/images/305916.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the water calm?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 14.6015625, "loss": 4.02260160446167 }, { "id": "identity_261105", "image": "ScienceQA/images/21123.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How do the climatic conditions of the Tibetan Plateau influence its biodiversity and the types of vegetation found there?" }, { "from": "gpt", "value": "The answer is Sparse vegetation adapted to cold, few evergreen trees" } ], "clip_score": 29.84375, "loss": 4.201601028442383 }, { "id": "identity_283704", "image": "A-OKVQA/images/287737.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the man engaged in with the red machine?\nChoices:\n(A) making cider\n(B) coring/peeling apples\n(C) cutting vegetables\n(D) making juice" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.09375, "loss": 4.409031867980957 }, { "id": "identity_32216", "image": "PMC-VQA/images/PMC4956624_Fig26_53972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What diagnostic tool was likely utilized to make this observation? \nChoices:\n(A) X-ray. \n(B) MRI. \n(C) CT scan. \n(D) Echocardiogram. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.09375, "loss": 4.645877361297607 }, { "id": "identity_114839", "image": "TextVQA/images/aedc9664f1afa028.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the box?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 18.6875, "loss": 4.055069923400879 }, { "id": "identity_100", "image": "IconQA/images/97996.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner more likely to land?\nChoices:\n(A) white\n(B) blue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.71875, "loss": 4.080498695373535 }, { "id": "identity_36087", "image": "PMC-VQA/images/PMC6390271_fig2_441021.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of X-ray view was performed on the 70-year-old man? \nChoices:\n(A) Oblique \n(B) Lateral \n(C) Anterior-posterior \n(D) Reverse. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.25, "loss": 4.6911139488220215 }, { "id": "identity_221200", "image": "VQA-AS/images/5705.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of weather does the image suggest?" }, { "from": "gpt", "value": "The answer is Fair weather" } ], "clip_score": 20.109375, "loss": 4.513243198394775 }, { "id": "identity_292046", "image": "A-OKVQA/images/294814.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of animals are depicted in the image?\nChoices:\n(A) tiger\n(B) lion\n(C) horse\n(D) zebra" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.46875, "loss": 4.520870208740234 }, { "id": "identity_18545", "image": "DVQA/images/bar_train_00174194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the largest accuracy summed across all the datasets?" }, { "from": "gpt", "value": "The answer is total" } ], "clip_score": 35.40625, "loss": 4.0441389083862305 }, { "id": "identity_325055", "image": "A-OKVQA/images/281970.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person surrounded by?\nChoices:\n(A) wine bottles\n(B) elk\n(C) video games\n(D) books" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.3125, "loss": 4.490130424499512 }, { "id": "identity_288569", "image": "PMC-VQA/images/PMC4634919_Fig5_441540.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the tool utilized to visualize the microsphere silhouettes? \nChoices:\n(A) Electrons\n(B) X-rays\n(C) Stains\n(D) Microscopes" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.703125, "loss": 4.758038520812988 }, { "id": "identity_311890", "image": "Super-CLEVR/images/superCLEVR_new_000216.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any tiny shiny objects to the right of this?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 18.921875, "loss": 4.983113765716553 }, { "id": "identity_329193", "image": "PMC-VQA/images/PMC5606599_vetsci-04-00020-f006_217799.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What unusual characteristic is seen in this image of the aortic root?\nChoices:\n(A) A normal coronary artery.\n(B) A CAA traversing over an aortic valve annulus.\n(C) A CAA traversing over a pulmonary valve annulus.\n(D) An aortic root dilation." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.34375, "loss": 4.3076348304748535 }, { "id": "identity_334200", "image": "PMC-VQA/images/PMC6861408_Fig21_70111.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the abnormality seen surrounding certain arteries on the left side?\nChoices:\n(A) Fat deposition\n(B) Calcification\n(C) Lymphadenopathy\n(D) None of the above." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.75, "loss": 4.792217254638672 }, { "id": "identity_278116", "image": "TQA/images/abc_question_images/earth_eclipses_10681.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the letter that represents the Umbra among the following choices: (A) K, (B) U, (C) D, or (D) V?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.96875, "loss": 4.798841953277588 }, { "id": "identity_72131", "image": "AI2D/images/560.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does the structure labeled \"Vein\" contribute to the leaf's function?" }, { "from": "gpt", "value": "The answer is Transport and support" } ], "clip_score": 31.890625, "loss": 5.5434980392456055 }, { "id": "identity_298061", "image": "ScienceQA/images/6383.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being pointed out in the highlighted area?\nChoices:\n(A) the Federated States of Micronesia\n(B) the Marshall Islands\n(C) Tonga\n(D) Kiribati" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.5625, "loss": 4.586777687072754 }, { "id": "identity_307671", "image": "FigureQA/images/26925.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Blue Violet?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.53125, "loss": 4.210853576660156 }, { "id": "identity_15434", "image": "AI2D/images/527.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the Diagram stage H represents what?\nChoices:\n(A) metamorphic rock\n(B) sediments\n(C) sedimentary rock\n(D) igneous rock" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.5, "loss": 4.136756420135498 }, { "id": "identity_333742", "image": "PMC-VQA/images/PMC4926393_ijms-17-00859-f004_46281.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the arrows show in these images?\nChoices:\n(A) Undifferentiated cells\n(B) Infected cells\n(C) Differentiated cells\n(D) Dead cells" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.421875, "loss": 4.616736888885498 }, { "id": "identity_288616", "image": "AI2D/images/3888.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, what does K represent?\nChoices:\n(A) terminal bud scale scare\n(B) lenticels\n(C) bundle scars\n(D) leaf scar" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.96875, "loss": 4.375628471374512 }, { "id": "identity_327499", "image": "ScienceQA/images/8875.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared characteristic of these two items?\nChoices:\n(A) blue\n(B) scratchy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.828125, "loss": 4.0813446044921875 }, { "id": "identity_196729", "image": "Super-CLEVR/images/superCLEVR_new_001399.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the car that is to the left of the large gray car?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 29.796875, "loss": 4.479264736175537 }, { "id": "identity_27045", "image": "ScienceQA/images/13668.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which solution has a higher concentration of blue particles?\nChoices:\n(A) Solution A\n(B) Solution B\n(C) neither; their concentrations are the same" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.53125, "loss": 4.1026716232299805 }, { "id": "identity_280605", "image": "Super-CLEVR/images/superCLEVR_new_000947.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the material of the small car located to the right of the large red object the same as the material of the large gray object on the left side of the big road bike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 23.234375, "loss": 5.1313276290893555 }, { "id": "identity_157922", "image": "AI2D/images/1955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trophic levels are represented in the food web diagram?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 26.90625, "loss": 4.107796669006348 }, { "id": "identity_309513", "image": "A-OKVQA/images/380348.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is meant to rock back and forth?\nChoices:\n(A) chair\n(B) cabinet\n(C) table\n(D) stool" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.2734375, "loss": 4.419705390930176 }, { "id": "identity_20431", "image": "DVQA/images/bar_train_00080004.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the chart contain stacked bars?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 25.0625, "loss": 4.170436859130859 }, { "id": "identity_334957", "image": "PMC-VQA/images/PMC8607350_FIG3_116842.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is observed within the fat in both feet?\nChoices:\n(A) Blood vessels \n(B) Fibrous strands \n(C) Cartilage \n(D) Muscles" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.515625, "loss": 4.776719570159912 }, { "id": "identity_261488", "image": "PMC-VQA/images/PMC8537352_insects-12-00850-f002_92118.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be observed inside the gall?\nChoices:\n(A) Unknown spore and veins \n(B) Radial files of epidermal cells and silk thread \n(C) Stomata-like opening and unknown spore \n(D) Unknown spore and caterpillar" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.375, "loss": 4.308254718780518 }, { "id": "identity_270638", "image": "ScienceQA/images/11706.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) Pennsylvania\n(B) Connecticut\n(C) New Jersey\n(D) South Carolina" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.0, "loss": 4.44366455078125 }, { "id": "identity_325858", "image": "PMC-VQA/images/PMC6447645_Fig7_460859.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the lateral ulnar collateral ligament wrap around?\nChoices:\n(A) Anterior aspect of the radial neck. \n(B) Posterior aspect of the radial neck. \n(C) Anterior aspect of the ulnar neck. \n(D) Posterior aspect of the ulnar neck." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.609375, "loss": 4.170266151428223 }, { "id": "identity_283934", "image": "A-OKVQA/images/408528.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options is responsible for blending things in the green based glass pitcher: wheel turning, electric motor, poodles, or solar energy?\nChoices:\n(A) wheel turning\n(B) electric motor\n(C) poodles\n(D) solar energy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.109375, "loss": 4.358944892883301 }, { "id": "identity_211791", "image": "VQA2.0/images/341834.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows are visible in the room?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 26.15625, "loss": 4.347855091094971 }, { "id": "identity_306836", "image": "PMC-VQA/images/PMC3180775_fig1_110121.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which condyle experienced bone loss?\nChoices:\n(A) Medial \n(B) Lateral \n(C) Both \n(D) Neither" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.53125, "loss": 4.075298309326172 }, { "id": "identity_8700", "image": "Super-CLEVR/images/superCLEVR_new_000369.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any fighters that have the same material as the big brown motorbike?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 18.359375, "loss": 5.186495304107666 }, { "id": "identity_200193", "image": "PMC-VQA/images/PMC9369276_jcm-11-04371-f004_369686.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label in the top left corner of the image?" }, { "from": "gpt", "value": "The answer is Figure label \"F" } ], "clip_score": 17.140625, "loss": 5.0309014320373535 }, { "id": "identity_314392", "image": "PMC-VQA/images/PMC7382072_Fig8_220157.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What method was used to stain the cells?\nChoices:\n(A) Wright-Giemsa stain\n(B) Masson's trichrome stain\n(C) Hematoxylin and Eosin stain\n(D) Toluidine blue stain" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.8125, "loss": 4.270668983459473 }, { "id": "identity_118711", "image": "VQA-AS/images/6541.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flowers are visible in the image?" }, { "from": "gpt", "value": "The answer is Five flowers" } ], "clip_score": 14.6171875, "loss": 4.470118045806885 }, { "id": "identity_327353", "image": "FigureQA/images/64132.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Dark Orchid?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.203125, "loss": 4.0186767578125 }, { "id": "identity_34341", "image": "PMC-VQA/images/PMC4000658_fig2_284438.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of image is image A? \nChoices:\n(A) CT, \n(B) PET, \n(C) MR, \n(D) X-ray " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.859375, "loss": 4.312893867492676 }, { "id": "identity_117073", "image": "TextVQA/images/e67e7f98c9be2a99.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many construction workers can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.515625, "loss": 4.542830467224121 }, { "id": "identity_295158", "image": "A-OKVQA/images/147653.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the man on the board performing on the ledge?\nChoices:\n(A) flipping\n(B) grinding\n(C) waxing\n(D) manualing" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.5625, "loss": 4.486681938171387 }, { "id": "identity_302382", "image": "Super-CLEVR/images/superCLEVR_new_001158.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the big matte object behind the bike that is to the right of the vehicle next to the school bus?" }, { "from": "gpt", "value": "The answer is green" } ], "clip_score": 27.09375, "loss": 4.034803867340088 }, { "id": "identity_133795", "image": "A-OKVQA/images/323853.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the majority of the apples?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 20.25, "loss": 4.010910511016846 }, { "id": "identity_222768", "image": "A-OKVQA/images/104486.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels does the luggage cart have?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.390625, "loss": 4.167248725891113 }, { "id": "identity_280074", "image": "PMC-VQA/images/PMC6465293_Fig5_465534.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the red outlined area in the image represent?\nChoices:\n(A) Normal mucosa\n(B) Connective tissue damage\n(C) Necrosis within the tumor\n(D) Tumor beneath the mucosa" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.578125, "loss": 4.269316673278809 }, { "id": "identity_316178", "image": "PMC-VQA/images/PMC7684318_Fig3_303791.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organ exhibited mild congestion before the challenge?\nChoices:\n(A) Lungs \n(B) Liver \n(C) Kidney \n(D) Heart" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.734375, "loss": 4.319472312927246 }, { "id": "identity_222368", "image": "TextVQA/images/8110771084bbabe9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the majority of the floor?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 15.8984375, "loss": 4.418839931488037 }, { "id": "identity_132734", "image": "A-OKVQA/images/518685.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the tent on the left side of the image?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 25.84375, "loss": 4.092601776123047 }, { "id": "identity_215954", "image": "TextVQA/images/25e818806e9e42de.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pots are on the stove?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 27.78125, "loss": 4.254565715789795 }, { "id": "identity_273762", "image": "PMC-VQA/images/PMC4604447_fig3_433019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During the behavior observation of control mice, which part of their body is being suspended?\nChoices:\n(A) tail\n(B) head\n(C) hindlimbs\n(D) forelimbs" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.25, "loss": 4.487858772277832 }, { "id": "identity_16837", "image": "A-OKVQA/images/252280.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Of vehicles seen here which are greenest in regards to emissions?\nChoices:\n(A) motorcycle\n(B) bikes\n(C) bus\n(D) car" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.578125, "loss": 4.350717544555664 }, { "id": "identity_303908", "image": "A-OKVQA/images/526432.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of worker uses this space?\nChoices:\n(A) janitor\n(B) construction\n(C) dentist\n(D) clerical" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.296875, "loss": 4.2849345207214355 }, { "id": "identity_56090", "image": "FigureQA/images/95318.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many colors have a value of 10 or less at x-axis label 0?" }, { "from": "gpt", "value": "The answer is Four colors" } ], "clip_score": 23.1875, "loss": 4.908374309539795 }, { "id": "identity_30302", "image": "PMC-VQA/images/PMC3120882_pone-0021411-g003_100089.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the insert at the lower left of panel j? \nChoices:\n(A) higher magnification of a neuron \n(B) higher magnification of a astrocyte \n(C) higher magnification of a microglial cell \n(D) higher magnification of a oligodendrocyte " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.390625, "loss": 4.154337406158447 }, { "id": "identity_266596", "image": "A-OKVQA/images/466882.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which type of field is a similar sport played? \nChoices:\n(A) soccer\n(B) racquetball\n(C) ice hockey\n(D) water polo" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.046875, "loss": 4.540488243103027 }, { "id": "identity_135451", "image": "DocVQA/images/kgpj0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What should registration only be for, according to the document?" }, { "from": "gpt", "value": "The answer is Where plants are" } ], "clip_score": 24.40625, "loss": 5.995945453643799 }, { "id": "identity_326639", "image": "A-OKVQA/images/498339.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are individuals here to do?\nChoices:\n(A) worship\n(B) travel\n(C) work\n(D) shop" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.4921875, "loss": 4.569838047027588 }, { "id": "identity_280928", "image": "TextVQA/images/c32bda4ba1ed67bc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me the brand of the shirt this person is wearing?" }, { "from": "gpt", "value": "The answer is jako" } ], "clip_score": 17.640625, "loss": 5.412223815917969 }, { "id": "identity_268185", "image": "PMC-VQA/images/PMC8126899_os13015-fig-0005_443332.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of imaging is shown for C2 among the following options: 2D imaging, 3D imaging, CT scan, or X-ray?\nChoices:\n(A) 2D imaging \n(B) 3D imaging \n(C) CT scan \n(D) X-ray" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.25, "loss": 4.105781078338623 }, { "id": "identity_220185", "image": "A-OKVQA/images/94453.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of parking permit is mentioned on the sign?" }, { "from": "gpt", "value": "The answer is Sticker parking" } ], "clip_score": 17.28125, "loss": 4.051870822906494 }, { "id": "identity_18502", "image": "Super-CLEVR/images/superCLEVR_new_000251.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the brown rubber thing have the same size as the object that is on the right side of the tiny yellow object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 18.875, "loss": 5.056434631347656 }, { "id": "identity_188165", "image": "PMC-VQA/images/PMC6431731_fig2_456261.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arrows are pointing to areas of interest in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 20.90625, "loss": 4.331864356994629 }, { "id": "identity_334502", "image": "A-OKVQA/images/197167.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what direction is the narrowest visible part of the board pointing?\nChoices:\n(A) west\n(B) south\n(C) north\n(D) east" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.5859375, "loss": 4.082496166229248 }, { "id": "identity_310457", "image": "FigureQA/images/27804.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Saddle Brown the smallest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.25, "loss": 4.101483345031738 }, { "id": "identity_6531", "image": "VizWiz/images/VizWiz_train_00008960.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the code in the picture?" }, { "from": "gpt", "value": "The answer is duqint" } ], "clip_score": 23.125, "loss": 4.128978729248047 }, { "id": "identity_297503", "image": "PMC-VQA/images/PMC2397490_F9_23367.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of animal was utilized in the experiment mentioned in the caption?\nChoices:\n(A) human\n(B) rat\n(C) mouse\n(D) pig" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.84375, "loss": 4.508975028991699 }, { "id": "identity_307045", "image": "DVQA/images/bar_train_00062772.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the fifth group of bars?" }, { "from": "gpt", "value": "The answer is movie" } ], "clip_score": 19.390625, "loss": 4.56308650970459 }, { "id": "identity_20209", "image": "A-OKVQA/images/572725.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been rubbed off the top of the railing shown here?\nChoices:\n(A) food\n(B) grass\n(C) paint\n(D) nothing" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.9453125, "loss": 4.15761137008667 }, { "id": "identity_108599", "image": "VizWiz/images/VizWiz_train_00012784.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different colors are present on the top?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 14.140625, "loss": 4.610852241516113 }, { "id": "identity_33291", "image": "PMC-VQA/images/PMC3290045_F0001_127783.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the sonography of the pelvis show? \nChoices:\n(A) Normal pelvis \n(B) Small cystic lesion \n(C) Large cystic lesion \n(D) Solid lesion. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.9375, "loss": 4.518367767333984 }, { "id": "identity_325614", "image": "A-OKVQA/images/245497.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who is known for doing this?\nChoices:\n(A) maria sharapova\n(B) bam margera\n(C) idris elba\n(D) david ortiz" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.2109375, "loss": 4.4490485191345215 }, { "id": "identity_273571", "image": "PMC-VQA/images/PMC5348209_F1_138832.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the MRI scan, in which part of the brain is the focal lesion located?\nChoices:\n(A) Frontal lobe\n(B) Temporal lobe\n(C) Parietal lobe\n(D) Occipital lobe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.375, "loss": 4.2938079833984375 }, { "id": "identity_296827", "image": "PMC-VQA/images/PMC4764584_fig3s6_4846.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of tissue is depicted in the image provided? Is it:\n(A) Muscle tissue\n(B) Brain tissue\n(C) Liver tissue\n(D) Skin tissue" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.609375, "loss": 5.046597480773926 }, { "id": "identity_131465", "image": "PMC-VQA/images/PMC4937708_F1_49450.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many openings or ports are visible on the top of the object?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.625, "loss": 4.623605728149414 }, { "id": "identity_335674", "image": "PMC-VQA/images/PMC8123265_jcm-10-01861-f003_441269.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What object pierced the eye? \nChoices:\n(A) Needle \n(B) Pin \n(C) Wire \n(D) Bullet" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.53125, "loss": 4.105612277984619 }, { "id": "identity_265172", "image": "TextVQA/images/ba7979567646d71e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of development is indicated on the sign?" }, { "from": "gpt", "value": "The answer is error" } ], "clip_score": 10.6953125, "loss": 5.863180160522461 }, { "id": "identity_298976", "image": "ScienceQA/images/8290.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following correctly describes the transfer of thermal energy during this time: from the surroundings to each aquarium or from each aquarium to the surroundings?\nChoices:\n(A) the surroundings . . . each aquarium\n(B) each aquarium . . . the surroundings" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.765625, "loss": 4.027329921722412 }, { "id": "identity_55978", "image": "DocVQA/images/pjhg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many reviews were received according to the records?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.203125, "loss": 4.439909934997559 }, { "id": "identity_11622", "image": "PMC-VQA/images/PMC6022407_Fig8_334272.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the upper layer of the microphotograph? \nChoices:\n(A) Granular \n(B) Polymorphic \n(C) Unstained \n(D) Untreated " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.5625, "loss": 4.498319625854492 }, { "id": "identity_331572", "image": "TabMWP/images/36688.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Ethan recorded the number of pages he read over the past 4 days. On which day did he read the most pages?\nChoices:\n(A) Saturday\n(B) Sunday\n(C) Monday\n(D) Tuesday" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 37.25, "loss": 4.078326225280762 }, { "id": "identity_292482", "image": "A-OKVQA/images/18375.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What function does Bank of America serve in the game?\nChoices:\n(A) loan provider\n(B) site provider\n(C) sponsor\n(D) fund provider" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.390625, "loss": 4.161086082458496 }, { "id": "identity_201975", "image": "TextVQA/images/0619b059d30924a1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the ring on the person's finger?" }, { "from": "gpt", "value": "The answer is Silver or white gold" } ], "clip_score": 23.859375, "loss": 4.024656295776367 }, { "id": "identity_332026", "image": "PMC-VQA/images/PMC5709000_F5_246781.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the mass located? \nChoices:\n(A) left uterine adnexa \n(B) pelvis \n(C) right uterine adnexa \n(D) right kidney" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.90625, "loss": 4.4991350173950195 }, { "id": "identity_320747", "image": "A-OKVQA/images/9708.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color hair does the person have?\nChoices:\n(A) blue\n(B) blonde\n(C) red\n(D) green" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.75, "loss": 4.274157524108887 }, { "id": "identity_245394", "image": "A-OKVQA/images/37514.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the oval object on the bike tire, and how does its design contribute to its functionality?" }, { "from": "gpt", "value": "The answer is The design ensures stability and ease of carrying items" } ], "clip_score": 15.0625, "loss": 4.195305824279785 }, { "id": "identity_16046", "image": "VQA-AS/images/17429.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the woman sitting?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 18.5, "loss": 4.163859844207764 }, { "id": "identity_301203", "image": "TextVQA/images/6ee912ba0e67c958.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first omelette being sold here?" }, { "from": "gpt", "value": "The answer is western" } ], "clip_score": 19.5, "loss": 5.114584922790527 }, { "id": "identity_292167", "image": "TQA/images/question_images/tides_128.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In relation to the positions of the Earth and Sun, what is the orientation of the Neap Tide? \nChoices:\n(A) Perpendicular\n(B) Parallel\n(C) Reverse\n(D) Diagonal" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.765625, "loss": 4.442873954772949 }, { "id": "identity_133614", "image": "Super-CLEVR/images/superCLEVR_new_001674.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the bicycle located to the right side of the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 18.1875, "loss": 4.367101192474365 }, { "id": "identity_314994", "image": "PMC-VQA/images/PMC3883696_pone-0085128-g001_257142.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the arrows indicate in the images shown?\nChoices:\n(A) infiltrating tumour cells\n(B) normal bone cells\n(C) osteoblasts\n(D) reactive new bone cells" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.375, "loss": 5.29765510559082 }, { "id": "identity_11005", "image": "AI2D/images/3857.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What formes the defense barrier\nChoices:\n(A) palisade\n(B) wax cuticle\n(C) lower epidermis\n(D) stoma" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.484375, "loss": 5.012263774871826 }, { "id": "identity_24778", "image": "PMC-VQA/images/PMC3919902_f1-etm-07-03-0583_265997.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is seen in the right colon in the abdominal radiograph? \nChoices:\n(A) Fluid accumulation \n(B) Gas pockets \n(C) Fecal material \n(D) Air-filled loops. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.921875, "loss": 4.839425086975098 }, { "id": "identity_312470", "image": "AI2D/images/4873.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of natural forming space is shown here?\nChoices:\n(A) Plateau\n(B) Sea\n(C) Volcano\n(D) Mountain" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.921875, "loss": 4.047698497772217 }, { "id": "identity_287142", "image": "PMC-VQA/images/PMC6447645_Fig7_460859.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Around which part does the lateral ulnar collateral ligament wrap? \nChoices:\n(A) Anterior aspect of the radial neck.\n(B) Posterior aspect of the radial neck.\n(C) Anterior aspect of the ulnar neck.\n(D) Posterior aspect of the ulnar neck." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.375, "loss": 4.337983131408691 }, { "id": "identity_186878", "image": "A-OKVQA/images/430731.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of device is on the left side of the desk?" }, { "from": "gpt", "value": "The answer is Mobile phone" } ], "clip_score": 26.90625, "loss": 4.176827907562256 }, { "id": "identity_29936", "image": "PMC-VQA/images/PMC7310446_Fig2_197416.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the nodule located? \nChoices:\n(A) Right upper lobe \n(B) Left lower lobe \n(C) Right lower lobe \n(D) Left upper lobe " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.140625, "loss": 4.380197048187256 }, { "id": "identity_110424", "image": "VQA2.0/images/50961.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pots are visible on the stove?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 27.4375, "loss": 4.296090602874756 }, { "id": "identity_298464", "image": "IconQA/images/47271.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture comes after the first picture of a turtle?\nChoices:\n(A) fish\n(B) sub\n(C) turtle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.5625, "loss": 4.374299049377441 }, { "id": "identity_231591", "image": "VQA2.0/images/14375.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of event is depicted in the image, and what are the key elements that indicate this?" }, { "from": "gpt", "value": "The answer is A kite festival" } ], "clip_score": 25.28125, "loss": 4.326509952545166 }, { "id": "identity_25914", "image": "PMC-VQA/images/PMC6186048_Fig1_381635.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of radiographs show the intervertebral disc calcifications in the example? \nChoices:\n(A) Anterior-posterior views \n(B) Lateral views \n(C) Oblique views \n(D) Cranial-caudal views " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.640625, "loss": 4.154291152954102 }, { "id": "identity_119858", "image": "VQA2.0/images/125275.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bunches of bananas are visible in the image?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 27.5625, "loss": 4.682676792144775 }, { "id": "identity_275423", "image": "A-OKVQA/images/263453.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the current situation or event taking place here?\nChoices:\n(A) umbrella sale\n(B) going home\n(C) surprise rain\n(D) u2 concert" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.6875, "loss": 4.046423435211182 }, { "id": "identity_320775", "image": "PMC-VQA/images/PMC4937711_F19_49533.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the MRI finding in this region? \nChoices:\n(A) Hyperintense soft tissue deposit \n(B) Hypointense soft tissue deposit \n(C) Isointense soft tissue deposit \n(D) No soft tissue deposit seen" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.84375, "loss": 4.429027080535889 }, { "id": "identity_277446", "image": "Super-CLEVR/images/superCLEVR_new_000293.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the brown object located in front of the metallic object behind the yellow school bus?" }, { "from": "gpt", "value": "The answer is cruiser" } ], "clip_score": 21.359375, "loss": 5.1993489265441895 }, { "id": "identity_324367", "image": "A-OKVQA/images/185834.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the green item is part of this person's religion, what are they most likely unfamiliar with?\nChoices:\n(A) diwali\n(B) baptism\n(C) extreme unction\n(D) christmas" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.8046875, "loss": 4.268635272979736 }, { "id": "identity_294816", "image": "Super-CLEVR/images/superCLEVR_new_000750.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there a smaller number of tiny jets compared to tiny purple metallic cruisers?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 23.625, "loss": 5.020501136779785 }, { "id": "identity_156920", "image": "ScienceQA/images/3645.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the geographical feature that separates the highlighted country from the one to its north?" }, { "from": "gpt", "value": "The answer is Rio Grande" } ], "clip_score": 23.0625, "loss": 4.117751598358154 }, { "id": "identity_15292", "image": "A-OKVQA/images/468953.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are they discussing?\nChoices:\n(A) traffic\n(B) gas cost\n(C) plans date\n(D) gun cost" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.0625, "loss": 4.3105854988098145 }, { "id": "identity_283729", "image": "PMC-VQA/images/PMC8796580_Fig2_179495.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the green arrow in the image represent? Is it:\n(A) The tumour border.\n(B) The PRM for the ICG group.\n(C) The exact location of the tumor.\n(D) The tumour's blood supply." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.703125, "loss": 4.274879455566406 }, { "id": "identity_291902", "image": "A-OKVQA/images/539298.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location did this event take place?\nChoices:\n(A) Brazil\n(B) China\n(C) Paris\n(D) Hong Kong" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.8046875, "loss": 4.21083927154541 }, { "id": "identity_266642", "image": "A-OKVQA/images/119666.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which company manufactured this vehicle?\nOptions:\n(A) Mercedes\n(B) Toyota\n(C) Audi\n(D) Hyundai" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.0078125, "loss": 4.034994125366211 }, { "id": "identity_285499", "image": "A-OKVQA/images/200643.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country does this flag symbolize? \nOptions:\n(A) United Arab Emirates\n(B) Italy\n(C) Egypt\n(D) Romania" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.03125, "loss": 4.081872463226318 }, { "id": "identity_29451", "image": "ScienceQA/images/1538.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these four objects have in common?\nChoices:\n(A) opaque\n(B) flexible\n(C) shiny" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.5, "loss": 4.256705284118652 }, { "id": "identity_105185", "image": "VizWiz/images/VizWiz_train_00014440.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the object's body?" }, { "from": "gpt", "value": "The answer is Cuboid" } ], "clip_score": 22.40625, "loss": 4.600161075592041 }, { "id": "identity_284699", "image": "PMC-VQA/images/PMC4814887_phy212730-fig-0002_16870.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which staining method was utilized for the analysis? \nOptions:\n(A) Gram staining\n(B) Periodic acid-Schiff\n(C) Hematoxylin-Eosin\n(D) Giemsa staining" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.5, "loss": 4.0192084312438965 }, { "id": "identity_946", "image": "AI2D/images/3242.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which region in the diagram represents the cytoplasm?\nChoices:\n(A) m\n(B) j\n(C) d\n(D) n" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.578125, "loss": 4.419312000274658 }, { "id": "identity_312481", "image": "A-OKVQA/images/304008.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action is being performed with the frisbee?\nChoices:\n(A) throwing\n(B) blocking\n(C) slapping\n(D) catching" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.421875, "loss": 4.302975654602051 }, { "id": "identity_301881", "image": "FigureQA/images/90784.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.1015625, "loss": 4.16718053817749 }, { "id": "identity_322203", "image": "Super-CLEVR/images/superCLEVR_new_000013.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a brown rubber thing of the same shape as this tiny yellow rubber object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 10.765625, "loss": 5.111309051513672 }, { "id": "identity_207346", "image": "A-OKVQA/images/392809.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the horse in the image?" }, { "from": "gpt", "value": "The answer is white" } ], "clip_score": 21.078125, "loss": 4.410786151885986 }, { "id": "identity_183709", "image": "VQA-AS/images/19047.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the pants the person is wearing?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 17.765625, "loss": 4.060788631439209 }, { "id": "identity_269590", "image": "AI2D/images/3041.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label corresponds to the golgi apparatus among the following choices: F, M, N, or B?\nChoices:\n(A) F\n(B) M\n(C) N\n(D) B" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.515625, "loss": 4.5401105880737305 }, { "id": "identity_10640", "image": "AI2D/images/2959.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The body opening through which an animal takes in food is called\nChoices:\n(A) tonsil\n(B) hard palate\n(C) mouth\n(D) tongue" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.265625, "loss": 4.276718616485596 }, { "id": "identity_305305", "image": "ScienceQA/images/15946.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option is a mammal?\nChoices:\n(A) Galapagos giant tortoise\n(B) zebra" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.890625, "loss": 4.06044340133667 }, { "id": "identity_318717", "image": "ScienceQA/images/4278.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Fill in the blank.\nCarbon dioxide is ().\nOptions:\n(A) a compound\n(B) an elementary substance" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.46875, "loss": 4.113146781921387 }, { "id": "identity_209377", "image": "A-OKVQA/images/446536.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many books are visible in the window display?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 23.78125, "loss": 4.127163887023926 }, { "id": "identity_285537", "image": "AI2D/images/4109.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of letter A in the given choices?\nChoices:\n(A) chlorophyll\n(B) O2\n(C) glucose\n(D) sunlight" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.578125, "loss": 4.198286533355713 }, { "id": "identity_198073", "image": "PMC-VQA/images/PMC7251749_Fig1_178969.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any markers or symbols used on the graph?" }, { "from": "gpt", "value": "The answer is Yes, blue triangle markers" } ], "clip_score": 20.921875, "loss": 4.686801433563232 }, { "id": "identity_19962", "image": "A-OKVQA/images/253250.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the boat is the person in the red shirt standing on?\nChoices:\n(A) bow\n(B) stern\n(C) transom\n(D) hull" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.8125, "loss": 4.488617897033691 }, { "id": "identity_311524", "image": "PMC-VQA/images/PMC6748141_Fig5_38873.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the green stain?\nChoices:\n(A) Sox9\n(B) αSMA\n(C) CD44\n(D) MCDD" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.453125, "loss": 4.131400108337402 }, { "id": "identity_105162", "image": "TextVQA/images/d7f2c429f39dce0f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What hairstyle is the person sporting?" }, { "from": "gpt", "value": "The answer is Long straight hair" } ], "clip_score": 22.765625, "loss": 4.599183559417725 }, { "id": "identity_111304", "image": "TextVQA/images/02fa299740ebc5ab.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many air conditioning units are on the roof of the bus?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 25.1875, "loss": 4.794233798980713 }, { "id": "identity_276000", "image": "Super-CLEVR/images/superCLEVR_new_000159.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the small metallic object to the right of the bus that is located in front of the metal dirtbike?" }, { "from": "gpt", "value": "The answer is utility bike" } ], "clip_score": 25.328125, "loss": 5.1133341789245605 }, { "id": "identity_3487", "image": "FigureQA/images/55193.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Aqua the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.796875, "loss": 4.541879177093506 }, { "id": "identity_321478", "image": "A-OKVQA/images/88538.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these umbrellas being used for?\nChoices:\n(A) lighting\n(B) blocking sun\n(C) stopping rain\n(D) decoration" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.9296875, "loss": 4.399062156677246 }, { "id": "identity_251544", "image": "A-OKVQA/images/416101.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tennis balls is the person holding in their hands, and what is the color of the tennis balls?" }, { "from": "gpt", "value": "The answer is Two, yellow" } ], "clip_score": 23.203125, "loss": 4.390291690826416 }, { "id": "identity_78244", "image": "ScienceQA/images/12969.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many producers are depicted in the food web?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.625, "loss": 4.7270917892456055 }, { "id": "identity_274331", "image": "TQA/images/abc_question_images/human_system_circulatory_11379.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents the left lung?\nChoices:\n(A) S\n(B) V\n(C) Y\n(D) F" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.578125, "loss": 4.317615032196045 }, { "id": "identity_289026", "image": "Super-CLEVR/images/superCLEVR_new_001325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do any red minivans share the same material as the red object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 21.421875, "loss": 5.020663261413574 }, { "id": "identity_5353", "image": "PMC-VQA/images/PMC3488525_F3_163820.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which vessels are compressed by the said mass? \nChoices:\n(A) Hepatic vein and hepatic artery \n(B) Hepatic vein and inferior vena cava \n(C) Hepatic portal and hepatic artery \n(D) Hepatic portal and inferior vena cava " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.3125, "loss": 4.042862892150879 }, { "id": "identity_299999", "image": "A-OKVQA/images/521874.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is this so small?\nChoices:\n(A) small engineer\n(B) is broken\n(C) for children\n(D) is old" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 7.5546875, "loss": 4.086417198181152 }, { "id": "identity_323597", "image": "PMC-VQA/images/PMC2820284_fig1_56500.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this located?\nChoices:\n(A) Bilateral chest wall,\n(B) Near the stomach,\n(C) Along the spinal cord,\n(D) Near the adrenal gland" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.515625, "loss": 5.046878814697266 }, { "id": "identity_17947", "image": "Super-CLEVR/images/superCLEVR_new_000419.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The big tandem bike has what color?" }, { "from": "gpt", "value": "The answer is yellow" } ], "clip_score": 22.375, "loss": 4.486772060394287 }, { "id": "identity_276408", "image": "ScienceQA/images/19485.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the image, what is the most suitable word to describe the sound of the water in the picture?\nChoices:\n(A) dripping\n(B) snapping\n(C) growling" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.859375, "loss": 4.742184162139893 }, { "id": "identity_277670", "image": "FigureQA/images/95318.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Salmon the color with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.375, "loss": 4.051863193511963 }, { "id": "identity_15201", "image": "A-OKVQA/images/191293.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this room located?\nChoices:\n(A) office\n(B) school\n(C) home\n(D) store" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.328125, "loss": 4.320526123046875 }, { "id": "identity_221133", "image": "A-OKVQA/images/84474.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many seaplanes are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.984375, "loss": 4.449791431427002 }, { "id": "identity_38835", "image": "FigureQA/images/17611.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 30.9375, "loss": 4.547910213470459 }, { "id": "identity_327522", "image": "PMC-VQA/images/PMC9356014_Fig1_365449.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the third row in this show?\nChoices:\n(A) Anterior vascular leakage grading,\n(B) Posterior vascular leakage grading,\n(C) Macular hyperfluorescence grading,\n(D) Leakage domain grading." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.84375, "loss": 4.294554233551025 }, { "id": "identity_313211", "image": "TextVQA/images/4db6d43d361105c5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the sponsor in the corner on the side?" }, { "from": "gpt", "value": "The answer is sesoma" } ], "clip_score": 17.390625, "loss": 5.766800403594971 }, { "id": "identity_183971", "image": "PMC-VQA/images/PMC7875425_pone.0246752.g003_365450.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the background in both images D and H?" }, { "from": "gpt", "value": "The answer is Light blue" } ], "clip_score": 19.03125, "loss": 4.20908784866333 }, { "id": "identity_285408", "image": "IconQA/images/77593.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the probability of selecting a black marble without looking?\nChoices:\n(A) impossible\n(B) probable\n(C) unlikely\n(D) certain" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.015625, "loss": 4.424724578857422 }, { "id": "identity_265728", "image": "DVQA/images/bar_train_00170159.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of stacks of bars that have at least one element with a value less than 6?" }, { "from": "gpt", "value": "The answer is four" } ], "clip_score": 20.71875, "loss": 4.106038570404053 }, { "id": "identity_108616", "image": "TextVQA/images/5cf7e1947a240b38.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main theme of the book's content?" }, { "from": "gpt", "value": "The answer is Escaping from unexpected situations" } ], "clip_score": 21.40625, "loss": 4.867975234985352 }, { "id": "identity_323922", "image": "PMC-VQA/images/PMC3524754_F2_173978.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What body part is shown in the cross-sectional image?\nChoices:\n(A) Forearm \n(B) Thigh \n(C) Head \n(D) Foot" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.890625, "loss": 4.0880231857299805 }, { "id": "identity_289744", "image": "A-OKVQA/images/556073.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which brand name corresponds to the oral care product displayed here: Glide, Oral-B, Crest, or Colgate?\nChoices:\n(A) glide\n(B) oral-b\n(C) crest\n(D) colgate" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.171875, "loss": 4.441176414489746 }, { "id": "identity_17399", "image": "A-OKVQA/images/79975.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the event of the boarder losing their balance what will protect their cranium?\nChoices:\n(A) shirt\n(B) helmet\n(C) knee pads\n(D) wrist guards" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.546875, "loss": 4.4333906173706055 }, { "id": "identity_305134", "image": "ScienceQA/images/2772.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared characteristic of these four items?\nChoices:\n(A) opaque\n(B) hard\n(C) slippery" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.78125, "loss": 4.362444877624512 }, { "id": "identity_303876", "image": "AI2D/images/4156.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How does igneous rock transform into metamorphic rock?\n\nChoices:\n(A) Cooling and crystallizing\n(B) Weathering\n(C) Heating and compacting\n(D) Melting" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.28125, "loss": 4.148715019226074 }, { "id": "identity_301808", "image": "AI2D/images/4006.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, which part is the mantle?\nChoices:\n(A) A\n(B) D\n(C) B\n(D) C" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.6875, "loss": 4.015657901763916 }, { "id": "identity_20422", "image": "A-OKVQA/images/200678.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How is the grill acquiring its heat source?\nChoices:\n(A) gas\n(B) wood chips\n(C) electricity\n(D) charcoal" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.9375, "loss": 4.036524295806885 }, { "id": "identity_270564", "image": "TQA/images/abc_question_images/parts_microscope_17177.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the label that corresponds to the diopter among the given choices: P, W, H, or L?\nChoices:\n(A) P\n(B) W\n(C) H\n(D) L" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.625, "loss": 4.0300188064575195 }, { "id": "identity_270180", "image": "TQA/images/abc_question_images/cycle_water_14974.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following choices represents the process of evaporation from the ocean?\n(A) H\n(B) E\n(C) W\n(D) X" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.9375, "loss": 4.459724426269531 }, { "id": "identity_261556", "image": "TabMWP/images/23875.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which month did the charity receive the highest number of donations?\nChoices:\n(A) October\n(B) November\n(C) December\n(D) January" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 38.9375, "loss": 4.016263008117676 }, { "id": "identity_292909", "image": "PMC-VQA/images/PMC3625575_fig2_198331.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reference of the arrows in the image to?\nChoices:\n(A) epidermis \n(B) dermis \n(C) collagen fiber \n(D) granular layer" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.28125, "loss": 5.256330966949463 }, { "id": "identity_282698", "image": "A-OKVQA/images/306972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following individuals is in close proximity to the train? \n(A) cowboy \n(B) police officer \n(C) conductor \n(D) firefighter" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.296875, "loss": 4.552494525909424 }, { "id": "identity_199181", "image": "Super-CLEVR/images/superCLEVR_new_000820.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels can be seen on the truck?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.75, "loss": 4.163095951080322 }, { "id": "identity_277871", "image": "Super-CLEVR/images/superCLEVR_new_000055.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the small rubber object in front of the small blue metal object the same color as the matte object behind the large bicycle?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 27.953125, "loss": 5.1956586837768555 }, { "id": "identity_197927", "image": "A-OKVQA/images/244986.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the helmet worn by the person on the motorcycle closest to the camera?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 25.828125, "loss": 4.224358558654785 }, { "id": "identity_302305", "image": "TextVQA/images/7058614605803dc8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What service is provided here?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 12.4453125, "loss": 4.237792015075684 }, { "id": "identity_38385", "image": "ScienceQA/images/10490.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Vermont?\nChoices:\n(A) Montpelier\n(B) Philadelphia\n(C) Burlington\n(D) Trenton" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.625, "loss": 4.128687381744385 }, { "id": "identity_286018", "image": "DocVQA/images/sllg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the title?" }, { "from": "gpt", "value": "The answer is research asst" } ], "clip_score": 17.65625, "loss": 5.236311912536621 }, { "id": "identity_280734", "image": "ScienceQA/images/16710.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the common name of this organism?\nChoices:\n(A) great blue heron\n(B) Ardea herodias" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.4375, "loss": 4.031451225280762 }, { "id": "identity_322732", "image": "A-OKVQA/images/174896.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is in the little green bottle in between the two bowls?\nChoices:\n(A) salad dressing\n(B) steak sauce\n(C) bitters\n(D) alcohol" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.21875, "loss": 4.2356719970703125 }, { "id": "identity_310657", "image": "PMC-VQA/images/PMC6970096_FIG2_102919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which view shows the effusion more clearly?\nChoices:\n(A) Sagittal view\n(B) Transverse view\n(C) Both views show it equally clearly\n(D) No view shows it clearly" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.65625, "loss": 4.260500907897949 }, { "id": "identity_147410", "image": "DocVQA/images/sllg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title written next to the name on the document?" }, { "from": "gpt", "value": "The answer is T 117 PB Research Asst" } ], "clip_score": 21.875, "loss": 4.306971073150635 }, { "id": "identity_187566", "image": "A-OKVQA/images/312926.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats are visible on the right side of the dock?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 24.0625, "loss": 4.370021343231201 }, { "id": "identity_37040", "image": "ScienceQA/images/8621.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these two objects have in common?\nChoices:\n(A) bumpy\n(B) smooth" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.609375, "loss": 4.253377914428711 }, { "id": "identity_334729", "image": "A-OKVQA/images/111673.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is this likely to be?\nChoices:\n(A) europe\n(B) asia\n(C) antarctica\n(D) south america" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.875, "loss": 4.303097724914551 }, { "id": "identity_21001", "image": "A-OKVQA/images/522990.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the person who is in charge of the horse and carriage?\nChoices:\n(A) horses head\n(B) in market\n(C) running races\n(D) bathroom" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.046875, "loss": 4.3756585121154785 }, { "id": "identity_210885", "image": "A-OKVQA/images/463306.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many light poles are visible in the background?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 12.9296875, "loss": 4.458262920379639 }, { "id": "identity_107689", "image": "TextVQA/images/00bdc07a03221bd8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any branding visible on the containers?" }, { "from": "gpt", "value": "The answer is Yes, Lock&Lock" } ], "clip_score": 21.734375, "loss": 4.27858304977417 }, { "id": "identity_289305", "image": "A-OKVQA/images/10145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following large rectangular-shaped objects is closest to the TV: a cabinet, console, speaker, or stereo?\nChoices:\n(A) cabinet\n(B) console\n(C) speaker\n(D) stereo" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.375, "loss": 4.061854362487793 }, { "id": "identity_275028", "image": "Super-CLEVR/images/superCLEVR_new_000194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a yellow airliner behind the brown metal object located to the right of the matte object, which is to the left of the tiny gray matte car?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 27.59375, "loss": 5.299761772155762 }, { "id": "identity_312407", "image": "TQA/images/abc_question_images/human_system_muscular_16156.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the triceps?\nChoices:\n(A) X\n(B) R\n(C) U\n(D) K" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.890625, "loss": 4.885763645172119 }, { "id": "identity_144646", "image": "DocVQA/images/tlph0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the research project leader mentioned in the letter?" }, { "from": "gpt", "value": "The answer is Evans Banbury" } ], "clip_score": 23.5625, "loss": 6.227787494659424 }, { "id": "identity_125542", "image": "VQA2.0/images/117349.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wine glasses are on the table?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 21.09375, "loss": 4.074770927429199 }, { "id": "identity_313705", "image": "TextVQA/images/3b3bc758010e3ef9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does it say on the hood of the car next to the orange truck with flames?" }, { "from": "gpt", "value": "The answer is stutz" } ], "clip_score": 23.484375, "loss": 4.020446300506592 }, { "id": "identity_207353", "image": "VQA2.0/images/343322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many palm trees can be seen in front of the building?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 20.734375, "loss": 4.666682243347168 }, { "id": "identity_325604", "image": "TextVQA/images/12533c9193166ae1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this restaurant called?" }, { "from": "gpt", "value": "The answer is imperial palace" } ], "clip_score": 23.53125, "loss": 4.023282051086426 }, { "id": "identity_262401", "image": "TextVQA/images/043a59888f5bcb7b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which brand of detergent is available for purchase?" }, { "from": "gpt", "value": "The answer is sun'" } ], "clip_score": 28.546875, "loss": 4.283775329589844 }, { "id": "identity_3295", "image": "FigureQA/images/68992.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Bubblegum have the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.828125, "loss": 4.351505279541016 }, { "id": "identity_219306", "image": "A-OKVQA/images/319275.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many street lamps can be seen in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 19.953125, "loss": 5.284719467163086 }, { "id": "identity_16981", "image": "TQA/images/question_images/human_system_digestive_6085.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A narrow tube that starts at the stomach and ends at the large intestine\nChoices:\n(A) appendix\n(B) small intestine\n(C) liver\n(D) rectum" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.578125, "loss": 4.090502738952637 }, { "id": "identity_296979", "image": "DVQA/images/bar_train_00105203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of bars with values less than 9?" }, { "from": "gpt", "value": "The answer is six" } ], "clip_score": 21.234375, "loss": 4.462474822998047 }, { "id": "identity_305946", "image": "TQA/images/abc_question_images/hair_follicles_16989.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the bulge?\nChoices:\n(A) K\n(B) N\n(C) R\n(D) V" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.8125, "loss": 4.285671710968018 }, { "id": "identity_334728", "image": "PMC-VQA/images/PMC3509398_F2_169201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which artery is taking off above this?\nChoices:\n(A) Left main coronary artery \n(B) Long LAD \n(C) Proximal RCA \n(D) Distal RCA" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.25, "loss": 4.434825420379639 }, { "id": "identity_315501", "image": "VQA-RAD/images/synpic27601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where in relation to the mass is the left kidney located in this?" }, { "from": "gpt", "value": "The answer is posteriorly" } ], "clip_score": 28.03125, "loss": 5.113389492034912 }, { "id": "identity_235097", "image": "VQA2.0/images/367367.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What event might be taking place in this outdoor location, considering the attire of the people and the presence of a crowd?" }, { "from": "gpt", "value": "The answer is A public parade or festival" } ], "clip_score": 23.53125, "loss": 4.431012153625488 }, { "id": "identity_9584", "image": "A-OKVQA/images/318533.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is to the left side?\nChoices:\n(A) traffic light\n(B) apple\n(C) woman\n(D) baby" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.421875, "loss": 4.179139137268066 }, { "id": "identity_12367", "image": "A-OKVQA/images/327807.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person on the right selling?\nChoices:\n(A) pizza\n(B) water\n(C) cars\n(D) swords" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.9375, "loss": 4.540131568908691 }, { "id": "identity_211174", "image": "A-OKVQA/images/144562.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible on the fielding team?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.109375, "loss": 5.0534257888793945 }, { "id": "identity_321445", "image": "FigureQA/images/96602.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.3359375, "loss": 4.619937419891357 }, { "id": "identity_153638", "image": "ScienceQA/images/14251.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many holes are there for size adjustment in this belt?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 29.40625, "loss": 4.6136274337768555 }, { "id": "identity_114794", "image": "VizWiz/images/VizWiz_train_00005097.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of the telephone?" }, { "from": "gpt", "value": "The answer is Telecom Australia" } ], "clip_score": 29.40625, "loss": 4.559710502624512 }, { "id": "identity_306636", "image": "FigureQA/images/6103.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.1015625, "loss": 4.024829864501953 }, { "id": "identity_289264", "image": "A-OKVQA/images/430476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is unusual or peculiar about the sidewalk?\nChoices:\n(A) brick\n(B) narrow\n(C) dirt\n(D) steep slope" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.8046875, "loss": 4.2186994552612305 }, { "id": "identity_120491", "image": "TextVQA/images/e0c0747ef4f0bd9b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many screens are showing a globe?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.5, "loss": 4.497380256652832 }, { "id": "identity_305925", "image": "ScienceQA/images/6299.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Concord\n(B) Augusta\n(C) Manchester\n(D) Austin" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.484375, "loss": 4.065081596374512 }, { "id": "identity_36957", "image": "IconQA/images/34343.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you select a marble without looking, which color are you less likely to pick?\nChoices:\n(A) black\n(B) purple" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.09375, "loss": 4.152077674865723 }, { "id": "identity_17091", "image": "AI2D/images/2065.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the food web shown in the diagram, which organisms are primary consumers?\nChoices:\n(A) wolves and owls\n(B) Deer and rodents\n(C) grass and fungi\n(D) owls and foxes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.609375, "loss": 4.194657325744629 }, { "id": "identity_55354", "image": "FigureQA/images/85239.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color line has the highest peak value and what is that value?" }, { "from": "gpt", "value": "The answer is Peru, Approximately 90" } ], "clip_score": 22.140625, "loss": 4.167805194854736 }, { "id": "identity_327315", "image": "FigureQA/images/81678.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.484375, "loss": 4.181573867797852 }, { "id": "identity_33261", "image": "ScienceQA/images/19177.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Select the mammal below.\nChoices:\n(A) red kangaroo\n(B) American toad" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.3046875, "loss": 4.978260517120361 }, { "id": "identity_17351", "image": "Super-CLEVR/images/superCLEVR_new_001012.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a tiny gray metallic thing behind the small bus; is its shape the same as the large green object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 20.25, "loss": 5.200584411621094 }, { "id": "identity_4962", "image": "A-OKVQA/images/108240.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the blue traffic sign with a stick figure in the center most likely indicate?\nChoices:\n(A) bicycle lane\n(B) school crossing\n(C) pedestrian crossing\n(D) barrier" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.9375, "loss": 4.511234283447266 }, { "id": "identity_251164", "image": "TabMWP/images/35355.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Determine the slope of the function represented by the table and verify if the function is linear by checking if the slope is constant between all points" }, { "from": "gpt", "value": "The answer is linear" } ], "clip_score": 25.96875, "loss": 4.879439830780029 }, { "id": "identity_309363", "image": "FigureQA/images/73184.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Green the highest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.390625, "loss": 4.326891899108887 }, { "id": "identity_272173", "image": "PMC-VQA/images/PMC5975718_F10_320595.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What staining pattern is depicted in A''?\nChoices:\n(A) FITC-Alb only \n(B) Both microglial markers \n(C) Cytoplasmic labels \n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.34375, "loss": 4.504368782043457 }, { "id": "identity_176765", "image": "IconQA/images/40637.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape are the objects on the frame?" }, { "from": "gpt", "value": "The answer is Pentagon" } ], "clip_score": 34.0, "loss": 4.0765700340271 }, { "id": "identity_1826", "image": "PMC-VQA/images/PMC5031813_fig1_75534.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging modalities were used and what was the result?\nChoices:\n(A) MRI and positive result \n(B) CT scan and negative result \n(C) PET scan and negative result \n(D) X-ray and positive result " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.21875, "loss": 4.086989402770996 }, { "id": "identity_301770", "image": "PMC-VQA/images/PMC3510424_Fig4_169660.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the empty spaces indicate?\nChoices:\n(A) Normal structure\n(B) Abnormal structure\n(C) Infection\n(D) Lack of fluid" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.328125, "loss": 4.544828414916992 }, { "id": "identity_320364", "image": "PMC-VQA/images/PMC7603721_Fig2_288945.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of imaging was done at admission?\nChoices:\n(A) CT scan\n(B) MRI\n(C) X-ray\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.0625, "loss": 4.1022210121154785 }, { "id": "identity_63652", "image": "AI2D/images/868.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tertiary consumers are depicted in this food web?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 29.078125, "loss": 4.602218151092529 }, { "id": "identity_201062", "image": "VQA-AS/images/19841.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many raccoons are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.140625, "loss": 4.312775611877441 }, { "id": "identity_41744", "image": "DocVQA/images/hnnp0227_24.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the applicant's last name?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 22.40625, "loss": 4.009276390075684 }, { "id": "identity_297855", "image": "PMC-VQA/images/PMC4100749_F1_306570.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which lesion is located in the left putamen? \n(A) Hypodense \n(B) Hematoma \n(C) Hyperdense \n(D) Hyperintense" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.203125, "loss": 4.329156398773193 }, { "id": "identity_27959", "image": "DocVQA/images/kkxw0023_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the agency?" }, { "from": "gpt", "value": "The answer is long haymes carr lintas" } ], "clip_score": 21.25, "loss": 5.972033977508545 }, { "id": "identity_202447", "image": "VQA2.0/images/180205.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible players are on the field?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 19.28125, "loss": 5.058049201965332 }, { "id": "identity_337182", "image": "TQA/images/question_images/human_system_digestive_3742.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the label that connects the first and third points?\nChoices:\n(A) esophagus\n(B) throat\n(C) kidney\n(D) larynx" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.546875, "loss": 4.402642250061035 }, { "id": "identity_318795", "image": "IconQA/images/55094.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there a circle in this shape?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.765625, "loss": 4.012187957763672 }, { "id": "identity_333705", "image": "FigureQA/images/18919.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the maximum value for Web Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.953125, "loss": 4.08930778503418 }, { "id": "identity_33959", "image": "ScienceQA/images/2404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) hard\n(B) transparent" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.609375, "loss": 4.284992694854736 }, { "id": "identity_312730", "image": "FigureQA/images/67969.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the area under the curve for Purple the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.046875, "loss": 4.318941593170166 }, { "id": "identity_70989", "image": "ScienceQA/images/8076.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tertiary consumers are depicted in this food web?" }, { "from": "gpt", "value": "The answer is Black bear" } ], "clip_score": 35.15625, "loss": 4.14276647567749 }, { "id": "identity_20611", "image": "IconQA/images/14737.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you select a marble without looking, how likely is it that you will pick a black one?\nChoices:\n(A) unlikely\n(B) impossible\n(C) probable\n(D) certain" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.453125, "loss": 4.2735137939453125 }, { "id": "identity_20640", "image": "A-OKVQA/images/62915.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the blue vehicle doing?\nChoices:\n(A) pulling\n(B) overturning\n(C) sinking\n(D) burning" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.484375, "loss": 4.451461315155029 }, { "id": "identity_32594", "image": "VQA2.0/images/126497.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the umbrella?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 20.8125, "loss": 4.5019850730896 }, { "id": "identity_286475", "image": "IconQA/images/6339.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the shape of this object:\n(A) sphere\n(B) cone\n(C) cylinder\n(D) cube" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.9375, "loss": 4.375833988189697 }, { "id": "identity_300675", "image": "Super-CLEVR/images/superCLEVR_new_000374.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other things that have the same size as this shiny car?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 25.390625, "loss": 5.176469802856445 }, { "id": "identity_270754", "image": "ScienceQA/images/11894.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which question is most suitable for Dale's experiment to answer?\nChoices:\n(A) Can pennies hold more drops of water mixed with dish soap or water mixed with hand soap?\n(B) Can pennies hold more drops of pure water or water mixed with hand soap?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.625, "loss": 4.28057861328125 }, { "id": "identity_103912", "image": "VQA-RAD/images/synpic28987.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiological sign visible in the left upper quadrant of the abdomen?" }, { "from": "gpt", "value": "The answer is Gas bubble sign" } ], "clip_score": 25.390625, "loss": 4.166343688964844 }, { "id": "identity_213157", "image": "A-OKVQA/images/190393.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of flooring is the chair on?" }, { "from": "gpt", "value": "The answer is Wooden" } ], "clip_score": 21.859375, "loss": 4.101401329040527 }, { "id": "identity_184562", "image": "A-OKVQA/images/319345.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles are covered with protective covers?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.828125, "loss": 4.523826599121094 }, { "id": "identity_332464", "image": "PMC-VQA/images/PMC5893493_Fig9_299522.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the most likely diagnosis? \nChoices:\n(A) Appendicitis \n(B) Acute cholecystitis \n(C) Aortic rupture \n(D) Pancreatitis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.359375, "loss": 4.955224990844727 }, { "id": "identity_291433", "image": "A-OKVQA/images/390515.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On what is the man to the right of the black vehicle laying?\nChoices:\n(A) box\n(B) sofa\n(C) bench\n(D) case" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.0625, "loss": 4.466752529144287 }, { "id": "identity_310044", "image": "FigureQA/images/43383.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Khaki the high middle value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.96875, "loss": 4.31891393661499 }, { "id": "identity_329325", "image": "A-OKVQA/images/346924.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these illuminated white circles mainly used for?\nChoices:\n(A) sports\n(B) showing time\n(C) mounting\n(D) decoration" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.203125, "loss": 4.361014366149902 }, { "id": "identity_317861", "image": "DVQA/images/bar_train_00140852.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the fourth bar in each group?" }, { "from": "gpt", "value": "The answer is verse" } ], "clip_score": 12.4921875, "loss": 4.201739311218262 }, { "id": "identity_306927", "image": "A-OKVQA/images/358462.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the animal attached to the wagon?\nChoices:\n(A) by accident\n(B) eats grass\n(C) pulls wagon\n(D) stops horse" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.875, "loss": 4.239352226257324 }, { "id": "identity_18398", "image": "AI2D/images/4767.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the diagram show?\nChoices:\n(A) a leaf\n(B) a cat\n(C) a dog\n(D) a star" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.640625, "loss": 4.234116554260254 }, { "id": "identity_15619", "image": "A-OKVQA/images/490338.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the store whose sign has a blue arrow sell?\nChoices:\n(A) dvd\n(B) udon\n(C) sushi\n(D) vhs" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.671875, "loss": 4.460874080657959 }, { "id": "identity_129310", "image": "TextVQA/images/3db3be6c3b0a18e8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the jersey worn by the player with the number 11?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 23.5625, "loss": 4.698988437652588 }, { "id": "identity_264974", "image": "AI2D/images/1388.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part resembles a tube-like structure?\nChoices:\n(A) E\n(B) C\n(C) F\n(D) D" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.71875, "loss": 4.349193572998047 }, { "id": "identity_321501", "image": "PMC-VQA/images/PMC2844751_F0004_60300.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the direction of the tract shown here?\nChoices:\n(A) Anterior to posterior \n(B) Lateral to medial \n(C) Posterior to anterior \n(D) Medial to lateral" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.625, "loss": 4.474654197692871 }, { "id": "identity_275301", "image": "FigureQA/images/47360.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cyan not the low median?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.109375, "loss": 4.245054244995117 }, { "id": "identity_22642", "image": "ScienceQA/images/4898.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Alaska?\nChoices:\n(A) Honolulu\n(B) Juneau\n(C) Santa Fe\n(D) Fairbanks" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.40625, "loss": 4.249716758728027 }, { "id": "identity_5595", "image": "TQA/images/question_images/parts_worm_7334.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What connects the crop to pharynx?\nChoices:\n(A) aortic arches\n(B) esophagus\n(C) gizzard\n(D) brain" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.828125, "loss": 4.693728446960449 }, { "id": "identity_33238", "image": "DVQA/images/bar_train_00112475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many algorithms have accuracy higher than 3 in at least one dataset?" }, { "from": "gpt", "value": "The answer is eight" } ], "clip_score": 32.25, "loss": 4.2096943855285645 }, { "id": "identity_288744", "image": "IconQA/images/12131.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is it possible to draw a circle using this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.9765625, "loss": 4.146796703338623 }, { "id": "identity_303638", "image": "PMC-VQA/images/PMC4661982_Fig7_448463.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of microscopy was used to capture this image?\nChoices:\n(A) TEM microscopy\n(B) Scanning electron microscopy\n(C) Confocal microscopy\n(D) Fluorescence microscopy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.1875, "loss": 4.432064056396484 }, { "id": "identity_328394", "image": "A-OKVQA/images/213117.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the logo on the sides of the shoes worn by the person playing baseball?\nChoices:\n(A) black\n(B) green\n(C) red\n(D) white" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.4375, "loss": 4.0225090980529785 }, { "id": "identity_290644", "image": "A-OKVQA/images/395013.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: To which country does this flag belong?\nChoices:\n(A) Russia\n(B) UK\n(C) Nepal\n(D) US" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 8.96875, "loss": 4.120406150817871 }, { "id": "identity_262015", "image": "AI2D/images/67.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the outcome of decomposition?\nChoices:\n(A) diffusion\n(B) fossil fuels\n(C) combustion\n(D) carbon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.9375, "loss": 4.374181270599365 }, { "id": "identity_333999", "image": "Super-CLEVR/images/superCLEVR_new_000795.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer rubber articulated buses in front of the large matte vehicle than there are big objects to the left of the metallic bus?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 31.703125, "loss": 5.075215816497803 }, { "id": "identity_331857", "image": "TextVQA/images/91e4c8b4e33b7aad.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are these machines where you put coins in and get different kinds of stickers out?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 24.4375, "loss": 4.646029949188232 }, { "id": "identity_285694", "image": "VQA2.0/images/46085.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the door?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 15.75, "loss": 4.9335479736328125 }, { "id": "identity_292583", "image": "TextVQA/images/53b23a5b81600a08.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which initials are displayed on the red, square sign?" }, { "from": "gpt", "value": "The answer is bvd" } ], "clip_score": 14.15625, "loss": 4.235252857208252 }, { "id": "identity_326822", "image": "PMC-VQA/images/PMC4466355_fig1_396404.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the CT scan reveal 3 months prior to admission?\nChoices:\n(A) Abnormal growths in the lungs \n(B) Fluid buildup in the lungs \n(C) Air pockets in the lungs \n(D) Normal lung tissue appearance" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.21875, "loss": 4.398255825042725 }, { "id": "identity_103596", "image": "VQA-AS/images/9714.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of tree is the man sitting on?" }, { "from": "gpt", "value": "The answer is Coniferous tree log" } ], "clip_score": 17.09375, "loss": 4.014952182769775 }, { "id": "identity_12960", "image": "PMC-VQA/images/PMC2361926_fig1_21606.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the body is showing abnormal uptake in the image? \nChoices:\n(A) Left cervical region and left premolar area. \n(B) Right cervical region and right premolar area. \n(C) Abdomen. \n(D) Brain. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.796875, "loss": 4.252253532409668 }, { "id": "identity_137725", "image": "DocVQA/images/kkxw0023_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the program name mentioned in the document?" }, { "from": "gpt", "value": "The answer is Winston Appoasement program" } ], "clip_score": 25.796875, "loss": 4.604002475738525 }, { "id": "identity_289326", "image": "IconQA/images/34166.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options correctly states whether there are more bugs than daisies? \n(A) no \n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.03125, "loss": 4.1082353591918945 }, { "id": "identity_282107", "image": "DVQA/images/bar_train_00066571.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of bars with values less than 7?" }, { "from": "gpt", "value": "The answer is two" } ], "clip_score": 21.046875, "loss": 4.2468132972717285 }, { "id": "identity_29012", "image": "A-OKVQA/images/466083.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What team is practicing?\nChoices:\n(A) washington\n(B) chicago\n(C) new york\n(D) toronto" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.921875, "loss": 4.0671706199646 }, { "id": "identity_325938", "image": "PMC-VQA/images/PMC4935671_fig0015_48414.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who got the image files?\nChoices:\n(A) The imaging lab staff\n(B) The hospital administration\n(C) The patient's family\n(D) Rogue Research Inc." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.03125, "loss": 4.2003254890441895 }, { "id": "identity_314602", "image": "PMC-VQA/images/PMC8606177_FIG2_116677.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is observed in Image B?\nChoices:\n(A) Dilated ventricles \n(B) Normal brain parenchyma \n(C) Engorged choroid plexus \n(D) Normal foramina of Luschka" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.0625, "loss": 4.410996913909912 }, { "id": "identity_214145", "image": "A-OKVQA/images/373284.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of service is advertised on the sign with the green leaf symbol?" }, { "from": "gpt", "value": "The answer is ATM Exchange" } ], "clip_score": 14.875, "loss": 5.31011962890625 }, { "id": "identity_334591", "image": "A-OKVQA/images/391887.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of job is visible?\nChoices:\n(A) garbage man\n(B) rancher\n(C) firefighter\n(D) cowboy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.046875, "loss": 4.521950721740723 }, { "id": "identity_66398", "image": "TQA/images/question_images/tides_140.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many high tides are shown on Earth in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 34.28125, "loss": 4.409785747528076 }, { "id": "identity_334039", "image": "AI2D/images/3524.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part is closest to this membrane?\nChoices:\n(A) Cytoplasm\n(B) nucleolus\n(C) Golgi apparatus\n(D) Mitochondrion" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.1875, "loss": 4.978517055511475 }, { "id": "identity_289052", "image": "A-OKVQA/images/37905.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What abilities does the red object on the ground possess?\nChoices:\n(A) driving around\n(B) flight\n(C) playing music\n(D) catching fish" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.4609375, "loss": 4.732944965362549 }, { "id": "identity_278503", "image": "PMC-VQA/images/PMC6701203_F3_25752.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be seen in the stereomicroscope image?\nChoices:\n(A) root apical meristem (ram) \n(B) vascular bundles \n(C) stomata \n(D) shoot apical meristem (sam)" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.1875, "loss": 4.009262561798096 }, { "id": "identity_320487", "image": "PMC-VQA/images/PMC4472211_fig01_397987.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the primary concern depicted in the images?\nChoices:\n(A) Multiple thoracic cysts\n(B) Renal cystic lesions\n(C) Cardiac abnormalities\n(D) Hepatic cysts" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.328125, "loss": 4.779203414916992 }, { "id": "identity_338326", "image": "A-OKVQA/images/177149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who is responsible for the equipment shown here?\nChoices:\n(A) executive branch\n(B) no one\n(C) peace corps\n(D) military" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.5625, "loss": 4.003582000732422 }, { "id": "identity_9303", "image": "PMC-VQA/images/PMC6942098_Fig4_93900.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most prominent finding in this image? \nChoices:\n(A) Basilar peribronchial consolidative opacities \n(B) Bibasilar septal thickening \n(C) Nodular peribronchial ground glass opacities \n(D) Patchy peribronchial ground glass opacities " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.171875, "loss": 4.634462833404541 }, { "id": "identity_205068", "image": "TextVQA/images/aff2fca5054718bc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of garment is the person wearing?" }, { "from": "gpt", "value": "The answer is A white lab coat" } ], "clip_score": 25.4375, "loss": 4.329444408416748 }, { "id": "identity_305120", "image": "TQA/images/question_images/blastocyst_9026.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What biological function allows cell migration during gastrulation?\nChoices:\n(A) Blastocoel\n(B) Animal pole\n(C) Gray Crescent\n(D) Zygote" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.546875, "loss": 4.147112846374512 }, { "id": "identity_219726", "image": "TextVQA/images/fe531f1bc6187758.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many scooters are parked in front of the building with red signage?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 25.75, "loss": 5.059601783752441 }, { "id": "identity_24613", "image": "ScienceQA/images/9267.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) sweet\n(B) salty" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.0, "loss": 4.231236457824707 }, { "id": "identity_185969", "image": "TextVQA/images/cf2fc99d2d255cd8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the dominant color of the cityscape displayed on the monitor?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 34.6875, "loss": 4.320650100708008 }, { "id": "identity_192722", "image": "PMC-VQA/images/PMC5065360_pone.0164679.g005_84037.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image?" }, { "from": "gpt", "value": "The answer is Axial" } ], "clip_score": 22.796875, "loss": 4.038942813873291 }, { "id": "identity_316506", "image": "PMC-VQA/images/PMC3407775_F1_147337.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which body part is impacted by the soft tissue mass?\nChoices:\n(A) left knee\n(B) right shoulder\n(C) neck\n(D) left elbow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.796875, "loss": 4.186586380004883 }, { "id": "identity_23627", "image": "IconQA/images/56313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) flip\n(B) slide\n(C) turn" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.8125, "loss": 4.167935371398926 }, { "id": "identity_129899", "image": "A-OKVQA/images/576518.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible on the field wearing a cap?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 17.96875, "loss": 4.984297752380371 }, { "id": "identity_281134", "image": "ScienceQA/images/11474.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Nevada?\nChoices:\n(A) Helena\n(B) Reno\n(C) Carson City\n(D) Denver" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.078125, "loss": 4.039573669433594 }, { "id": "identity_225448", "image": "TextVQA/images/46166fa662b13680.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two new technologies mentioned on the posters that allow consumers to design and print physical objects with interactive elements?" }, { "from": "gpt", "value": "The answer is Intuitive block-based 3D editor, latest construction kits" } ], "clip_score": 30.234375, "loss": 4.433109283447266 }, { "id": "identity_223270", "image": "Super-CLEVR/images/superCLEVR_new_001781.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly behind the yellow bicycle?" }, { "from": "gpt", "value": "The answer is Car" } ], "clip_score": 23.0625, "loss": 4.055544853210449 }, { "id": "identity_204731", "image": "PMC-VQA/images/PMC3432878_fig2_152924.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image on the left?" }, { "from": "gpt", "value": "The answer is Axial" } ], "clip_score": 23.84375, "loss": 4.191242694854736 }, { "id": "identity_337917", "image": "AI2D/images/629.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the label C?\nChoices:\n(A) seed\n(B) root\n(C) adult plant\n(D) young plant" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 36.96875, "loss": 4.513805389404297 }, { "id": "identity_272234", "image": "TQA/images/question_images/cycle_rock_6740.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the next step in the sequence after transportation?\nChoices:\n(A) solidification\n(B) lithification\n(C) weathering\n(D) deposition" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.921875, "loss": 4.370413780212402 }, { "id": "identity_188149", "image": "PMC-VQA/images/PMC4038083_F1_292690.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the image?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 21.5625, "loss": 4.112072944641113 }, { "id": "identity_328821", "image": "FigureQA/images/1792.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.625, "loss": 4.0424723625183105 }, { "id": "identity_37269", "image": "PMC-VQA/images/PMC7198717_F1_162893.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where were the two skull lesions located on the CT axial images? \nChoices:\n(A) Temporal lobe \n(B) Pituitary gland \n(C) Frontal lobe \n(D) Occipital lobe " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.828125, "loss": 4.276791572570801 }, { "id": "identity_329209", "image": "TextVQA/images/3742f95f789e4f47.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which team do these people play for?" }, { "from": "gpt", "value": "The answer is michigan " } ], "clip_score": 18.515625, "loss": 5.341893672943115 }, { "id": "identity_319232", "image": "PMC-VQA/images/PMC3550705_Fig1_180401.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do these red lines indicate? \nChoices:\n(A) stent area \n(B) lumen area \n(C) region of interest \n(D) none of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.0625, "loss": 4.821661949157715 }, { "id": "identity_211815", "image": "VizWiz/images/VizWiz_train_00016400.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the flavor of the drink as indicated on the label?" }, { "from": "gpt", "value": "The answer is Mango Ice" } ], "clip_score": 29.359375, "loss": 4.6550469398498535 }, { "id": "identity_288032", "image": "PMC-VQA/images/PMC7447331_F1_239979.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the arrow in each scenario? Is it related to the time of injection, time of assessment, dosage of injection, or pain level?\nChoices:\n(A) Time of injection \n(B) Time of assessment \n(C) Dosage of injection \n(D) Pain level" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.6953125, "loss": 4.276978492736816 }, { "id": "identity_325249", "image": "A-OKVQA/images/315314.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who will try this cake first?\nChoices:\n(A) groomsman\n(B) bride\n(C) groom\n(D) both" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.25, "loss": 4.051515579223633 }, { "id": "identity_26301", "image": "PMC-VQA/images/PMC6297868_F5_412484.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the blue area on the diagram represent? \nChoices:\n(A) The injection site \n(B) The corpus callosum \n(C) The internal capsule \n(D) The hippocampus " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.84375, "loss": 4.295757293701172 }, { "id": "identity_290238", "image": "ScienceQA/images/4146.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What common property do these three objects share among the following choices: blue, sour, or stretchy?\nChoices:\n(A) blue\n(B) sour\n(C) stretchy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.4921875, "loss": 4.03885555267334 }, { "id": "identity_282393", "image": "A-OKVQA/images/335076.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location are these people situated? \nChoices:\n(A) shore\n(B) forest\n(C) beach\n(D) field" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 6.8046875, "loss": 4.052356243133545 }, { "id": "identity_293200", "image": "A-OKVQA/images/432527.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is graphite commonly used in the production of?\nChoices:\n(A) ball\n(B) shoe\n(C) net\n(D) racket" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 7.91015625, "loss": 4.244536399841309 }, { "id": "identity_290451", "image": "VizWiz/images/VizWiz_train_00018087.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are these windows? If so, how can I open them?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 24.0625, "loss": 4.167429447174072 }, { "id": "identity_310105", "image": "FigureQA/images/53940.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Mint the smallest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.984375, "loss": 4.244683265686035 }, { "id": "identity_19907", "image": "A-OKVQA/images/179814.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is next to the oven?\nChoices:\n(A) spices\n(B) refrigerator\n(C) dishwasher\n(D) sink" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.28125, "loss": 4.38836669921875 }, { "id": "identity_279728", "image": "A-OKVQA/images/7685.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which object is hovering above the rocks? \n(A) bird \n(B) duck \n(C) kite \n(D) newspaper" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.015625, "loss": 4.3127851486206055 }, { "id": "identity_264595", "image": "PMC-VQA/images/PMC8068274_jpm-11-00285-f007_425404.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the image?\nChoices:\n(A) MRI of the abdomen\n(B) CT scan of the chest\n(C) X-ray of the leg\n(D) PET scan of the brain." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.1875, "loss": 4.406990051269531 }, { "id": "identity_12690", "image": "ScienceQA/images/7599.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Maryland?\nChoices:\n(A) Boston\n(B) Baltimore\n(C) Annapolis\n(D) Honolulu" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.984375, "loss": 4.352827548980713 }, { "id": "identity_31170", "image": "FigureQA/images/32539.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Chocolate the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.96875, "loss": 4.330436706542969 }, { "id": "identity_305020", "image": "DocVQA/images/yqjh0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: For the final 3 years as an agricultural economist, with which administration did he serve?" }, { "from": "gpt", "value": "The answer is War Food Administration" } ], "clip_score": 20.734375, "loss": 4.099235534667969 }, { "id": "identity_323359", "image": "TextVQA/images/8b44bfa1b5717879.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the program in the bottom left corner?" }, { "from": "gpt", "value": "The answer is tunenet" } ], "clip_score": 20.96875, "loss": 4.771700859069824 }, { "id": "identity_288303", "image": "PMC-VQA/images/PMC7780869_F2_334052.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the ground glass density located in Patient 2's CT scan? \n(A) Upper lobes \n(B) Middle lobes \n(C) Lower lobes \n(D) Entire lung." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.171875, "loss": 4.198456764221191 }, { "id": "identity_35030", "image": "TextVQA/images/02a657b3b00696a0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: how is the tasmanian devil described as?" }, { "from": "gpt", "value": "The answer is bad company" } ], "clip_score": 11.078125, "loss": 4.741034984588623 }, { "id": "identity_327838", "image": "A-OKVQA/images/44483.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do people under these umbrellas do?\nChoices:\n(A) knit\n(B) cheer\n(C) dine\n(D) sleep" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.75, "loss": 4.431727409362793 }, { "id": "identity_268539", "image": "FigureQA/images/79964.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Coral's area under the curve the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.765625, "loss": 4.251523494720459 }, { "id": "identity_298873", "image": "A-OKVQA/images/472079.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which character from The Sopranos does the person in the foreground wearing blue resemble the most? Is it:\n(A) Bobby Baccalieri\n(B) Uncle Junior\n(C) Carmela Soprano\n(D) Livia Soprano" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.71875, "loss": 4.185687065124512 }, { "id": "identity_292603", "image": "PMC-VQA/images/PMC8128441_fig4_443760.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options represents items that are displayed at the same magnification for direct comparison? \n(A) Normal and mutated viruses \n(B) Pru∆hxg and mutant parasites \n(C) Scale bars and Pru∆hxg \n(D) Mutant parasites and normal cells" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.796875, "loss": 4.279616355895996 }, { "id": "identity_31900", "image": "A-OKVQA/images/517160.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which direction did the skater just come from?\nChoices:\n(A) train track\n(B) train top\n(C) down\n(D) high up" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.1875, "loss": 4.099503517150879 }, { "id": "identity_71643", "image": "ScienceQA/images/1070.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stages are depicted in the moss life cycle within this diagram?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 35.1875, "loss": 4.210873126983643 }, { "id": "identity_31287", "image": "DVQA/images/bar_train_00171697.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second bar from the bottom?" }, { "from": "gpt", "value": "The answer is weight" } ], "clip_score": 20.96875, "loss": 4.384123802185059 }, { "id": "identity_327635", "image": "A-OKVQA/images/574940.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which fabric is suitable for window coverings?\nChoices:\n(A) linen\n(B) silk\n(C) nylon\n(D) cotton" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.765625, "loss": 4.378051280975342 }, { "id": "identity_279235", "image": "VQA-RAD/images/synpic53287.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Was contrast used for this MRI scan?" }, { "from": "gpt", "value": "The answer is with contrast" } ], "clip_score": 31.078125, "loss": 4.3437371253967285 }, { "id": "identity_208618", "image": "A-OKVQA/images/315065.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows are visible on the front side of the building?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 13.984375, "loss": 4.651700019836426 }, { "id": "identity_246422", "image": "Super-CLEVR/images/superCLEVR_new_000170.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the blue bicycle and the red metallic thing the same size as the green bicycle?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 29.765625, "loss": 5.141293048858643 }, { "id": "identity_206247", "image": "PMC-VQA/images/PMC8913485_Fig5_219873.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scale reference in the image?" }, { "from": "gpt", "value": "The answer is A small figure" } ], "clip_score": 17.21875, "loss": 4.68896484375 }, { "id": "identity_201914", "image": "VQA2.0/images/507195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the object placed on the cart?" }, { "from": "gpt", "value": "The answer is Water can" } ], "clip_score": 19.09375, "loss": 4.918154239654541 }, { "id": "identity_270202", "image": "IconQA/images/46645.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture is the fifth in the sequence after the first picture, which is a star?\nChoices:\n(A) star\n(B) rocket\n(C) moon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.609375, "loss": 4.391697883605957 }, { "id": "identity_139918", "image": "DVQA/images/bar_train_00040083.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest value for steel?" }, { "from": "gpt", "value": "The answer is grass" } ], "clip_score": 23.765625, "loss": 4.066416263580322 }, { "id": "identity_283371", "image": "PMC-VQA/images/PMC4998391_F4_66474.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which quadrant of the left breast is the lesion located in the patient?\nChoices:\n(A) Left upper outer quadrant\n(B) Left lower outer quadrant\n(C) Right upper outer quadrant\n(D) Right lower outer quadrant" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.515625, "loss": 4.201303005218506 }, { "id": "identity_308364", "image": "DVQA/images/bar_train_00122934.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the steelblue color represent?" }, { "from": "gpt", "value": "The answer is dark" } ], "clip_score": 23.09375, "loss": 4.02600622177124 }, { "id": "identity_128627", "image": "VQA-AS/images/16289.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many logs are lying on the ground?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.0625, "loss": 4.363534927368164 }, { "id": "identity_314421", "image": "IconQA/images/23189.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image shows a plane. What is the ninth image?\nChoices:\n(A) train\n(B) plane\n(C) bear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.40625, "loss": 4.261492729187012 }, { "id": "identity_306521", "image": "TabMWP/images/13081.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is it linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.6875, "loss": 4.302615165710449 }, { "id": "identity_334719", "image": "AI2D/images/153.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do F and G represent?\nChoices:\n(A) Water bulging away from the moon\n(B) The Earth's rotation\n(C) Water bulging towards the moon\n(D) The mean sea level" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.21875, "loss": 4.1123456954956055 }, { "id": "identity_133318", "image": "VQA-AS/images/8200.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of pet is the yellow animal?" }, { "from": "gpt", "value": "The answer is Cat" } ], "clip_score": 18.015625, "loss": 4.2042341232299805 }, { "id": "identity_299021", "image": "ScienceQA/images/2016.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following correctly identifies the direction of thermal energy transfer: from the surroundings to each battery or from each battery to the surroundings?\nChoices:\n(A) the surroundings . . . each battery\n(B) each battery . . . the surroundings" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.921875, "loss": 4.0583367347717285 }, { "id": "identity_10881", "image": "A-OKVQA/images/428754.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person standing on?\nChoices:\n(A) snow\n(B) water\n(C) sticks\n(D) concrete" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.6875, "loss": 4.608051300048828 }, { "id": "identity_115770", "image": "TextVQA/images/a505c1868bd97294.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are visible on the crosswalk in the background?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 17.703125, "loss": 4.795976638793945 }, { "id": "identity_2138", "image": "ScienceQA/images/7438.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which term matches the picture?\nChoices:\n(A) phloem\n(B) xylem" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.59375, "loss": 4.851258277893066 }, { "id": "identity_338067", "image": "A-OKVQA/images/360751.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is happening at the beach?\nChoices:\n(A) wedding\n(B) clam bake\n(C) bon fire\n(D) blanket bingo" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.890625, "loss": 4.262988090515137 }, { "id": "identity_328149", "image": "Super-CLEVR/images/superCLEVR_new_001874.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of this car?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 21.34375, "loss": 4.1857991218566895 }, { "id": "identity_4374", "image": "FigureQA/images/52446.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Salmon greater than Dark Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.546875, "loss": 4.068404197692871 }, { "id": "identity_220732", "image": "VQA-AS/images/4644.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trees are visible in the background?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.953125, "loss": 4.048360824584961 }, { "id": "identity_34540", "image": "TextVQA/images/a9f3b1fa9c7caabd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the first word on the sign?" }, { "from": "gpt", "value": "The answer is ginos" } ], "clip_score": 15.5078125, "loss": 5.043023586273193 }, { "id": "identity_60481", "image": "DocVQA/images/mjph0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the factory mentioned in the report?" }, { "from": "gpt", "value": "The answer is Sterling Factory" } ], "clip_score": 24.109375, "loss": 4.422995567321777 }, { "id": "identity_250952", "image": "A-OKVQA/images/54335.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What architectural features can be identified in the city seen through the clock, and how do they indicate the type of city?" }, { "from": "gpt", "value": "The answer is Historical buildings indicate a culturally rich city" } ], "clip_score": 28.6875, "loss": 4.359256744384766 }, { "id": "identity_191656", "image": "A-OKVQA/images/344292.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of flooring is outside of the red cooking area?" }, { "from": "gpt", "value": "The answer is Grey non-slip texture" } ], "clip_score": 23.40625, "loss": 4.372171878814697 }, { "id": "identity_201433", "image": "PMC-VQA/images/PMC5543595_Fig1_200059.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image?" }, { "from": "gpt", "value": "The answer is Axial, patient supine, medical standard orientation" } ], "clip_score": 30.96875, "loss": 4.842052936553955 }, { "id": "identity_22865", "image": "PMC-VQA/images/PMC9318489_FIG2_352586.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 2. What is the location of the pulmonary infiltrates? \nChoices:\n(A) Unilateral \n(B) Bilateral \n(C) Peripheral \n(D) Central " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.390625, "loss": 4.273448944091797 }, { "id": "identity_306968", "image": "A-OKVQA/images/126606.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What group do the individuals with Black Racing jackets belong to?\nChoices:\n(A) school class\n(B) family\n(C) club\n(D) volunteers" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.90625, "loss": 4.2075300216674805 }, { "id": "identity_115039", "image": "TextVQA/images/7fe0614df96e1084.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different colors of mint packaging are visible?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.671875, "loss": 4.105699062347412 }, { "id": "identity_321436", "image": "PMC-VQA/images/PMC3287112_F1_127456.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of medical imaging was used to capture this maturing left precentral hemotoma?\nChoices:\n(A) MRI\n(B) CT scan\n(C) X-ray\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.015625, "loss": 4.076409339904785 }, { "id": "identity_335063", "image": "PMC-VQA/images/PMC5209318_Fig3_109471.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the dotted line in this image?\nChoices:\n(A) To demarcate a margin of the trachea.\n(B) To show the location of the esophagus.\n(C) To demarcate a margin of the trachea.\n(D) To show the location of the heart." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.9375, "loss": 4.096129417419434 }, { "id": "identity_338423", "image": "PMC-VQA/images/PMC4422476_Fig1_383839.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What method was used to visualize the calcifications in the brain?\nChoices:\n(A) X-ray\n(B) CT scan\n(C) MRI\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.25, "loss": 4.279637813568115 }, { "id": "identity_190817", "image": "VQA2.0/images/21212.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Number of people counted" } ], "clip_score": 18.453125, "loss": 4.338306427001953 }, { "id": "identity_213598", "image": "TextVQA/images/5a59b71a8697b3aa.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the seats in the stadium?" }, { "from": "gpt", "value": "The answer is Dark green" } ], "clip_score": 26.515625, "loss": 4.377256870269775 }, { "id": "identity_335814", "image": "IconQA/images/74168.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a fish. Which image is third?\nChoices:\n(A) fish\n(B) sub\n(C) turtle" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.25, "loss": 4.311266899108887 }, { "id": "identity_7039", "image": "AI2D/images/2010.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Algae, Grain and vegetables are\nChoices:\n(A) energy source\n(B) Producers\n(C) primary consumer\n(D) none of above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.15625, "loss": 4.121130466461182 }, { "id": "identity_31493", "image": "DVQA/images/bar_train_00093271.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 21.15625, "loss": 4.293148994445801 }, { "id": "identity_285419", "image": "A-OKVQA/images/308900.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of setting is this ball park located in? Suburban, desert, urban, or rural?\nChoices:\n(A) suburban\n(B) desert\n(C) urban\n(D) rural" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.34375, "loss": 4.61530065536499 }, { "id": "identity_104390", "image": "VQA2.0/images/50735.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many toothbrushes are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.65625, "loss": 4.401358127593994 }, { "id": "identity_34958", "image": "A-OKVQA/images/449103.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the woman in the black boots doing with the can?\nChoices:\n(A) throwing\n(B) buying\n(C) drinking\n(D) recycling" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.859375, "loss": 4.291717529296875 }, { "id": "identity_234730", "image": "VQA2.0/images/168356.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of objects can you identify on the desk, and what are they?" }, { "from": "gpt", "value": "The answer is fifteen types of objects" } ], "clip_score": 27.1875, "loss": 4.201926231384277 }, { "id": "identity_30291", "image": "A-OKVQA/images/33718.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the man in the air just do?\nChoices:\n(A) bounce\n(B) land\n(C) jump\n(D) fall" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.9453125, "loss": 4.55853796005249 }, { "id": "identity_303621", "image": "IconQA/images/88942.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a moon. What is the second image?\nChoices:\n(A) star\n(B) moon\n(C) rocket" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.15625, "loss": 4.059107303619385 }, { "id": "identity_324814", "image": "Super-CLEVR/images/superCLEVR_new_001358.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the object to the right of the large matte bicycle shaped like the small blue thing?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 33.96875, "loss": 5.479847431182861 }, { "id": "identity_25681", "image": "ScienceQA/images/11573.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) slippery\n(B) opaque" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.78125, "loss": 4.419583320617676 }, { "id": "identity_282168", "image": "A-OKVQA/images/19349.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which of the following items are most of these items likely to be used?\nChoices:\n(A) televisions\n(B) cars\n(C) light fixtures\n(D) air conditioners" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.3125, "loss": 4.242345809936523 }, { "id": "identity_21367", "image": "VizWiz/images/VizWiz_train_00005060.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of plant is this?" }, { "from": "gpt", "value": "The answer is dont know" } ], "clip_score": 22.921875, "loss": 4.390143871307373 }, { "id": "identity_103350", "image": "A-OKVQA/images/446481.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible traffic lights are showing a red signal?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.421875, "loss": 4.551138401031494 }, { "id": "identity_112736", "image": "TextVQA/images/d6d0a6b7e81a519b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the phone in the center?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 23.328125, "loss": 4.15062952041626 }, { "id": "identity_38674", "image": "A-OKVQA/images/246001.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being displayed on the screen in front of the class?\nChoices:\n(A) movie\n(B) powerpoint presentation\n(C) live tv\n(D) weekly news" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.484375, "loss": 4.175423622131348 }, { "id": "identity_27082", "image": "IconQA/images/39731.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of circles even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.421875, "loss": 4.227874755859375 }, { "id": "identity_306061", "image": "TextVQA/images/23bfdbe38957354f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this the product?" }, { "from": "gpt", "value": "The answer is ye" } ], "clip_score": 18.078125, "loss": 5.017253875732422 }, { "id": "identity_296585", "image": "PMC-VQA/images/PMC2564839_hrt-94-05-0566-f01_28570.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the vessel is the stent positioned?\nOptions:\n(A) In a closed bifurcation artery\n(B) In an open bifurcation artery\n(C) In a straight portion of the vessel\n(D) In a tortuous portion of the vessel" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.15625, "loss": 4.324763298034668 }, { "id": "identity_322140", "image": "ScienceQA/images/15473.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Richmond\n(B) Charleston\n(C) Frankfort\n(D) Columbia" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.71875, "loss": 4.361303806304932 }, { "id": "identity_11508", "image": "IconQA/images/17874.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a bear. Which picture is ninth?\nChoices:\n(A) bear\n(B) plane\n(C) train" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.234375, "loss": 4.024060249328613 }, { "id": "identity_18806", "image": "ScienceQA/images/17187.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these three objects have in common?\nChoices:\n(A) transparent\n(B) bumpy\n(C) smooth" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.875, "loss": 4.719873428344727 }, { "id": "identity_298081", "image": "PMC-VQA/images/PMC2375410_fig4_22352.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which tissues are visible in the image provided?\nChoices:\n(A) Muscles, \n(B) Lungs and Liver, \n(C) Skin and Bones, \n(D) none of the above." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.3125, "loss": 4.1276021003723145 }, { "id": "identity_284563", "image": "A-OKVQA/images/392991.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which item is being added to the cake among the following choices: fork, spoon, frosting, or layer?\nChoices:\n(A) fork\n(B) spoon\n(C) frosting\n(D) layer" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.28125, "loss": 4.466405868530273 }, { "id": "identity_128479", "image": "VizWiz/images/VizWiz_train_00002661.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the ink indicated on the pen?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 25.65625, "loss": 4.110285758972168 }, { "id": "identity_20118", "image": "TabMWP/images/13798.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the following schedule. Which event begins at 1.20 P.M.?\nChoices:\n(A) award ceremony\n(B) keynote speech\n(C) band concert\n(D) parade" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.03125, "loss": 4.33450984954834 }, { "id": "identity_4825", "image": "PMC-VQA/images/PMC6922484_F3_87489.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the orientation of the image?\nChoices:\n(A) Coronal \n(B) Axial \n(C) Sagittal \n(D) Oblique " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.96875, "loss": 4.877359390258789 }, { "id": "identity_11903", "image": "IconQA/images/29841.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a train. Which picture is seventh?\nChoices:\n(A) plane\n(B) bear\n(C) train" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.921875, "loss": 4.4184370040893555 }, { "id": "identity_277132", "image": "ScienceQA/images/11484.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the passage discussing the weather or the climate? \nOptions:\n(A) weather\n(B) climate" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 4.28125, "loss": 4.229705810546875 }, { "id": "identity_306287", "image": "A-OKVQA/images/375750.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this individual playing?\nChoices:\n(A) sand\n(B) park\n(C) court\n(D) playground" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.4375, "loss": 4.435182094573975 }, { "id": "identity_279799", "image": "DVQA/images/bar_train_00174194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Among all the datasets, which algorithm has the highest total accuracy?" }, { "from": "gpt", "value": "The answer is total" } ], "clip_score": 33.71875, "loss": 4.336517333984375 }, { "id": "identity_136105", "image": "DocVQA/images/xqjf0226_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of ACTOS as suggested by the tagline?" }, { "from": "gpt", "value": "The answer is To benefit the body's performance" } ], "clip_score": 28.140625, "loss": 4.374185562133789 }, { "id": "identity_250120", "image": "A-OKVQA/images/163091.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of aircraft are displayed in the image, and what are their primary uses?" }, { "from": "gpt", "value": "The answer is 2 types, experimental/research and military" } ], "clip_score": 27.0625, "loss": 4.243598937988281 }, { "id": "identity_123677", "image": "PMC-VQA/images/PMC6158203_RSTB20170335F6_372455.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What might be the clinical significance of the finding indicated by the yellow arrow in image (a)?" }, { "from": "gpt", "value": "The answer is Possible cardiomegaly or cardiac tumor" } ], "clip_score": 30.578125, "loss": 4.253961086273193 }, { "id": "identity_19698", "image": "TQA/images/abc_question_images/skin_cross_section_17586.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label shows the Blood Vessels?\nChoices:\n(A) Y\n(B) D\n(C) R\n(D) H" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.6875, "loss": 4.260903358459473 }, { "id": "identity_33124", "image": "PMC-VQA/images/PMC4491230_Fig1_402816.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which tract in the brain is predominantly affected?\nChoices:\n(A) Sensory tract \n(B) Pyramidal tract \n(C) Extrapyramidal tract \n(D) Spinocerebellar tract " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.1875, "loss": 4.254197120666504 }, { "id": "identity_201384", "image": "PMC-VQA/images/PMC6639941_Fig3_11403.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many yellow arrowheads are pointing to features in this image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.578125, "loss": 4.238989353179932 }, { "id": "identity_313784", "image": "Super-CLEVR/images/superCLEVR_new_001930.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the shape of the small red matte thing that is right of the cyan object the same as the tiny brown metallic thing to the right of the bus?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 23.359375, "loss": 5.123332977294922 }, { "id": "identity_137671", "image": "DVQA/images/bar_train_00088904.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value represented by the orange triangle symbol in the legend?" }, { "from": "gpt", "value": "The answer is switch" } ], "clip_score": 18.53125, "loss": 4.143115520477295 }, { "id": "identity_149209", "image": "DocVQA/images/fpyf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many projects were considered for second year funding?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 19.4375, "loss": 4.438655376434326 }, { "id": "identity_292331", "image": "A-OKVQA/images/402833.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color shirt is the man in the white hat, who is following the elephants, wearing? Is it green, white, purple, or blue?\nChoices:\n(A) green\n(B) white\n(C) purple\n(D) blue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.015625, "loss": 4.574042320251465 }, { "id": "identity_5397", "image": "DocVQA/images/hrvx0021_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who shall serve as chairman?" }, { "from": "gpt", "value": "The answer is Kenneth J. Lapiejko" } ], "clip_score": 21.5625, "loss": 4.118953704833984 }, { "id": "identity_3954", "image": "PMC-VQA/images/PMC7419468_F1_232724.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of image is shown? \nChoices:\n(A) T1WI \n(B) T2WI \n(C) CT \n(D) PET " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.453125, "loss": 4.132340431213379 }, { "id": "identity_330385", "image": "ScienceQA/images/12879.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which ocean is shown?\nChoices:\n(A) the Southern Ocean\n(B) the Pacific Ocean\n(C) the Arctic Ocean\n(D) the Atlantic Ocean" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.171875, "loss": 4.009246826171875 }, { "id": "identity_318245", "image": "Super-CLEVR/images/superCLEVR_new_001811.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there anything large and metallic on the right side of a small object in front of a brown object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 13.78125, "loss": 5.478537082672119 }, { "id": "identity_36624", "image": "ScienceQA/images/11305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Vermont?\nChoices:\n(A) Montpelier\n(B) Burlington\n(C) Santa Fe\n(D) Jackson" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.796875, "loss": 4.24043083190918 }, { "id": "identity_293561", "image": "A-OKVQA/images/507362.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which direction is considered acceptable to proceed in?\nChoices:\n(A) backwards\n(B) right\n(C) straight\n(D) left" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.4453125, "loss": 4.16284704208374 }, { "id": "identity_294263", "image": "PMC-VQA/images/PMC6293265_ccr31853-fig-0002_411497.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which area is affected in the CT scan described above? \nOptions:\n(A) Right main bronchus\n(B) Left main lower lobe bronchus\n(C) Right main lower lobe bronchus\n(D) Superior segmental bronchus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.59375, "loss": 4.525961875915527 }, { "id": "identity_104127", "image": "A-OKVQA/images/404655.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing hats in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.53125, "loss": 4.400669097900391 }, { "id": "identity_129907", "image": "VQA2.0/images/415659.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What weather conditions can be inferred from the image?" }, { "from": "gpt", "value": "The answer is Clear weather conditions" } ], "clip_score": 12.515625, "loss": 4.174831390380859 }, { "id": "identity_323766", "image": "Super-CLEVR/images/superCLEVR_new_000061.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are any brown shiny articulated vehicles visible?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 21.296875, "loss": 5.228067874908447 }, { "id": "identity_329593", "image": "A-OKVQA/images/400592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is present in this space?\nChoices:\n(A) dog\n(B) basketball hoop\n(C) messy desk\n(D) cat" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.640625, "loss": 4.3982415199279785 }, { "id": "identity_298511", "image": "IconQA/images/76427.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What operation has been performed on this letter? \nChoices:\n(A) flip\n(B) slide\n(C) turn" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.203125, "loss": 4.154360294342041 }, { "id": "identity_319501", "image": "FigureQA/images/34870.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.375, "loss": 4.582860469818115 }, { "id": "identity_158273", "image": "ScienceQA/images/9111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the geographical feature to the west of the highlighted state?" }, { "from": "gpt", "value": "The answer is Missouri River" } ], "clip_score": 27.234375, "loss": 4.1679558753967285 }, { "id": "identity_17023", "image": "Super-CLEVR/images/superCLEVR_new_001036.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more motorbikes that are in front of the cyan rubber cruiser than tiny yellow trucks?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 24.5625, "loss": 5.048224925994873 }, { "id": "identity_334378", "image": "A-OKVQA/images/9542.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What letter is hidden by the sign after the PHO?\nChoices:\n(A) n\n(B) t\n(C) m\n(D) g" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.4609375, "loss": 4.129725456237793 }, { "id": "identity_304906", "image": "ScienceQA/images/11308.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which characteristic did Asterotrygon possess? Choose the characteristic visible on the fossil.\nChoices:\n(A) a wide tail fin\n(B) spots on its skin\n(C) a long, thin tail\n(D) four legs" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.921875, "loss": 4.301133155822754 }, { "id": "identity_291611", "image": "A-OKVQA/images/407067.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the boy on the skateboard raising his hands in the air?\nChoices:\n(A) to balance\n(B) to clap\n(C) to celebrate\n(D) getting help" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.109375, "loss": 4.170785903930664 }, { "id": "identity_35054", "image": "A-OKVQA/images/67131.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which United States president was born in this car's state?\nChoices:\n(A) lincoln\n(B) reagan\n(C) obama\n(D) jefferson" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.109375, "loss": 4.270486354827881 }, { "id": "identity_261806", "image": "AI2D/images/3797.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the symbol \"B\" represent in the following options?\n(A) Roots\n(B) Flower\n(C) Leaves\n(D) Stem" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.421875, "loss": 4.528327941894531 }, { "id": "identity_334343", "image": "PMC-VQA/images/PMC8136906_koaa019-F7_446222.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of line is mentioned in the caption?\nChoices:\n(A) enhancer-reporter line\n(B) repressor-reporter line\n(C) promoter-reporter line\n(D) transcription factor-reporter line" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.9921875, "loss": 4.125051498413086 }, { "id": "identity_271574", "image": "FigureQA/images/18638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Dark Green or Dark Red?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.59375, "loss": 4.466997146606445 }, { "id": "identity_200924", "image": "Super-CLEVR/images/superCLEVR_new_001733.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly behind the green car?" }, { "from": "gpt", "value": "The answer is A motorcycle" } ], "clip_score": 16.90625, "loss": 4.0544657707214355 }, { "id": "identity_262742", "image": "PMC-VQA/images/PMC7063173_fig1_131008.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, where is the anatomical location of the spinal cord herniation?\nChoices:\n(A) C8 nerve root avulsion \n(B) Cervical spine \n(C) Lumbar spine \n(D) Sacrum" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.90625, "loss": 4.014146327972412 }, { "id": "identity_301994", "image": "PMC-VQA/images/PMC4760377_JCS183152F1_3603.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted on the right side in each panel?\nChoices:\n(A) Native fluorescence \n(B) Anti-Myc immunofluorescence \n(C) GFP fusions \n(D) Western Blot" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.265625, "loss": 4.30593204498291 }, { "id": "identity_121527", "image": "A-OKVQA/images/222751.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of sports activity is depicted in this image?" }, { "from": "gpt", "value": "The answer is Baseball" } ], "clip_score": 27.546875, "loss": 4.013226509094238 }, { "id": "identity_320516", "image": "ScienceQA/images/20692.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What property is associated with this item?\nChoices:\n(A) scratchy\n(B) yellow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.40625, "loss": 4.284238815307617 }, { "id": "identity_324739", "image": "A-OKVQA/images/449463.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the seats on the elephants called?\nChoices:\n(A) stool\n(B) howdah\n(C) spinner\n(D) recliner" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.765625, "loss": 4.28571891784668 }, { "id": "identity_264570", "image": "FigureQA/images/93498.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater in value, Royal Blue or Medium Orchid?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.265625, "loss": 4.633072376251221 }, { "id": "identity_314709", "image": "Super-CLEVR/images/superCLEVR_new_000159.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the small metallic thing to the right of a bus that is in front of a dirtbike?" }, { "from": "gpt", "value": "The answer is utility bike" } ], "clip_score": 25.578125, "loss": 5.219001293182373 }, { "id": "identity_37581", "image": "PMC-VQA/images/PMC6627429_cancers-11-00847-f004_7769.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the paraganglioma shown in the image? \nChoices:\n(A) Behind the descending aorta \n(B) In front of the descending aorta \n(C) Next to the ascending aorta \n(D) Next to the descending aorta " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.21875, "loss": 4.412516117095947 }, { "id": "identity_324820", "image": "PMC-VQA/images/PMC4232727_Fig2_336427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was used to capture the shown image of grape berry tissue regions?\nChoices:\n(A) X-ray imaging\n(B) CT scanning\n(C) Magnetic resonance imaging\n(D) Ultrasound imaging" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.75, "loss": 4.372566223144531 }, { "id": "identity_33789", "image": "PMC-VQA/images/PMC4514952_Fig2_409253.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what part of the brain were the abnormalities observed?\nChoices:\n(A) temporal lobe \n(B) frontal lobe \n(C) parietal lobe \n(D) occipital lobe " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.0625, "loss": 4.583065986633301 }, { "id": "identity_22838", "image": "PMC-VQA/images/PMC7370341_Fig2_216498.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where does the left OA arise from in the given image?\nChoices:\n(A) Superior mesenteric artery \n(B) Splenic artery \n(C) Celiac trunk \n(D) Renal artery" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.84375, "loss": 4.337693691253662 }, { "id": "identity_9288", "image": "AI2D/images/2650.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does letter K represent?\nChoices:\n(A) Tropic of Cancer\n(B) Tropic of Capricorn\n(C) Celestial Equator\n(D) Right Angle to Orbit" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.609375, "loss": 4.501546859741211 }, { "id": "identity_209428", "image": "PMC-VQA/images/PMC5727375_F1_252022.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image?" }, { "from": "gpt", "value": "The answer is Axial" } ], "clip_score": 27.015625, "loss": 4.075695514678955 }, { "id": "identity_292569", "image": "VizWiz/images/VizWiz_train_00007063.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is can tomatos" } ], "clip_score": 19.578125, "loss": 4.243234157562256 }, { "id": "identity_128332", "image": "TextVQA/images/5abdfa9050b53c7e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the background of the book cover?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 19.140625, "loss": 4.051986217498779 }, { "id": "identity_23699", "image": "AI2D/images/3211.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents Leaf\nChoices:\n(A) I\n(B) K\n(C) J\n(D) B" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.515625, "loss": 4.216113567352295 }, { "id": "identity_318874", "image": "VizWiz/images/VizWiz_train_00018240.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you describe what is shown here?" }, { "from": "gpt", "value": "The answer is ipad mini white around screen" } ], "clip_score": 31.59375, "loss": 4.630110740661621 }, { "id": "identity_284743", "image": "A-OKVQA/images/522461.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity are the individuals sitting in the snow engaged in?\nChoices:\n(A) waiting\n(B) sledding\n(C) gambling\n(D) skiing" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.09375, "loss": 4.200182914733887 }, { "id": "identity_73474", "image": "ScienceQA/images/14220.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many herbivores are depicted in the food web?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 28.015625, "loss": 4.421952247619629 }, { "id": "identity_115332", "image": "VQA-AS/images/14148.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many doors are visible in the room?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.703125, "loss": 4.2435712814331055 }, { "id": "identity_338109", "image": "VQA2.0/images/178000.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What name is on this person's shirt?" }, { "from": "gpt", "value": "The answer is parent" } ], "clip_score": 9.953125, "loss": 5.165310859680176 }, { "id": "identity_17293", "image": "Super-CLEVR/images/superCLEVR_new_001444.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any yellow bicycles that are on the left side of the brown car on the left side of the large object that is on the right side of the big utility bike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 27.5, "loss": 5.566761016845703 }, { "id": "identity_28572", "image": "A-OKVQA/images/6725.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is near the laptop?\nChoices:\n(A) egg\n(B) chair\n(C) apple\n(D) bacon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.90625, "loss": 4.452369689941406 }, { "id": "identity_141631", "image": "DocVQA/images/fgfl0225_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does the test market provide an opportunity for Camel?" }, { "from": "gpt", "value": "The answer is It allows learning about Camel trademark usage" } ], "clip_score": 21.078125, "loss": 4.319530487060547 }, { "id": "identity_25122", "image": "TextVQA/images/ed7e1688e00a4f45.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what's the name of this book?" }, { "from": "gpt", "value": "The answer is fon fon" } ], "clip_score": 19.390625, "loss": 4.228439807891846 }, { "id": "identity_71442", "image": "ScienceQA/images/11499.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many planets in the table are primarily composed of rock?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 24.140625, "loss": 4.670449256896973 }, { "id": "identity_287159", "image": "A-OKVQA/images/59015.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose are the wood planks on the barge being used?\nChoices:\n(A) partying\n(B) decoration\n(C) traction\n(D) building" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.265625, "loss": 4.04657506942749 }, { "id": "identity_35159", "image": "IconQA/images/66285.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a house. Which picture is sixth?\nChoices:\n(A) paw\n(B) dog\n(C) house" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.53125, "loss": 4.535496234893799 }, { "id": "identity_295391", "image": "VQA2.0/images/58343.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the level of hardness of the ball?" }, { "from": "gpt", "value": "The answer is very much" } ], "clip_score": 14.03125, "loss": 4.91726541519165 }, { "id": "identity_302047", "image": "FigureQA/images/30607.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Light Salmon?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.96875, "loss": 4.1211724281311035 }, { "id": "identity_190923", "image": "PMC-VQA/images/PMC5167692_F1_106251.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arrows are pointing to features in the MRI image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 33.71875, "loss": 4.286820411682129 }, { "id": "identity_321551", "image": "A-OKVQA/images/179153.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the person in the white jacket wearing a helmet?\nChoices:\n(A) dress code\n(B) for fun\n(C) style\n(D) protection" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.46875, "loss": 4.089733600616455 }, { "id": "identity_216711", "image": "VQA2.0/images/486415.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many croissants are there on the plate?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.609375, "loss": 4.668330192565918 }, { "id": "identity_48843", "image": "DocVQA/images/xrvy0000_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first topic to be discussed in the General Session on the meeting day?" }, { "from": "gpt", "value": "The answer is Test Market Learning" } ], "clip_score": 23.40625, "loss": 4.908114910125732 }, { "id": "identity_104643", "image": "A-OKVQA/images/171479.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many athletes are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.984375, "loss": 4.411374092102051 }, { "id": "identity_28953", "image": "A-OKVQA/images/238928.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person with the whip riding on?\nChoices:\n(A) donkey\n(B) horse\n(C) goat\n(D) camel" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.703125, "loss": 4.151071548461914 }, { "id": "identity_15364", "image": "FigureQA/images/93041.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.53125, "loss": 4.609174728393555 }, { "id": "identity_274526", "image": "Super-CLEVR/images/superCLEVR_new_001386.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do any other items share the same color as the large scooter?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 27.390625, "loss": 4.94017219543457 }, { "id": "identity_13628", "image": "Super-CLEVR/images/superCLEVR_new_001763.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The big matte thing that is the same color as the tiny metallic bicycle is what shape?" }, { "from": "gpt", "value": "The answer is road bike" } ], "clip_score": 26.28125, "loss": 4.229907512664795 }, { "id": "identity_306936", "image": "FigureQA/images/84606.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Rosy Brown's value the highest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.421875, "loss": 4.569367408752441 }, { "id": "identity_311305", "image": "PMC-VQA/images/PMC6385472_Fig3_439276.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the body is shown in the CT scan?\nChoices:\n(A) Brain\n(B) Chest\n(C) Abdomen\n(D) Legs" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.234375, "loss": 4.124079704284668 }, { "id": "identity_238935", "image": "A-OKVQA/images/183166.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are riding motorcycles in the image, and what unusual item is one of them holding?" }, { "from": "gpt", "value": "The answer is Three people, umbrella" } ], "clip_score": 28.96875, "loss": 4.01212215423584 }, { "id": "identity_197523", "image": "PMC-VQA/images/PMC7916164_materials-14-00828-f006_378225.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the strip on the edge of the device?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 22.65625, "loss": 4.025704383850098 }, { "id": "identity_299895", "image": "A-OKVQA/images/402541.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the numbers on the bikes of these bikers?\nChoices:\n(A) insurance\n(B) driver iq\n(C) racing numbers\n(D) vehicle registrations" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.484375, "loss": 4.744487762451172 }, { "id": "identity_319203", "image": "FigureQA/images/61501.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the value of Dark Green the lowest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.0, "loss": 4.210726737976074 }, { "id": "identity_327034", "image": "PMC-VQA/images/PMC6462822_f0040_465003.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the colors green, blue, and red signify on these images?\nChoices:\n(A) Axial, Sagittal, and Coronal planes\n(B) Anterior-Posterior, Inferior-Superior, and Left-Right\n(C) Anterior-Posterior, Inferior-Superior, and Proximal-Distal\n(D) Anterior-Posterior, Superior-Inferior, and Medial-Lateral." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.578125, "loss": 4.114377021789551 }, { "id": "identity_294218", "image": "PMC-VQA/images/PMC3923567_F2_266904.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the radiologic findings in the image show?\nChoices:\n(A) Pleural effusion \n(B) Ground-glass opacities \n(C) Nodular opacities \n(D) Cystic lesions" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.59375, "loss": 4.671064853668213 }, { "id": "identity_263163", "image": "ScienceQA/images/19517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of South Carolina?\nChoices:\n(A) Charleston\n(B) Columbia\n(C) Lansing\n(D) Los Angeles" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.109375, "loss": 4.198441505432129 }, { "id": "identity_14673", "image": "Super-CLEVR/images/superCLEVR_new_000538.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there anything else that has the same material as the small truck?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 20.71875, "loss": 5.019268989562988 }, { "id": "identity_298921", "image": "VizWiz/images/VizWiz_train_00011469.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is displayed on the screen?" }, { "from": "gpt", "value": "The answer is nothing to say" } ], "clip_score": 17.265625, "loss": 4.779120922088623 }, { "id": "identity_293046", "image": "DVQA/images/bar_train_00144093.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the bottom element in each stack of bars?" }, { "from": "gpt", "value": "The answer is tongue" } ], "clip_score": 21.265625, "loss": 4.598971366882324 }, { "id": "identity_1849", "image": "ScienceQA/images/6727.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Arizona?\nChoices:\n(A) Phoenix\n(B) Tucson\n(C) Honolulu\n(D) Juneau" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.203125, "loss": 4.376049995422363 }, { "id": "identity_30377", "image": "PMC-VQA/images/PMC6826538_genes-10-00762-f002_60464.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the insert in the electron micrograph showing? \nChoices:\n(A) Smooth collagen fibres \n(B) Irregular collagen fibres \n(C) No collagen fibres \n(D) Collagen fibres with a consistent diameter " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.734375, "loss": 4.030905723571777 }, { "id": "identity_119332", "image": "A-OKVQA/images/72514.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cows are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.15625, "loss": 4.36013126373291 }, { "id": "identity_110351", "image": "A-OKVQA/images/67002.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible within the booth area?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.28125, "loss": 4.524340629577637 }, { "id": "identity_321044", "image": "PMC-VQA/images/PMC7594836_F1_285004.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the mass lesion located?\nChoices:\n(A) Thoracic cavity\n(B) Abdomen\n(C) Pelvic cavity\n(D) Head and neck" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.546875, "loss": 4.861663818359375 }, { "id": "identity_268440", "image": "PMC-VQA/images/PMC4583733_Fig2_427137.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which phase was the image captured?\nChoices:\n(A) Early Arterial phase\n(B) Hepatic Parenchymal phase\n(C) Portal venous phase\n(D) Late arterial phase" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.015625, "loss": 4.290577411651611 }, { "id": "identity_326402", "image": "AI2D/images/3387.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is above this?\nChoices:\n(A) hind wing\n(B) fore wing\n(C) antenna\n(D) thorax" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.75, "loss": 4.935916423797607 }, { "id": "identity_319872", "image": "ScienceQA/images/11474.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Helena\n(B) Reno\n(C) Carson City\n(D) Denver" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.078125, "loss": 4.362974643707275 }, { "id": "identity_20299", "image": "ScienceQA/images/20141.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these four objects have in common?\nChoices:\n(A) flexible\n(B) translucent\n(C) soft" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.328125, "loss": 4.294079780578613 }, { "id": "identity_28335", "image": "PMC-VQA/images/PMC4779085_Fig5_8741.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the abnormality seen in the echocardiography? \nChoices:\n(A) right coronary artery aneurysm \n(B) left coronary artery aneurysm \n(C) pulmonary arterial hypertension \n(D) aortic stenosis " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.40625, "loss": 4.365675926208496 }, { "id": "identity_220713", "image": "TextVQA/images/eaec8e244807ffbf.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are wearing red jerseys in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.265625, "loss": 4.418915748596191 }, { "id": "identity_28149", "image": "A-OKVQA/images/385448.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What animal is near the motorcycles?\nChoices:\n(A) zebra\n(B) cow\n(C) tiger\n(D) bear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.796875, "loss": 4.018592357635498 }, { "id": "identity_262396", "image": "PMC-VQA/images/PMC3518965_fig3_171599.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which type of images was the enhancement observed?\nChoices:\n(A) Coronal.\n(B) Sagittal.\n(C) Axial.\n(D) MIP." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.125, "loss": 4.059834003448486 }, { "id": "identity_267700", "image": "VQA-RAD/images/synpic55948.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the abnormality localized or spread out?" }, { "from": "gpt", "value": "The answer is Diffuse" } ], "clip_score": 20.59375, "loss": 4.0330810546875 }, { "id": "identity_11857", "image": "ScienceQA/images/5372.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Rhode Island\n(B) Tennessee\n(C) New Hampshire\n(D) Florida" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.078125, "loss": 4.169809341430664 }, { "id": "identity_29771", "image": "PMC-VQA/images/PMC6971102_F1_103180.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the primary color associated with the white matter skeleton in the image? \nChoices:\n(A) Red \n(B) Green \n(C) Yellow \n(D) Blue " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.71875, "loss": 4.340577125549316 }, { "id": "identity_227001", "image": "Super-CLEVR/images/superCLEVR_new_001563.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Compare the sizes of the shiny tandem bike in front of the cyan shiny thing, the big metal motorbike, and the small purple plane. Are the tandem bike and the motorbike both larger than the plane?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 36.59375, "loss": 4.3825273513793945 }, { "id": "identity_286034", "image": "IconQA/images/3829.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of triangles an even or odd number?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.65625, "loss": 4.092873573303223 }, { "id": "identity_333211", "image": "PMC-VQA/images/PMC8864119_F2_200013.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the radiological image?\nChoices:\n(A) Abdominal X-ray image\n(B) Chest X-ray image\n(C) Ear X-ray image\n(D) Eye X-ray image" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.1875, "loss": 4.2060041427612305 }, { "id": "identity_280683", "image": "A-OKVQA/images/111259.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the activity being depicted in the image?\nOptions:\n(A) kayaking\n(B) paddling\n(C) rafting\n(D) canoeing" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.046875, "loss": 4.280942916870117 }, { "id": "identity_14123", "image": "FigureQA/images/20282.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.375, "loss": 4.47805118560791 }, { "id": "identity_266610", "image": "IconQA/images/13714.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on: pink or white?\nChoices:\n(A) pink\n(B) white" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.21875, "loss": 4.37952184677124 }, { "id": "identity_276862", "image": "TextVQA/images/72dce20aee0cff02.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kinds of flyers are displayed on the windows?" }, { "from": "gpt", "value": "The answer is food" } ], "clip_score": 25.75, "loss": 4.245554447174072 }, { "id": "identity_303281", "image": "FigureQA/images/27638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.3984375, "loss": 4.020145893096924 }, { "id": "identity_331174", "image": "PMC-VQA/images/PMC4543454_Fig3_415948.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are some of the abnormalities seen in this MRI image?\nChoices:\n(A) Enlarged lymph nodes and glands \n(B) Increased vascularity \n(C) Tissue inflammation \n(D) Skin lesions" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.234375, "loss": 4.3329877853393555 }, { "id": "identity_18590", "image": "ScienceQA/images/1847.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) sticky\n(B) shiny" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.390625, "loss": 4.358267784118652 }, { "id": "identity_332900", "image": "Super-CLEVR/images/superCLEVR_new_000348.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the biplane and the red object the same size?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 17.71875, "loss": 5.221587181091309 }, { "id": "identity_11271", "image": "A-OKVQA/images/467564.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What objects are stored on top of the red vehicle?\nChoices:\n(A) tools\n(B) hoses\n(C) luggage\n(D) tires" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 12.953125, "loss": 4.1417365074157715 }, { "id": "identity_288114", "image": "A-OKVQA/images/83268.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of athlete is being described here? Is it a gymnast, cheerleader, surfer, or snowboarder?\nChoices:\n(A) gymnast\n(B) cheerleader\n(C) surfer\n(D) snowboarder" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.265625, "loss": 4.10482931137085 }, { "id": "identity_133280", "image": "VQA2.0/images/310300.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of vegetables are visible on the cutting board?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.203125, "loss": 4.410166263580322 }, { "id": "identity_19632", "image": "PMC-VQA/images/PMC5409997_f0001_158114.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of image is shown in the caption? \nChoices:\n(A) X-Ray \n(B) MRI \n(C) CT Scan \n(D) Ultrasound " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.84375, "loss": 4.063977241516113 }, { "id": "identity_325765", "image": "A-OKVQA/images/501138.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where might this arrangement be found?\nChoices:\n(A) office/workplace\n(B) bedroom\n(C) closet\n(D) attic" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.671875, "loss": 4.264516353607178 }, { "id": "identity_294948", "image": "A-OKVQA/images/188311.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is required to ensure the items in the pink container continue to function properly?\nChoices:\n(A) sharpener\n(B) electricity\n(C) gas\n(D) knife" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.1328125, "loss": 4.246655464172363 }, { "id": "identity_36355", "image": "A-OKVQA/images/68594.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the man holding a plastic bag?\nChoices:\n(A) being mischievous\n(B) as punishment\n(C) for fun\n(D) making purchase" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.171875, "loss": 4.17305850982666 }, { "id": "identity_299856", "image": "PMC-VQA/images/PMC3513975_fig1_170285.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the meaning of the red arrows shown in the images?\nChoices:\n(A) Blood flow \n(B) Normal brain tissue \n(C) Tumors \n(D) Inflammatory lesions" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.65625, "loss": 4.392589569091797 }, { "id": "identity_280457", "image": "IconQA/images/40350.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been taken on this letter? \nChoices:\n(A) turn\n(B) flip\n(C) slide" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.359375, "loss": 4.399102687835693 }, { "id": "identity_35609", "image": "A-OKVQA/images/231476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this area called?\nChoices:\n(A) mall\n(B) taxi stand\n(C) train depot\n(D) repair shop" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.0625, "loss": 4.373652458190918 }, { "id": "identity_489", "image": "TextVQA/images/0e5fee6f27d73694.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what type of tag is in the title of the comic?" }, { "from": "gpt", "value": "The answer is denkste dir so" } ], "clip_score": 20.421875, "loss": 4.444343566894531 }, { "id": "identity_293955", "image": "PMC-VQA/images/PMC7873983_F1_365076.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the digestive system is depicted in the figure provided? \nOptions:\n(A) Small intestine\n(B) Large intestine\n(C) Stomach\n(D) Pancreas" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.5, "loss": 4.072382926940918 }, { "id": "identity_276450", "image": "VQA-AS/images/10979.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the cat?" }, { "from": "gpt", "value": "The answer is carl" } ], "clip_score": 24.328125, "loss": 4.334696292877197 }, { "id": "identity_331286", "image": "ScienceQA/images/2901.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which chemical formula corresponds to this molecule?\nChoices:\n(A) NH3\n(B) NH\n(C) H\n(D) H3" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.984375, "loss": 4.008852958679199 }, { "id": "identity_293708", "image": "VQA-AS/images/12770.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the woman wearing a blue shirt?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 17.171875, "loss": 4.099281311035156 }, { "id": "identity_319373", "image": "FigureQA/images/38032.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.34375, "loss": 4.572798252105713 }, { "id": "identity_312751", "image": "A-OKVQA/images/483027.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are they here?\nChoices:\n(A) to rest\n(B) clean up\n(C) to eat\n(D) selling food" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.09375, "loss": 4.108820915222168 }, { "id": "identity_308186", "image": "ScienceQA/images/15058.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What could the test show?\nChoices:\n(A) how much the new turbine would weigh\n(B) whether the new turbine could produce 10% more electricity\n(C) if the new turbine could turn easily" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.1875, "loss": 4.239691257476807 }, { "id": "identity_300558", "image": "A-OKVQA/images/12666.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this vehicle most likely traveling to?\nChoices:\n(A) residential places\n(B) insurance places\n(C) education places\n(D) sightseeing places" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.171875, "loss": 4.057260513305664 }, { "id": "identity_269026", "image": "FigureQA/images/73656.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Red the color with the largest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.34375, "loss": 4.017730712890625 }, { "id": "identity_300043", "image": "ScienceQA/images/11794.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which place is highlighted?\nChoices:\n(A) Cuba\n(B) Jamaica\n(C) Haiti\n(D) Saint Kitts and Nevis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.1875, "loss": 4.231863021850586 }, { "id": "identity_30526", "image": "PMC-VQA/images/PMC5854565_Fig1_289552.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the image show? \nChoices:\n(A) A CT scan \n(B) An ultrasound \n(C) An x-ray \n(D) An MRI scan. " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.953125, "loss": 4.303811550140381 }, { "id": "identity_143870", "image": "DocVQA/images/phjm0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last name of the person giving the critique of analytical methods?" }, { "from": "gpt", "value": "The answer is Schmitz" } ], "clip_score": 17.0, "loss": 4.111156463623047 }, { "id": "identity_197707", "image": "VQA-AS/images/18148.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of window is depicted in the image?" }, { "from": "gpt", "value": "The answer is Arched" } ], "clip_score": 24.8125, "loss": 4.073953628540039 }, { "id": "identity_20671", "image": "PMC-VQA/images/PMC7040219_F1_123962.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was found in the CT scan? \nChoices:\n(A) Pulmonary embolism \n(B) Pleural effusion \n(C) Pneumothorax \n(D) Lung nodules " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.640625, "loss": 4.101296424865723 }, { "id": "identity_306847", "image": "AI2D/images/3186.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the central part of this structure?\nChoices:\n(A) plasma membrane\n(B) cytoplasm\n(C) none of the above\n(D) nucleolus" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.546875, "loss": 5.212512493133545 }, { "id": "identity_326766", "image": "PMC-VQA/images/PMC7677649_fig0010_301838.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the condition that is suspected based on the finding in this image? \nChoices:\n(A) Artery rupture \n(B) Infection \n(C) Cancer spread \n(D) Nerve damage" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.484375, "loss": 4.778780937194824 }, { "id": "identity_327213", "image": "PMC-VQA/images/PMC9224862_jcm-11-03399-f001_321101.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging method is shown in this image?\nChoices:\n(A) MRI\n(B) Ultrasound\n(C) CT scan\n(D) X-ray." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.03125, "loss": 5.006392955780029 }, { "id": "identity_262142", "image": "VizWiz/images/VizWiz_train_00000593.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the expiration date for this?" }, { "from": "gpt", "value": "The answer is w" } ], "clip_score": 24.453125, "loss": 5.247591972351074 }, { "id": "identity_300473", "image": "A-OKVQA/images/389401.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the black object attaching the bike to the pole being used for?\n\nChoices:\n(A) pulley\n(B) wrench\n(C) ramp\n(D) lock" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.515625, "loss": 4.434109687805176 }, { "id": "identity_239263", "image": "A-OKVQA/images/567142.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify all the different types of vessels in the image and describe how each one is moved" }, { "from": "gpt", "value": "The answer is Manpower, motor, manpower, motor" } ], "clip_score": 12.3359375, "loss": 4.359947681427002 }, { "id": "identity_286388", "image": "PMC-VQA/images/PMC8024434_Fig6_410859.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On T1-weighted imaging, which region of the brain displays a hypointense signal?\nChoices:\n(A) Corpus Callosum\n(B) Medial cerebellum\n(C) Lateral cerebellum\n(D) None of the above." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.734375, "loss": 4.465923309326172 }, { "id": "identity_319968", "image": "DocVQA/images/qhfd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the program director at this radio station?" }, { "from": "gpt", "value": "The answer is James Butler" } ], "clip_score": 21.03125, "loss": 4.070198059082031 }, { "id": "identity_276716", "image": "Super-CLEVR/images/superCLEVR_new_001839.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the dimensions of the gray metallic motorbike equivalent to those of the matte aeroplane?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 21.84375, "loss": 4.940556526184082 }, { "id": "identity_289328", "image": "A-OKVQA/images/416521.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what way are the people acquainted with each other?\nChoices:\n(A) rivals\n(B) coworkers\n(C) siblings\n(D) neighbors" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 9.609375, "loss": 4.645515441894531 }, { "id": "identity_70234", "image": "TQA/images/abc_question_images/human_system_digestive_13708.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label \"T\" indicate in the digestive system diagram?" }, { "from": "gpt", "value": "The answer is Liver" } ], "clip_score": 33.0, "loss": 4.270445346832275 }, { "id": "identity_179409", "image": "IconQA/images/93618.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sets of paw prints are there in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 31.015625, "loss": 4.1156840324401855 }, { "id": "identity_306816", "image": "TQA/images/question_images/human_system_digestive_3720.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What connects the Mouth to the Stomach?\nChoices:\n(A) Salivary Gland\n(B) Rectum\n(C) Oesophagus\n(D) Liver" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.578125, "loss": 4.296694278717041 }, { "id": "identity_329039", "image": "PMC-VQA/images/PMC6945265_f1_95060.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the brain seems to be well separated in this image?\nChoices:\n(A) Thalami\n(B) Mesencephalon\n(C) Septum pellucidum\n(D) Interhemispheric fissure" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.46875, "loss": 4.555183410644531 }, { "id": "identity_301717", "image": "TextVQA/images/c651ac8bf3bfcc37.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this company?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 17.984375, "loss": 4.290210247039795 }, { "id": "identity_151416", "image": "ScienceQA/images/852.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries are highlighted in green on the map?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 23.984375, "loss": 5.143332004547119 }, { "id": "identity_111451", "image": "TextVQA/images/05bd3c543fbfe4af.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many organizations are listed as part of the event's organization in the image?" }, { "from": "gpt", "value": "The answer is Six organizations" } ], "clip_score": 13.6328125, "loss": 4.423732757568359 }, { "id": "identity_111441", "image": "A-OKVQA/images/229949.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the shirt of the person closest to the train door?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 26.984375, "loss": 4.099880218505859 }, { "id": "identity_273375", "image": "TextVQA/images/0d9a4f68a359ee57.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which name is written on the gold book in the picture on the left?" }, { "from": "gpt", "value": "The answer is lauren connard" } ], "clip_score": 15.7578125, "loss": 4.022393703460693 }, { "id": "identity_157240", "image": "TQA/images/abc_question_images/earth_day_night_10087.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label H represent in this image?" }, { "from": "gpt", "value": "The answer is Umbra" } ], "clip_score": 26.703125, "loss": 4.805978775024414 }, { "id": "identity_287739", "image": "PMC-VQA/images/PMC3961443_f1-ol-07-04-1270_275948.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the hypodense mass located among the following options: left anterior bladder wall, right anterior bladder wall, posterior bladder wall, or bladder base wall?\nChoices:\n(A) left anterior bladder wall \n(B) right anterior bladder wall \n(C) posterior bladder wall \n(D) bladder base wall" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.859375, "loss": 4.317850589752197 }, { "id": "identity_28533", "image": "A-OKVQA/images/313712.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who uses the circular glass object on the table?\nChoices:\n(A) mothers\n(B) smokers\n(C) vegans\n(D) vegetarians" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.9921875, "loss": 4.595582485198975 }, { "id": "identity_337792", "image": "AI2D/images/4197.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term for the substances out of which a thing is or can be made?\nChoices:\n(A) Cycle\n(B) Material\n(C) Cooling\n(D) Molten" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.828125, "loss": 4.43489933013916 }, { "id": "identity_13638", "image": "DVQA/images/bar_train_00088883.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item sold the most number of units summed across all the stores?" }, { "from": "gpt", "value": "The answer is career" } ], "clip_score": 31.015625, "loss": 4.150115966796875 }, { "id": "identity_133760", "image": "TextVQA/images/5a35ae8804f9177a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of the vending machine?" }, { "from": "gpt", "value": "The answer is DyDo" } ], "clip_score": 28.96875, "loss": 4.341655731201172 }, { "id": "identity_320908", "image": "PMC-VQA/images/PMC6641677_F1_11576.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrow in this indicate? \nChoices:\n(A) Abnormal structure of some pulmonary arteries. \n(B) Flow of blood in the body. \n(C) Right and Left pulmonary arteries. \n(D) Blood clot in a main pulmonary artery." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.8125, "loss": 4.83535099029541 }, { "id": "identity_117883", "image": "VQA2.0/images/519601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tracks converge at the point where the train is located?" }, { "from": "gpt", "value": "The answer is Two tracks" } ], "clip_score": 18.46875, "loss": 4.3732523918151855 }, { "id": "identity_183987", "image": "A-OKVQA/images/182715.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many whole potatoes are visible on the cutting board?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 20.0, "loss": 4.47255277633667 }, { "id": "identity_288148", "image": "PMC-VQA/images/PMC5837235_FIG8_283888.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique is depicted in the image provided?\nChoices:\n(A) Computed Tomography\n(B) Magnetic Resonance Imaging\n(C) X-ray\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.296875, "loss": 4.685309410095215 }, { "id": "identity_23573", "image": "IconQA/images/4808.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a moon. Which picture is third?\nChoices:\n(A) moon\n(B) star\n(C) rocket" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.234375, "loss": 4.033442497253418 }, { "id": "identity_282505", "image": "TextVQA/images/24fac58b751faa9b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which edition does the watch belong to?" }, { "from": "gpt", "value": "The answer is limited" } ], "clip_score": 23.15625, "loss": 5.010748386383057 }, { "id": "identity_63709", "image": "AI2D/images/4023.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the Lithosphere in the diagram?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 28.765625, "loss": 4.3183088302612305 }, { "id": "identity_302512", "image": "Super-CLEVR/images/superCLEVR_new_001672.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any other thing of the same color as this?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 12.90625, "loss": 4.501499176025391 }, { "id": "identity_126818", "image": "VizWiz/images/VizWiz_train_00004572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many caterpillars can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 26.484375, "loss": 4.69459867477417 }, { "id": "identity_43437", "image": "DocVQA/images/fxwg0023_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the account mentioned in the document?" }, { "from": "gpt", "value": "The answer is Janet's Super Market" } ], "clip_score": 24.046875, "loss": 4.392232418060303 }, { "id": "identity_325846", "image": "DocVQA/images/lghx0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who sent this letter?" }, { "from": "gpt", "value": "The answer is Ray Renteria" } ], "clip_score": 21.890625, "loss": 4.154488563537598 }, { "id": "identity_322499", "image": "PMC-VQA/images/PMC9017362_rbab071-F5_256004.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the technique used for imaging shown here?\nChoices:\n(A) X-ray imaging\n(B) CT scanning\n(C) Scanning electron microscopy\n(D) Transmission electron microscopy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.96875, "loss": 4.298119068145752 }, { "id": "identity_12842", "image": "PMC-VQA/images/PMC8303455_cells-10-01578-f002_16104.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the asterisk (*) indicate in the image? \nChoices:\n(A) The brightness of the fluorescence. \n(B) The separation of somites. \n(C) The position of the midline. \n(D) The number of cells. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.4375, "loss": 5.305540084838867 }, { "id": "identity_310725", "image": "ScienceQA/images/5715.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is shown?\nChoices:\n(A) Antigua and Barbuda\n(B) Grenada\n(C) Jamaica\n(D) Haiti" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.5625, "loss": 4.778968334197998 }, { "id": "identity_22319", "image": "A-OKVQA/images/348790.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the colorful lights used for?\nChoices:\n(A) parades\n(B) decoration\n(C) controlling traffic\n(D) dancing" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.203125, "loss": 4.415152549743652 }, { "id": "identity_336424", "image": "Super-CLEVR/images/superCLEVR_new_000706.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the matte thing to the left of the big matte object the same color as the metallic object behind the small metal biplane?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 25.125, "loss": 5.1648054122924805 }, { "id": "identity_265356", "image": "A-OKVQA/images/558011.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What structure is located at the peak of this corner in the center of the city square?\nChoices:\n(A) church tower\n(B) advertisement\n(C) latitude\n(D) optiplex" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.875, "loss": 4.293047904968262 }, { "id": "identity_1606", "image": "A-OKVQA/images/11968.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What impairs sight here?\nChoices:\n(A) blinders\n(B) singing\n(C) eye doctor\n(D) cars" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.6171875, "loss": 4.28773832321167 }, { "id": "identity_10181", "image": "Super-CLEVR/images/superCLEVR_new_001794.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more large blue road bikes that are behind the tiny cyan metallic aeroplane than large green shiny double buss?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 27.625, "loss": 4.988122940063477 }, { "id": "identity_308088", "image": "ScienceQA/images/12069.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do these three items have in common?\nChoices:\n(A) blue\n(B) sticky\n(C) transparent" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.5703125, "loss": 4.129534721374512 }, { "id": "identity_327220", "image": "A-OKVQA/images/528713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What event is going to happen?\nChoices:\n(A) horse parade\n(B) horse racing\n(C) horse trading\n(D) horse show" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.109375, "loss": 4.236560821533203 }, { "id": "identity_320992", "image": "TQA/images/question_images/tides_2603.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What direction is the gravitational pull with respect to this celestial body?\nChoices:\n(A) Away from it.\n(B) Above it.\n(C) Towards it.\n(D) Below it." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.25, "loss": 4.136320114135742 }, { "id": "identity_336984", "image": "ScienceQA/images/5240.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Carson City\n(B) Honolulu\n(C) Salem\n(D) Hilo" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.75, "loss": 4.544878959655762 }, { "id": "identity_315666", "image": "PMC-VQA/images/PMC7992334_Fig3_399312.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is seen in all ultrasonograms?\nChoices:\n(A) Bright pleura \n(B) Dark pleura \n(C) Hypoechogenic line \n(D) No demarcation line." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.984375, "loss": 4.848735332489014 }, { "id": "identity_125689", "image": "TextVQA/images/7ebc2a7ca5753975.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are visible in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 18.671875, "loss": 4.43624210357666 }, { "id": "identity_300151", "image": "ScienceQA/images/12147.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the highlighted colony?\nChoices:\n(A) Indiana\n(B) Virginia\n(C) Vermont\n(D) West Virginia" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.46875, "loss": 4.175845146179199 }, { "id": "identity_204314", "image": "Super-CLEVR/images/superCLEVR_new_001699.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color among the vehicles in the image?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 22.984375, "loss": 4.5092034339904785 }, { "id": "identity_271445", "image": "ScienceQA/images/20237.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) Rhode Island\n(B) Ohio\n(C) New Jersey\n(D) New York" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.984375, "loss": 5.140965461730957 }, { "id": "identity_308030", "image": "TextVQA/images/ff638b94043dd0b3.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this place?" }, { "from": "gpt", "value": "The answer is oracle" } ], "clip_score": 16.703125, "loss": 5.324385643005371 }, { "id": "identity_300100", "image": "ScienceQA/images/14119.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Boston\n(B) Hartford\n(C) New Haven\n(D) Bridgeport" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.015625, "loss": 4.941156387329102 }, { "id": "identity_207317", "image": "A-OKVQA/images/492411.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are riding the elephant?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 13.921875, "loss": 4.459371089935303 }, { "id": "identity_332620", "image": "TextVQA/images/ecf5a361d8aa85cc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What concept is this large hanging banner stating?" }, { "from": "gpt", "value": "The answer is collaboration " } ], "clip_score": 27.3125, "loss": 4.583837985992432 }, { "id": "identity_287335", "image": "A-OKVQA/images/52638.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which individual is in control of the elephant among the following options: the first kid, the fat man, the third kid, or the last woman?\nChoices:\n(A) first kid\n(B) fat man\n(C) third kid\n(D) last woman" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.703125, "loss": 4.013489246368408 }, { "id": "identity_295773", "image": "ChartQA/images/multi_col_60547.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Between the Urban and Rural regions, which has the smallest difference?" }, { "from": "gpt", "value": "The answer is Limited" } ], "clip_score": 27.28125, "loss": 5.0104079246521 }, { "id": "identity_294721", "image": "A-OKVQA/images/401132.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the emotional state of the woman wearing the grey shirt?\nChoices:\n(A) depressed\n(B) hostile\n(C) mad\n(D) excited" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.75, "loss": 4.516223907470703 }, { "id": "identity_272119", "image": "PMC-VQA/images/PMC7767774_os12791-fig-0006_330604.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging modality is utilized to capture the image of the ACL graft?\nChoices:\n(A) X-ray\n(B) MRI\n(C) CT scan\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.21875, "loss": 4.465683460235596 }, { "id": "identity_316784", "image": "FigureQA/images/37016.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Bubblegum the lower median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.046875, "loss": 4.009988307952881 }, { "id": "identity_32462", "image": "A-OKVQA/images/324155.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of goods are sold in the store next to the tree?\nChoices:\n(A) baked goods\n(B) coffee\n(C) nutritional supplements\n(D) pizza" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 7.01953125, "loss": 4.056275367736816 }, { "id": "identity_61369", "image": "MapQA/images/map_8607.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states fall into the 40.5%-47.2% percentage range?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 27.296875, "loss": 4.477194309234619 }, { "id": "identity_33476", "image": "PMC-VQA/images/PMC5439181_fig2_166937.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of lesion is consistent with the imaging findings mentioned in the caption? \nChoices:\n(A) acute stroke \n(B) malignant tumor \n(C) abscess \n(D) vascular malformation " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.375, "loss": 4.42177152633667 }, { "id": "identity_279619", "image": "FigureQA/images/80177.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Dark Khaki or Indigo?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.890625, "loss": 4.025333881378174 }, { "id": "identity_318383", "image": "TQA/images/question_images/muscle_fiber_7085.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What occurs if this is removed?\nChoices:\n(A) The muscle loses connection to the bone.\n(B) The muscle wraps itself around the bone.\n(C) The muscle loses mass.\n(D) The muscle's strength increases." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.859375, "loss": 4.022089004516602 }, { "id": "identity_267107", "image": "A-OKVQA/images/161719.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of fruit juices are available here among grapefruits, grapes, oranges, and lemons?\nChoices:\n(A) grapefruits\n(B) grapes\n(C) orange\n(D) lemons" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.15625, "loss": 4.397408485412598 }, { "id": "identity_129826", "image": "PMC-VQA/images/PMC7214677_F5_166766.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is used to mark the screws' labels?" }, { "from": "gpt", "value": "The answer is Cyan" } ], "clip_score": 19.96875, "loss": 4.441918849945068 }, { "id": "identity_27443", "image": "PMC-VQA/images/PMC8046683_FIG1_418003.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of structure is identified in the left kidney? \nChoices:\n(A) solid mass \n(B) complex mass \n(C) cystic formation \n(D) calcification " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.390625, "loss": 4.924869060516357 }, { "id": "identity_148090", "image": "DocVQA/images/sxdg0227_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title space for at the bottom of the form?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 21.25, "loss": 4.716496467590332 }, { "id": "identity_64501", "image": "ScienceQA/images/8237.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many regions are highlighted on the eastern coastline?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 26.859375, "loss": 4.414464473724365 }, { "id": "identity_303124", "image": "IconQA/images/10347.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this shape?\nChoices:\n(A) slide\n(B) turn\n(C) flip" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.65625, "loss": 4.840142250061035 }, { "id": "identity_278404", "image": "AI2D/images/463.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of animal is represented by label B in the image? Is it a seahorse, dolphin, shark, or fish?\nChoices:\n(A) Seahorse\n(B) Dolphin\n(C) Shark\n(D) Fish" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.9375, "loss": 4.337674617767334 }, { "id": "identity_184568", "image": "A-OKVQA/images/248229.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the horse second from the left?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 22.1875, "loss": 4.211308002471924 }, { "id": "identity_286454", "image": "A-OKVQA/images/27041.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for her employment at this location?\nChoices:\n(A) left home\n(B) sunny outside\n(C) power source\n(D) is hiding" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.5234375, "loss": 4.466860771179199 }, { "id": "identity_323876", "image": "TQA/images/question_images/simple_machines_9249.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the simple machine shown in this image?\nChoices:\n(A) lever\n(B) wedge\n(C) pulley\n(D) inclined plane" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.171875, "loss": 4.365933418273926 }, { "id": "identity_264986", "image": "FigureQA/images/94407.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Deep Sky Blue the color with the smallest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.90625, "loss": 4.050166130065918 }, { "id": "identity_185903", "image": "TextVQA/images/830baeb11a6b2171.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shirt worn by the person directly behind the officer?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 22.359375, "loss": 4.585482597351074 }, { "id": "identity_2551", "image": "ScienceQA/images/6982.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Connecticut?\nChoices:\n(A) New Haven\n(B) Hartford\n(C) Omaha\n(D) Fayetteville" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.859375, "loss": 4.114956378936768 }, { "id": "identity_28333", "image": "A-OKVQA/images/285131.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What boulevard is the Jones Theater on?\nChoices:\n(A) speer\n(B) 12th\n(C) curtis\n(D) spire" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.6953125, "loss": 4.393411159515381 }, { "id": "identity_1052", "image": "Super-CLEVR/images/superCLEVR_new_001957.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The tandem bike that is to the right of the brown matte mountain bike is what color?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 20.0, "loss": 4.457484245300293 }, { "id": "identity_131864", "image": "A-OKVQA/images/55942.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many structures with thatched roofs can be seen in the background?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.125, "loss": 4.597821235656738 }, { "id": "identity_23546", "image": "ScienceQA/images/931.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which ocean is highlighted?\nChoices:\n(A) the Southern Ocean\n(B) the Pacific Ocean\n(C) the Atlantic Ocean\n(D) the Arctic Ocean" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.515625, "loss": 4.026432037353516 }, { "id": "identity_294235", "image": "ScienceQA/images/652.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Massachusetts?\nChoices:\n(A) Plymouth\n(B) Montgomery\n(C) Boston\n(D) Hartford" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.0, "loss": 4.035605430603027 }, { "id": "identity_206193", "image": "VQA2.0/images/22447.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sticky notes can be seen on the desk?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.921875, "loss": 4.414348602294922 }, { "id": "identity_31772", "image": "IconQA/images/63627.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there enough saddles for every horse?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.875, "loss": 4.228538513183594 }, { "id": "identity_30696", "image": "PMC-VQA/images/PMC4353421_fig1_365270.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the lesion detected on the computed tomography? \nChoices:\n(A) Upper lobe bronchus \n(B) Middle lobe bronchus \n(C) Lower lobe bronchus \n(D) No description provided " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.140625, "loss": 4.128550052642822 }, { "id": "identity_296402", "image": "A-OKVQA/images/522262.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which item is typically used in conjunction with these vehicles?\nChoices:\n(A) missile launcher\n(B) windshield wipers\n(C) radar\n(D) chain" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 8.7109375, "loss": 4.361316680908203 }, { "id": "identity_314148", "image": "IconQA/images/82754.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on?\nChoices:\n(A) white\n(B) green" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.90625, "loss": 4.108121395111084 }, { "id": "identity_322642", "image": "A-OKVQA/images/236282.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people in the lower right doing?\nChoices:\n(A) texting\n(B) petting\n(C) eating\n(D) photographing" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.8515625, "loss": 4.23476505279541 }, { "id": "identity_289671", "image": "AI2D/images/2141.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What occurs after the egg hatches inside the mother's body, as depicted in the diagram?\nChoices:\n(A) Fertilization of the egg\n(B) Growth of the shark\n(C) Birth of the pup\n(D) Swimming away of the pup" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.125, "loss": 4.0062971115112305 }, { "id": "identity_36286", "image": "VQA2.0/images/147972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In what country is this picture taken?" }, { "from": "gpt", "value": "The answer is foreign country" } ], "clip_score": 19.953125, "loss": 4.358980655670166 }, { "id": "identity_275699", "image": "ScienceQA/images/6106.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of North Carolina? \n(A) Charlotte \n(B) Hartford \n(C) Raleigh \n(D) Oklahoma City" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.375, "loss": 4.078538417816162 }, { "id": "identity_290833", "image": "PMC-VQA/images/PMC7160939_Fig2_152756.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of injury is suggested by the image shown?\nOptions:\n(A) Fracture\n(B) Ligament tear\n(C) Muscle strain\n(D) None of the above." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.84375, "loss": 4.432149410247803 }, { "id": "identity_38078", "image": "PMC-VQA/images/PMC8864031_Fig8_199970.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the image showing? \nChoices:\n(A) Clusters of amorphous silicon \n(B) Clusters of amorphous diamond \n(C) Clusters of amorphous copper \n(D) Clusters of amorphous iron " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.96875, "loss": 4.330958843231201 }, { "id": "identity_284981", "image": "A-OKVQA/images/98636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On what type of surface is the bird currently perched?\nChoices:\n(A) metal\n(B) glass\n(C) brick\n(D) concrete" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.5234375, "loss": 4.908816337585449 }, { "id": "identity_108391", "image": "Super-CLEVR/images/superCLEVR_new_001932.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object located furthest to the right in the image?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 18.40625, "loss": 4.41689920425415 }, { "id": "identity_23620", "image": "PMC-VQA/images/PMC2820284_fig1_56500.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the sub pleural thickening located?\nChoices:\n(A) Bilateral chest wall, \n(B) Near the stomach, \n(C) Along the spinal cord, \n(D) Near the adrenal gland" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.8125, "loss": 4.704418659210205 }, { "id": "identity_283439", "image": "A-OKVQA/images/128829.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What should be inserted into this object? \nOptions:\n(A) grains\n(B) vegetables\n(C) meat\n(D) money" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 7.796875, "loss": 4.13273811340332 }, { "id": "identity_1113", "image": "PMC-VQA/images/PMC7847261_DMM046995F3_356423.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the outliers plotted as in the boxplots?\nChoices:\n(A) Dotted lines \n(B) Bold lines \n(C) Individual points \n(D) Shaded areas" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.8515625, "loss": 4.0487961769104 }, { "id": "identity_218911", "image": "TextVQA/images/8b44bfa1b5717879.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the program at the top right of the screen?" }, { "from": "gpt", "value": "The answer is Newscoaster" } ], "clip_score": 23.484375, "loss": 5.078368186950684 }, { "id": "identity_37123", "image": "PMC-VQA/images/PMC7688103_pone.0241916.g001_304772.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the blue lines indicate? \nChoices:\n(A) The exact center of the tumor. \n(B) The location of the tumor boundary. \n(C) A 0.2 mm-wide band inside the boundary of the tumor. \n(D) A 2 mm-wide band inside the boundary of the tumor. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.96875, "loss": 4.008845329284668 }, { "id": "identity_29494", "image": "PMC-VQA/images/PMC6818735_FIG3_57736.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the white arrow in the image? \nChoices:\n(A) The full extent of the knife's intracranial route \n(B) A tumor in the brain \n(C) A normal brain tissue \n(D) The right lateral intraventricular hemorrhage " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 35.28125, "loss": 4.236563682556152 }, { "id": "identity_12200", "image": "FigureQA/images/97957.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Cornflower have the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.515625, "loss": 4.000491142272949 }, { "id": "identity_184193", "image": "VQA2.0/images/461657.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the picnic blanket?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 17.78125, "loss": 4.06764554977417 }, { "id": "identity_293693", "image": "VQA2.0/images/397192.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bench's top?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 24.8125, "loss": 4.21074104309082 }, { "id": "identity_284440", "image": "PMC-VQA/images/PMC3978227_F4_279485.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in image (F)?\nChoices:\n(A) The magnification of the tarsus of the thoracic leg \n(B) The spiracle of the prothoracic \n(C) The telson \n(D) Abdominal spiracle." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.71875, "loss": 4.4558515548706055 }, { "id": "identity_192373", "image": "Super-CLEVR/images/superCLEVR_new_001722.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the smallest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Blue and yellow" } ], "clip_score": 23.65625, "loss": 4.053679943084717 }, { "id": "identity_29774", "image": "IconQA/images/45556.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this shape have a circle as a face?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.59375, "loss": 4.856360912322998 }, { "id": "identity_263354", "image": "DVQA/images/bar_train_00061219.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the bars run horizontally?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 18.515625, "loss": 4.4959187507629395 }, { "id": "identity_30748", "image": "ScienceQA/images/5822.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Rhode Island\n(B) Illinois\n(C) Massachusetts\n(D) Ohio" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.484375, "loss": 4.613592624664307 }, { "id": "identity_2933", "image": "A-OKVQA/images/116096.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where do the boards in this picture go while being used?\nChoices:\n(A) kitchen\n(B) ocean\n(C) your car\n(D) air" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.5234375, "loss": 4.353026390075684 }, { "id": "identity_283392", "image": "A-OKVQA/images/469165.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which shoulder will the batter watch the pitcher? \n(A) his right \n(B) pitchers \n(C) left \n(D) catchers" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.4375, "loss": 4.49808406829834 }, { "id": "identity_300217", "image": "AI2D/images/2671.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which month's solstice is indicated by letter K in the diagram?\nChoices:\n(A) December\n(B) March\n(C) June\n(D) September" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.046875, "loss": 4.049540996551514 }, { "id": "identity_217103", "image": "A-OKVQA/images/204036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of establishment is located behind the bus?" }, { "from": "gpt", "value": "The answer is A market or shop" } ], "clip_score": 28.140625, "loss": 4.000084400177002 }, { "id": "identity_192799", "image": "VQA2.0/images/76571.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the car in motion on the left side of the image?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 21.59375, "loss": 4.406243801116943 }, { "id": "identity_116832", "image": "A-OKVQA/images/281586.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible players are wearing sunglasses?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 18.46875, "loss": 4.553351879119873 }, { "id": "identity_263884", "image": "AI2D/images/280.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If all the brown trout were to die according to the food web, what is the likely outcome for the dragon fly nymphs?\n(A) Their numbers would increase.\n(B) Their numbers would remain the same.\n(C) They would leave the area.\n(D) Their numbers would decrease." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.015625, "loss": 4.442986965179443 }, { "id": "identity_16622", "image": "FigureQA/images/22235.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Teal the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.390625, "loss": 4.196080684661865 }, { "id": "identity_129877", "image": "VQA-AS/images/18345.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flowers are in the lower left corner of the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.609375, "loss": 4.90761661529541 }, { "id": "identity_307079", "image": "DVQA/images/bar_train_00092685.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first group of bars?" }, { "from": "gpt", "value": "The answer is snow" } ], "clip_score": 26.03125, "loss": 4.001395225524902 }, { "id": "identity_225127", "image": "A-OKVQA/images/571950.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the man doing, and what is directly beneath his skateboard?" }, { "from": "gpt", "value": "The answer is Performing a trick, ledge" } ], "clip_score": 31.296875, "loss": 4.441515922546387 }, { "id": "identity_281016", "image": "A-OKVQA/images/452186.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the source of the yellow line?\nChoices:\n(A) street lights\n(B) christmas lights\n(C) flood lights\n(D) headlights" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.921875, "loss": 4.574118614196777 }, { "id": "identity_218475", "image": "A-OKVQA/images/507927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bikers are visibly wearing sunglasses?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.4375, "loss": 4.625217914581299 }, { "id": "identity_291710", "image": "DocVQA/images/qkjk0079_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is G T Barody located?" }, { "from": "gpt", "value": "The answer is director promotion" } ], "clip_score": 20.921875, "loss": 4.711245059967041 }, { "id": "identity_277450", "image": "Super-CLEVR/images/superCLEVR_new_001831.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the objects behind the rubber jet and the large yellow tandem bike of the same size?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 25.765625, "loss": 5.43398904800415 }, { "id": "identity_292870", "image": "PMC-VQA/images/PMC7325100_Fig10_201909.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the meaning of the yellow arrows in the diagram?\nChoices:\n(A) Blood Vessels \n(B) Muscle fibers \n(C) Nerves \n(D) Bones" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.609375, "loss": 4.814598083496094 }, { "id": "identity_34291", "image": "A-OKVQA/images/367969.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the woman likely returning home from?\nChoices:\n(A) mall\n(B) drug store\n(C) work\n(D) restaurant" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.078125, "loss": 4.169649124145508 }, { "id": "identity_298058", "image": "ScienceQA/images/4326.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) North Carolina\n(B) Maryland\n(C) South Carolina\n(D) Indiana" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.3125, "loss": 4.372092247009277 }, { "id": "identity_308653", "image": "FigureQA/images/73303.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.3125, "loss": 4.757951736450195 }, { "id": "identity_258205", "image": "VizWiz/images/VizWiz_train_00000701.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the color and position of each item in the image, and identify the color of the item in the middle" }, { "from": "gpt", "value": "The answer is black" } ], "clip_score": 22.046875, "loss": 5.1296586990356445 }, { "id": "identity_129296", "image": "Super-CLEVR/images/superCLEVR_new_001559.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the object that is to the left of the large shiny airplane and behind the red shiny motorbike?" }, { "from": "gpt", "value": "The answer is Rubber" } ], "clip_score": 26.8125, "loss": 4.433350563049316 }, { "id": "identity_237366", "image": "TQA/images/abc_question_images/seasons_11045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: During which position is the Northern Hemisphere experiencing summer solstice, and what is the position of the Earth in its orbit around the Sun six months later?" }, { "from": "gpt", "value": "The answer is V" } ], "clip_score": 34.90625, "loss": 4.403616428375244 }, { "id": "identity_22651", "image": "ChartQA/images/23940851006052.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of bar that represents China?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 12.6015625, "loss": 5.000435829162598 }, { "id": "identity_323556", "image": "TextVQA/images/cd08d21f06b0e8a0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of beverage is this?" }, { "from": "gpt", "value": "The answer is mitfuns" } ], "clip_score": 20.3125, "loss": 5.979283809661865 }, { "id": "identity_29206", "image": "IconQA/images/8112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What shape is this?\nChoices:\n(A) triangle\n(B) square\n(C) circle" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.0625, "loss": 4.162391662597656 }, { "id": "identity_211339", "image": "VQA2.0/images/255983.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the vehicle in the background?" }, { "from": "gpt", "value": "The answer is Silver" } ], "clip_score": 16.078125, "loss": 4.301733493804932 }, { "id": "identity_274442", "image": "A-OKVQA/images/479595.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is this laptop currently placed?\nChoices:\n(A) fire house\n(B) church\n(C) school\n(D) cemetery" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.3125, "loss": 4.559096336364746 }, { "id": "identity_510", "image": "PMC-VQA/images/PMC6699134_Fig4_25176.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the image? \nChoices:\n(A) Cell division \n(B) Transmigration of cancer cells \n(C) Blood circulation \n(D) Brain surgery " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.90625, "loss": 4.757197856903076 }, { "id": "identity_290952", "image": "A-OKVQA/images/479948.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose is the net placed in front of the spectators?\nChoices:\n(A) stop ball\n(B) chicken pen\n(C) player captivity\n(D) punishment" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.5, "loss": 4.812660217285156 }, { "id": "identity_292822", "image": "PMC-VQA/images/PMC7025078_fig0015_119199.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in this image?\nChoices:\n(A) Two posterior bladder wall diverticula\n(B) Two anterior bladder wall diverticula\n(C) Bladder tumors\n(D) Bladder stones" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.828125, "loss": 4.808840274810791 }, { "id": "identity_37193", "image": "ScienceQA/images/9515.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Minnesota?\nChoices:\n(A) Madison\n(B) Saint Paul\n(C) Minneapolis\n(D) Columbus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.359375, "loss": 4.335958003997803 }, { "id": "identity_275041", "image": "PMC-VQA/images/PMC7028369_fig2s2_120390.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What specific part of the cells is depicted in the images provided?\nChoices:\n(A) Dendrites \n(B) Axons \n(C) Soma \n(D) Nucleus" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.84375, "loss": 4.9542646408081055 }, { "id": "identity_146209", "image": "DocVQA/images/knyf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the topic of discussion for the next meeting?" }, { "from": "gpt", "value": "The answer is Next Meeting Date" } ], "clip_score": 22.078125, "loss": 4.058266639709473 }, { "id": "identity_184557", "image": "VQA2.0/images/24923.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the building directly behind the locomotive?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 24.453125, "loss": 4.600041389465332 }, { "id": "identity_190829", "image": "A-OKVQA/images/514293.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tarp that is spread on the ground in front of the shop?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 13.8359375, "loss": 4.723592281341553 }, { "id": "identity_285891", "image": "PMC-VQA/images/PMC4403878_pone.0124652.g007_379357.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the specific focus of the asterisk in the given options? \nChoices:\n(A) ectopic induction of puc-lacZ \n(B) wild type expression in the closed arrows \n(C) wild type expression in the open arrows \n(D) wild type expression in the stalk region of the disc" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 12.65625, "loss": 4.8915228843688965 }, { "id": "identity_292107", "image": "TQA/images/abc_question_images/human_system_nervous_16172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter label corresponds to the cerebellum among the given choices: V, D, X, or W?\nChoices:\n(A) V\n(B) D\n(C) X\n(D) W" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.84375, "loss": 4.045011043548584 }, { "id": "identity_4092", "image": "Super-CLEVR/images/superCLEVR_new_000540.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The brown rubber thing that is the same size as the metal bus is what shape?" }, { "from": "gpt", "value": "The answer is biplane" } ], "clip_score": 25.5625, "loss": 4.598561763763428 }, { "id": "identity_23254", "image": "A-OKVQA/images/274793.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the animals is abnormal?\nChoices:\n(A) fur skin\n(B) legs\n(C) tail\n(D) mane" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.0, "loss": 4.083743095397949 }, { "id": "identity_319957", "image": "A-OKVQA/images/43340.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the unusual placement in this space?\nChoices:\n(A) shower\n(B) toilet\n(C) sink\n(D) tub" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.125, "loss": 4.699008941650391 }, { "id": "identity_322338", "image": "A-OKVQA/images/578131.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the horse in this image doing?\nChoices:\n(A) steeple chase\n(B) racing\n(C) barrel racing\n(D) roping" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.796875, "loss": 4.366156578063965 }, { "id": "identity_289047", "image": "A-OKVQA/images/399540.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the school bus currently engaged in?\nChoices:\n(A) going\n(B) stopping\n(C) unloading students\n(D) being parked" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.171875, "loss": 4.690396308898926 }, { "id": "identity_24260", "image": "DVQA/images/bar_train_00159429.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What dataset does the red color represent?" }, { "from": "gpt", "value": "The answer is plenty" } ], "clip_score": 24.46875, "loss": 5.605832099914551 }, { "id": "identity_311682", "image": "AI2D/images/1708.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this illustration show?\nChoices:\n(A) Continental shift\n(B) Planet formation\n(C) Distribution of earthquakes\n(D) Sliding of earth layers" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.484375, "loss": 4.388706207275391 }, { "id": "identity_300393", "image": "FigureQA/images/53768.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.1953125, "loss": 4.097292900085449 }, { "id": "identity_20744", "image": "PMC-VQA/images/PMC2737537_F2_44773.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the area of decreased perfusion located? \nChoices:\n(A) Inferior wall of the left ventricle \n(B) Anterior wall of the right ventricle \n(C) Lateral wall of the left ventricle \n(D) Septal wall of the right ventricle " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.0625, "loss": 4.064675331115723 }, { "id": "identity_229772", "image": "ScienceQA/images/4985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which properties do these two objects have in common, and which one is unique to each object?" }, { "from": "gpt", "value": "The answer is Soft, worn as clothing; sports activities, casual occasions" } ], "clip_score": 31.96875, "loss": 4.66396951675415 }, { "id": "identity_120020", "image": "A-OKVQA/images/227508.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 16.859375, "loss": 4.413866996765137 }, { "id": "identity_270803", "image": "VQA2.0/images/362309.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the destination of the bus?" }, { "from": "gpt", "value": "The answer is down street" } ], "clip_score": 24.65625, "loss": 4.890586853027344 }, { "id": "identity_290663", "image": "A-OKVQA/images/416101.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being held by the person?\nOptions:\n(A) tennis balls\n(B) straws\n(C) rocks\n(D) doves" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.65625, "loss": 4.431971549987793 }, { "id": "identity_302247", "image": "A-OKVQA/images/345833.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is in this person's shirt pocket?\nChoices:\n(A) paper\n(B) phone\n(C) pen\n(D) glasses" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.015625, "loss": 4.050614356994629 }, { "id": "identity_148566", "image": "DocVQA/images/zhmw0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the publisher who signed the letter?" }, { "from": "gpt", "value": "The answer is Andrew Alan Hyman" } ], "clip_score": 17.359375, "loss": 4.771881103515625 }, { "id": "identity_301343", "image": "IconQA/images/98211.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is indicated? Respond with a time word, not a number. It is (_) past six." }, { "from": "gpt", "value": "The answer is quarter" } ], "clip_score": 29.828125, "loss": 4.504111289978027 }, { "id": "identity_317330", "image": "DVQA/images/bar_train_00028518.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first bar from the bottom in each set?" }, { "from": "gpt", "value": "The answer is park" } ], "clip_score": 19.046875, "loss": 4.425743103027344 }, { "id": "identity_307058", "image": "Super-CLEVR/images/superCLEVR_new_000850.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other things that have the same size as this?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 14.3828125, "loss": 4.810588836669922 }, { "id": "identity_302263", "image": "AI2D/images/2615.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does letter A indicate?\nChoices:\n(A) Tidal bulge's center of mass\n(B) Moon's gravitational pull\n(C) Earth's rotation\n(D) Earth's gravitational pull towards the moon" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.03125, "loss": 4.27996301651001 }, { "id": "identity_7899", "image": "Super-CLEVR/images/superCLEVR_new_000733.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the tiny brown thing made of the same material as the motorbike that is in front of the cyan shiny thing?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 23.875, "loss": 5.233719825744629 }, { "id": "identity_332067", "image": "A-OKVQA/images/348826.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What language does the name on the side of the largest vehicle in this image come from?\nChoices:\n(A) japanese\n(B) egyptian\n(C) greek\n(D) aramaic" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.28125, "loss": 4.349699974060059 }, { "id": "identity_295331", "image": "A-OKVQA/images/440329.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location are the people currently positioned?\nChoices:\n(A) boxing ring\n(B) junkyard\n(C) pool\n(D) parking lot" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.046875, "loss": 4.454330921173096 }, { "id": "identity_32668", "image": "FigureQA/images/3252.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Bubblegum less than Light Gold?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.484375, "loss": 4.016961097717285 }, { "id": "identity_278037", "image": "PMC-VQA/images/PMC7931080_clinpract-11-00007-f001_383898.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the mass located in the CT scan? \n(A) Right upper lobe \n(B) Left upper lobe \n(C) Right lower lobe \n(D) Left lower lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.296875, "loss": 4.592461109161377 }, { "id": "identity_17321", "image": "VQA-AS/images/16427.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where are the plants?" }, { "from": "gpt", "value": "The answer is floor" } ], "clip_score": 19.65625, "loss": 4.1794114112854 }, { "id": "identity_189676", "image": "VQA2.0/images/551936.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing hats?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 15.2578125, "loss": 4.604133605957031 }, { "id": "identity_331388", "image": "TextVQA/images/26cb1858072bdfc4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this a type of bus?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 25.109375, "loss": 4.7837629318237305 }, { "id": "identity_280103", "image": "FigureQA/images/45404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Purple not the low median?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.4375, "loss": 4.092281341552734 }, { "id": "identity_277422", "image": "Super-CLEVR/images/superCLEVR_new_001038.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other objects of the same size as the green object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 21.671875, "loss": 5.3002705574035645 }, { "id": "identity_28937", "image": "TQA/images/question_images/parts_microscope_7198.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where would you look through this microscope?\nChoices:\n(A) Nose piece\n(B) Slide\n(C) Ocular lenses\n(D) Stage" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.03125, "loss": 4.232789039611816 }, { "id": "identity_329198", "image": "ScienceQA/images/6760.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is shown?\nChoices:\n(A) Antarctica\n(B) Europe\n(C) Africa\n(D) Australia" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.734375, "loss": 4.493036270141602 }, { "id": "identity_296128", "image": "A-OKVQA/images/434354.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is commonly found under the tree depicted in the image shown here?\nChoices:\n(A) mice\n(B) elves\n(C) dogs\n(D) gifts" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.859375, "loss": 4.284409999847412 }, { "id": "identity_26347", "image": "A-OKVQA/images/315322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which vehicle is most likely to have more than 10 passengers?\nChoices:\n(A) double-decker bus\n(B) blue car\n(C) white truck\n(D) silver truck" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.171875, "loss": 4.07724142074585 }, { "id": "identity_8510", "image": "IconQA/images/20194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) turn\n(B) flip\n(C) slide" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.625, "loss": 4.28952169418335 }, { "id": "identity_125472", "image": "TextVQA/images/ff0ef7dc6d86e204.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many teams' logos are displayed on the board?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 18.625, "loss": 4.501590251922607 }, { "id": "identity_2252", "image": "TQA/images/question_images/optics_refraction_9191.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which ray is the incoming ray?\nChoices:\n(A) Normal ray\n(B) Reflected ray\n(C) Incident ray\n(D) None" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.1875, "loss": 4.0121169090271 }, { "id": "identity_238860", "image": "VQA2.0/images/581065.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What event might be taking place in this scene, and what are three visual clues that support your answer?" }, { "from": "gpt", "value": "The answer is A patriotic event; American flag, communal activity, outdoor setup" } ], "clip_score": 29.828125, "loss": 4.131694793701172 }, { "id": "identity_280459", "image": "FigureQA/images/1938.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Gold the highest possible option?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.484375, "loss": 4.011563777923584 }, { "id": "identity_307139", "image": "AI2D/images/3346.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the upward protruding structures on this?\n\nChoices:\n(A) none of the above\n(B) antenna\n(C) labrum\n(D) thorax" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.765625, "loss": 4.451887607574463 }, { "id": "identity_273137", "image": "A-OKVQA/images/308947.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these individuals most likely trying to extinguish?\nChoices:\n(A) flyers\n(B) light\n(C) fire\n(D) dinner" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.5, "loss": 4.252800941467285 }, { "id": "identity_240051", "image": "TextVQA/images/6f76993529aa3bfc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the truck company listed and the major sponsor of the event. What is the combined number of letters in both names?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 14.640625, "loss": 4.373437404632568 }, { "id": "identity_133009", "image": "A-OKVQA/images/497227.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main subject of the painting?" }, { "from": "gpt", "value": "The answer is A landscape with trees and a figure" } ], "clip_score": 19.28125, "loss": 4.161678314208984 }, { "id": "identity_326594", "image": "PMC-VQA/images/PMC5094036_Fig1_91097.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the Chest X-Ray reveal on the day of intubation?\nChoices:\n(A) Normal lung\n(B) Right Lung hyperinflation with cystic images\n(C) Left lung hyperinflation with cystic images\n(D) Both lungs hyperinflation with cystic images" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.15625, "loss": 4.0334014892578125 }, { "id": "identity_4687", "image": "IconQA/images/7984.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a house. Which picture is tenth?\nChoices:\n(A) paw\n(B) house\n(C) dog" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.71875, "loss": 4.275109767913818 }, { "id": "identity_321239", "image": "TabMWP/images/30759.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Dr. Baxter, a zoo director, researched ticket prices at other zoos around the country. Which charges more for a senior ticket, the Los Angeles Zoo or another zoo?\nChoices:\n(A) Los Angeles\n(B) Another zoo" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 38.53125, "loss": 4.5044403076171875 }, { "id": "identity_111796", "image": "A-OKVQA/images/297876.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the laptop on the left side of the desk?" }, { "from": "gpt", "value": "The answer is Apple" } ], "clip_score": 23.296875, "loss": 4.040410041809082 }, { "id": "identity_28338", "image": "PMC-VQA/images/PMC7191704_Fig1_161372.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique was used to detect intracranial hypotension? \nChoices:\n(A) Computed tomography \n(B) Magnetic Resonance Imaging \n(C) Mammography \n(D) X-ray " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.71875, "loss": 4.019248962402344 }, { "id": "identity_185600", "image": "VQA2.0/images/554636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the car behind the red truck?" }, { "from": "gpt", "value": "The answer is Silver" } ], "clip_score": 21.25, "loss": 4.753700256347656 }, { "id": "identity_30373", "image": "VQA2.0/images/3493.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the green plus sign for?" }, { "from": "gpt", "value": "The answer is doctor" } ], "clip_score": 15.234375, "loss": 4.1190900802612305 }, { "id": "identity_292385", "image": "TextVQA/images/1d28c1003ff623ef.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which type of wine is contained in the bottle located on the far right?" }, { "from": "gpt", "value": "The answer is trimbac" } ], "clip_score": 29.46875, "loss": 4.975090026855469 }, { "id": "identity_325394", "image": "A-OKVQA/images/257388.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this person holding in their right hand?\nChoices:\n(A) gun\n(B) napkin\n(C) wine\n(D) umbrella" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.484375, "loss": 4.258198261260986 }, { "id": "identity_22041", "image": "A-OKVQA/images/487020.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The birds seen out of their cage here are sold for what purpose?\nChoices:\n(A) dinner\n(B) hat feathers\n(C) stealing jewels\n(D) pets" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.453125, "loss": 4.07681941986084 }, { "id": "identity_262744", "image": "A-OKVQA/images/320929.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which type of plant did the brown blocks originate? \n(A) lilies\n(B) tree\n(C) bamboo\n(D) violets" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.7421875, "loss": 4.116986274719238 }, { "id": "identity_147341", "image": "DocVQA/images/mygl0226_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the letter contain any specific request for action?" }, { "from": "gpt", "value": "The answer is Yes, to advise at their convenience" } ], "clip_score": 23.203125, "loss": 4.37426233291626 }, { "id": "identity_22918", "image": "PMC-VQA/images/PMC8628426_Fig11_125868.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrow indicate? \nChoices:\n(A) CCL8 location, \n(B) Healthy tissue, \n(C) Diseased tissue, \n(D) Undetected area. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.296875, "loss": 5.22617244720459 }, { "id": "identity_76026", "image": "TQA/images/abc_question_images/human_system_eye_16146.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled 'W'?" }, { "from": "gpt", "value": "The answer is Protection and form" } ], "clip_score": 17.890625, "loss": 4.855766296386719 }, { "id": "identity_288610", "image": "PMC-VQA/images/PMC3678586_f1-ol-05-05-1625_210296.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the appearance of the surface of the narrowed area? \nChoices:\n(A) Rough \n(B) Smooth \n(C) Irregular \n(D) Pitted." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.390625, "loss": 4.947581768035889 }, { "id": "identity_13597", "image": "A-OKVQA/images/469728.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the bike attached to the rail?\nChoices:\n(A) stay upright\n(B) easily spotted\n(C) prevent theft\n(D) to sell" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.09375, "loss": 4.15595817565918 }, { "id": "identity_210822", "image": "TextVQA/images/d7ac482188ca280b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 14.6171875, "loss": 4.029200553894043 }, { "id": "identity_304497", "image": "AI2D/images/3076.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does A represent?\nChoices:\n(A) vacuoles\n(B) ribosomes\n(C) chromatin\n(D) nucleolus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.546875, "loss": 4.500693321228027 }, { "id": "identity_23807", "image": "PMC-VQA/images/PMC6711709_fig5_29123.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the dashed arrows indicate in the image?\nChoices:\n(A) protein localization \n(B) nucleolar mass \n(C) mitochondria distribution \n(D) lipid droplets localization " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.828125, "loss": 4.8835248947143555 }, { "id": "identity_64738", "image": "ScienceQA/images/3049.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What stage of plant growth is depicted in the image?" }, { "from": "gpt", "value": "The answer is Germination" } ], "clip_score": 27.25, "loss": 4.062340259552002 }, { "id": "identity_309472", "image": "A-OKVQA/images/563542.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are there black chords around here?\nChoices:\n(A) for power\n(B) for climbing\n(C) to sell\n(D) for decoration" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 6.6640625, "loss": 4.229603290557861 }, { "id": "identity_264327", "image": "PMC-VQA/images/PMC6894034_f2_79200.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the patient's body is the aneurysm located?\nChoices:\n(A) In the thoracic aorta\n(B) In the abdominal aorta\n(C) In the femoral artery\n(D) In the carotid artery" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.203125, "loss": 4.396984100341797 }, { "id": "identity_292708", "image": "ScienceQA/images/11061.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of South Dakota?\nChoices:\n(A) Columbus\n(B) Pierre\n(C) Topeka\n(D) Lincoln" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.234375, "loss": 4.312853813171387 }, { "id": "identity_35547", "image": "A-OKVQA/images/292827.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this cat located?\nChoices:\n(A) school\n(B) office\n(C) home\n(D) backyard" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.15625, "loss": 4.104490756988525 }, { "id": "identity_286491", "image": "PMC-VQA/images/PMC7752494_ccr33425-fig-0003_324419.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location was the fluid accumulation observed on the scan?\nChoices:\n(A) Cranial side\n(B) Dorsal side\n(C) Caudal side\n(D) Medial side" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.625, "loss": 4.015758514404297 }, { "id": "identity_13687", "image": "VizWiz/images/VizWiz_train_00019177.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Please read this label for me. Thank you." }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 16.765625, "loss": 4.8822736740112305 }, { "id": "identity_323239", "image": "PMC-VQA/images/PMC3716634_F3_218838.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the orange vertical line in the image indicate? \nChoices:\n(A) The location of a transverse cross section.\n(B) The location of a sagittal cross section.\n(C) The location of a coronal cross section.\n(D) The location of a longitudinal cross section." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.28125, "loss": 4.093327522277832 }, { "id": "identity_112805", "image": "TextVQA/images/128518557d40d33d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible flowers are predominantly purple?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 12.84375, "loss": 4.086611270904541 }, { "id": "identity_265193", "image": "FigureQA/images/63188.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Pale Green the color with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.671875, "loss": 4.11283016204834 }, { "id": "identity_61345", "image": "FigureQA/images/22444.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 32.0, "loss": 4.128434181213379 }, { "id": "identity_276835", "image": "TextVQA/images/1ea75b6f56c2a71c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is considered the king of social media?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 20.390625, "loss": 4.165093421936035 }, { "id": "identity_273984", "image": "FigureQA/images/2548.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Salmon the color with the highest median value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.09375, "loss": 4.574132442474365 }, { "id": "identity_207330", "image": "PMC-VQA/images/PMC9386117_F1_374849.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What side of the body is marked in the image?" }, { "from": "gpt", "value": "The answer is Right side" } ], "clip_score": 24.984375, "loss": 4.19148588180542 }, { "id": "identity_270250", "image": "AI2D/images/373.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most appropriate label to describe the mass of igneous rock that intrudes between layers of sedimentary rock, causing uplift?\nChoices:\n(A) Q\n(B) O\n(C) L\n(D) B" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.265625, "loss": 4.274499893188477 }, { "id": "identity_251824", "image": "Super-CLEVR/images/superCLEVR_new_001551.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image share the same shape as the large blue object, and what are their colors?" }, { "from": "gpt", "value": "The answer is Two objects; colors are blue and gray" } ], "clip_score": 22.46875, "loss": 4.17659330368042 }, { "id": "identity_16538", "image": "A-OKVQA/images/67407.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the blue P sign mean?\nChoices:\n(A) pass\n(B) party\n(C) polo\n(D) park" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 8.421875, "loss": 4.321171760559082 }, { "id": "identity_329527", "image": "AI2D/images/2199.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates redia?\nChoices:\n(A) d\n(B) j\n(C) c\n(D) a" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.6640625, "loss": 4.474923133850098 }, { "id": "identity_287498", "image": "A-OKVQA/images/515820.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of station is this?\nOptions:\n(A) taxi station\n(B) car park\n(C) train station\n(D) subway station" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.546875, "loss": 4.208162784576416 }, { "id": "identity_336106", "image": "FigureQA/images/19039.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Magenta?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.046875, "loss": 4.026274681091309 }, { "id": "identity_144238", "image": "DocVQA/images/jhbv0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person who signed the letter?" }, { "from": "gpt", "value": "The answer is Kevin C. Brown" } ], "clip_score": 20.796875, "loss": 4.545114517211914 }, { "id": "identity_6764", "image": "FigureQA/images/63021.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Burlywood have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.953125, "loss": 4.2935590744018555 }, { "id": "identity_1278", "image": "A-OKVQA/images/11737.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where does this player stand?\nChoices:\n(A) stands\n(B) dugout\n(C) first base\n(D) pitcher's mound" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.328125, "loss": 4.020874500274658 }, { "id": "identity_105155", "image": "PMC-VQA/images/PMC7366570_Fig3_215428.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the radiograph shown in the image?" }, { "from": "gpt", "value": "The answer is Lateral left side" } ], "clip_score": 29.9375, "loss": 4.353414535522461 }, { "id": "identity_276334", "image": "AI2D/images/3643.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is the muscle located on the outside of the heart?\n(A) mitral valve\n(B) left atrium\n(C) cardiac muscle\n(D) vena cava" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.4375, "loss": 4.168320655822754 }, { "id": "identity_289760", "image": "ScienceQA/images/10087.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) North Carolina\n(B) South Carolina\n(C) Connecticut\n(D) New Jersey" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.03125, "loss": 4.603407859802246 }, { "id": "identity_334328", "image": "VQA2.0/images/495043.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of work are they doing in the photo?" }, { "from": "gpt", "value": "The answer is road" } ], "clip_score": 23.796875, "loss": 5.335246562957764 }, { "id": "identity_301239", "image": "Super-CLEVR/images/superCLEVR_new_000601.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there anything else of the same color as this small aeroplane?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 19.96875, "loss": 4.818336486816406 }, { "id": "identity_261745", "image": "AI2D/images/3942.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the central component from the following options:\n(A) nucleus\n(B) starch grain\n(C) flagella\n(D) cell membrane" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.40625, "loss": 4.383399486541748 }, { "id": "identity_288364", "image": "A-OKVQA/images/146583.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the pattern of the stripes on individual zebras like?\nChoices:\n(A) identical\n(B) alike\n(C) unique\n(D) matching" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.484375, "loss": 4.056647777557373 }, { "id": "identity_302111", "image": "PMC-VQA/images/PMC7981177_fig1_396137.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What position is depicted in the image?\nChoices:\n(A) Extension position\n(B) Flexion position\n(C) Supine position\n(D) Prone position" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.609375, "loss": 4.937926292419434 }, { "id": "identity_285522", "image": "PMC-VQA/images/PMC7694461_viruses-12-01258-f008_307266.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of microscope was utilized for visualization? \nOptions:\n(A) Transmission electron microscope\n(B) Scanning electron microscope\n(C) Fluorescent microscope\n(D) Confocal microscope" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.875, "loss": 4.05203104019165 }, { "id": "identity_32238", "image": "PMC-VQA/images/PMC5845170_Fig2_286151.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of cells are shown in the image? \nChoices:\n(A) Muscle cells \n(B) Heart cells \n(C) Brain cells \n(D) Bone cells " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.78125, "loss": 4.319453239440918 }, { "id": "identity_17996", "image": "Super-CLEVR/images/superCLEVR_new_000376.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a shiny motorbike to the right of the gray metallic object; is it the same size as the metallic motorbike left of the large green bicycle?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 33.5, "loss": 4.935166358947754 }, { "id": "identity_107883", "image": "VQA2.0/images/92627.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible softballs are there in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 21.21875, "loss": 4.167869567871094 }, { "id": "identity_128562", "image": "VQA2.0/images/109450.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are crossing the street?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 26.96875, "loss": 4.434364318847656 }, { "id": "identity_263777", "image": "VQA2.0/images/448609.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you describe the clothing worn by the man?" }, { "from": "gpt", "value": "The answer is clothes" } ], "clip_score": 9.125, "loss": 4.40081787109375 }, { "id": "identity_8110", "image": "ScienceQA/images/12069.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these three objects have in common?\nChoices:\n(A) blue\n(B) sticky\n(C) transparent" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.6640625, "loss": 4.354396343231201 }, { "id": "identity_266302", "image": "TQA/images/question_images/parts_worm_7315.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term used to refer to the end of a flatworm?\nChoices:\n(A) male pore\n(B) prostomium\n(C) genital markings\n(D) clitellum" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.578125, "loss": 4.821481227874756 }, { "id": "identity_279002", "image": "TQA/images/question_images/nuclear_energy_7100.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the function of the condenser in the diagram provided?\nChoices:\n(A) Provide water for the cooling towers\n(B) Cool the steam and turn it back into water\n(C) Transfer excess water from the turbine to the reservoir\n(D) Heat the water to create steam" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.109375, "loss": 4.364284992218018 }, { "id": "identity_279365", "image": "ScienceQA/images/17894.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) Rhode Island\n(B) Pennsylvania\n(C) New Hampshire\n(D) Ohio" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.875, "loss": 4.2486090660095215 }, { "id": "identity_234064", "image": "PMC-VQA/images/PMC7921074_F3_380336.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the surface texture and elevation characteristics of the mesoventrite where the white arrow points, and explain how these features help in identifying the specific type of elevation" }, { "from": "gpt", "value": "The answer is low longitudinal elevation" } ], "clip_score": 21.4375, "loss": 5.245170593261719 }, { "id": "identity_35061", "image": "A-OKVQA/images/542605.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What event is this?\nChoices:\n(A) circus\n(B) motorcycle rally\n(C) genius convention\n(D) graduation" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.5, "loss": 4.505423069000244 }, { "id": "identity_128277", "image": "PMC-VQA/images/PMC4330229_Fig2_358698.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What part of the body is being visualized in this image?" }, { "from": "gpt", "value": "The answer is The neck region" } ], "clip_score": 23.890625, "loss": 4.8822021484375 }, { "id": "identity_338496", "image": "PMC-VQA/images/PMC4430039_F0001_385964.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being shown in this MRI image?\nChoices:\n(A) Infection \n(B) Inflammation \n(C) Lesion \n(D) Atrophy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.421875, "loss": 4.1383256912231445 }, { "id": "identity_4508", "image": "ScienceQA/images/18967.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the picture. Which word best describes how these candies feel to the touch?\nChoices:\n(A) hard\n(B) fluffy\n(C) greasy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.828125, "loss": 4.195444583892822 }, { "id": "identity_212142", "image": "VQA2.0/images/183913.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Fourteen" } ], "clip_score": 11.4765625, "loss": 4.248898029327393 }, { "id": "identity_337061", "image": "A-OKVQA/images/7096.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the last number on this? \n\nChoices:\n(A) eight\n(B) zero\n(C) six\n(D) five" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 8.0390625, "loss": 4.2227277755737305 }, { "id": "identity_11811", "image": "FigureQA/images/51500.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Violet Red less than Light Slate?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.03125, "loss": 4.075778961181641 }, { "id": "identity_271115", "image": "AI2D/images/91.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the diagram provided, what event takes place on September 22, 2008? Is it:\n(A) Winter Solstice\n(B) Autumnal Equinox\n(C) Vernal Equinox\n(D) Summer Solstice" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.328125, "loss": 4.049513816833496 }, { "id": "identity_294622", "image": "A-OKVQA/images/484666.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which location on this street is a possible place to purchase a used appliance? \n(A) off curb\n(B) pawn shop\n(C) 7 11\n(D) garbage can" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.578125, "loss": 4.098003387451172 }, { "id": "identity_119455", "image": "VQA-AS/images/19002.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What weather condition is implied by the cloud in the sky?" }, { "from": "gpt", "value": "The answer is The weather condition is fair" } ], "clip_score": 20.28125, "loss": 4.2534050941467285 }, { "id": "identity_316120", "image": "TextVQA/images/ce3afc48c97d7338.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the book with this character?" }, { "from": "gpt", "value": "The answer is mi libro para pintar" } ], "clip_score": 17.078125, "loss": 5.185975074768066 }, { "id": "identity_57042", "image": "DVQA/images/bar_train_00066119.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the lowest overall sum of values?" }, { "from": "gpt", "value": "The answer is doubt" } ], "clip_score": 19.203125, "loss": 4.125181674957275 }, { "id": "identity_294757", "image": "A-OKVQA/images/349193.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of net is depicted in the image? \nOptions:\n(A) tennis\n(B) beach volleyball\n(C) fishing\n(D) butterfly" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.84375, "loss": 4.977574348449707 }, { "id": "identity_21609", "image": "A-OKVQA/images/509811.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who owns those laptops?\nChoices:\n(A) one individual\n(B) library\n(C) multiple individuals\n(D) non profit" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.140625, "loss": 4.454164981842041 }, { "id": "identity_299144", "image": "A-OKVQA/images/315195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What event is taking place in the scene depicted?\nOptions:\n(A) fire\n(B) riot\n(C) running race\n(D) water leakage" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.78125, "loss": 4.4545578956604 }, { "id": "identity_75829", "image": "TQA/images/question_images/optics_ray_diagrams_9165.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label for the gray arrows in the image?" }, { "from": "gpt", "value": "The answer is Image" } ], "clip_score": 28.21875, "loss": 4.138138771057129 }, { "id": "identity_279709", "image": "A-OKVQA/images/425342.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the activity being performed by the person upside down with the frisbee?\nChoices:\n(A) consuming it\n(B) catching\n(C) throwing\n(D) idle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.796875, "loss": 4.576781749725342 }, { "id": "identity_122389", "image": "VQA-AS/images/3524.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of pet does the person have?" }, { "from": "gpt", "value": "The answer is Dog" } ], "clip_score": 18.296875, "loss": 4.1008806228637695 }, { "id": "identity_37303", "image": "TextVQA/images/00fbe21f9e1b79de.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: where is the bus going?" }, { "from": "gpt", "value": "The answer is rodeo" } ], "clip_score": 29.78125, "loss": 4.029890537261963 }, { "id": "identity_302678", "image": "AI2D/images/2987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is located below this?\nChoices:\n(A) Hard body\n(B) The worm\n(C) Bark\n(D) Arch" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.109375, "loss": 4.135766506195068 }, { "id": "identity_24794", "image": "A-OKVQA/images/247071.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the cow doing?\nChoices:\n(A) drinking water\n(B) finding friends\n(C) resting\n(D) finding food" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.984375, "loss": 4.634396553039551 }, { "id": "identity_103843", "image": "VQA2.0/images/69498.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing sunglasses?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.765625, "loss": 5.060449123382568 }, { "id": "identity_99", "image": "FigureQA/images/37200.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Khaki the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.453125, "loss": 4.021214008331299 }, { "id": "identity_292711", "image": "ScienceQA/images/15715.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic does this object possess?\nChoices:\n(A) breakable\n(B) bouncy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.4453125, "loss": 4.220706939697266 }, { "id": "identity_207500", "image": "A-OKVQA/images/574140.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tie the person is holding?" }, { "from": "gpt", "value": "The answer is Red with patterns" } ], "clip_score": 19.046875, "loss": 4.392887592315674 }, { "id": "identity_232008", "image": "IconQA/images/50022.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you select two marbles without looking, how likely is it that both will be black?" }, { "from": "gpt", "value": "The answer is certain" } ], "clip_score": 32.59375, "loss": 4.457707405090332 }, { "id": "identity_54598", "image": "DocVQA/images/jkmm0020_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the level of participation?" }, { "from": "gpt", "value": "The answer is Full Line Distribution" } ], "clip_score": 18.5625, "loss": 4.132015705108643 }, { "id": "identity_230678", "image": "VQA2.0/images/265967.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of motorcycles can you identify in this bike show, and what are their distinguishing features?" }, { "from": "gpt", "value": "The answer is three motorcycles" } ], "clip_score": 28.609375, "loss": 4.021981239318848 }, { "id": "identity_206753", "image": "A-OKVQA/images/136185.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the books' spines on the second shelf from the top?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 25.8125, "loss": 4.011883735656738 }, { "id": "identity_338311", "image": "IconQA/images/72180.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a train. Which image is eighth?\nChoices:\n(A) bear\n(B) plane\n(C) train" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.5625, "loss": 4.428879261016846 }, { "id": "identity_323347", "image": "FigureQA/images/1080.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Cyan?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.2265625, "loss": 4.214787483215332 }, { "id": "identity_120393", "image": "VQA2.0/images/24551.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing baseball gloves in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.875, "loss": 5.035131931304932 }, { "id": "identity_281610", "image": "ScienceQA/images/3529.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material was used to make this canoe?\nChoices:\n(A) clay\n(B) wood" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.90625, "loss": 4.135689735412598 }, { "id": "identity_319271", "image": "A-OKVQA/images/573132.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this brown object on the table used for?\nChoices:\n(A) grind flour\n(B) grind spices\n(C) grind coffee\n(D) grind meat" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.1875, "loss": 4.239344120025635 }, { "id": "identity_112689", "image": "Super-CLEVR/images/superCLEVR_new_001307.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is located to the right of the bicycle?" }, { "from": "gpt", "value": "The answer is Bus" } ], "clip_score": 24.6875, "loss": 4.297990322113037 }, { "id": "identity_222266", "image": "VQA2.0/images/480322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary activity taking place in the image?" }, { "from": "gpt", "value": "The answer is A market or fruit sale" } ], "clip_score": 27.921875, "loss": 4.667572498321533 }, { "id": "identity_291679", "image": "PMC-VQA/images/PMC6292007_Fig4_411132.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging modality was utilized to capture the image: MRI, CT scan, CF image, or X-ray?\nChoices:\n(A) MRI \n(B) CT scan \n(C) CF image \n(D) X-ray" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.71875, "loss": 4.372628688812256 }, { "id": "identity_319744", "image": "FigureQA/images/77216.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Firebrick less than another color?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.171875, "loss": 4.1156206130981445 }, { "id": "identity_263573", "image": "PMC-VQA/images/PMC5641062_F4_226991.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which marker is utilized to label cells in the image among the following options: (A) Green asterisks, (B) Red asterisks, (C) Blue asterisks, or (D) Yellow asterisks?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.78125, "loss": 4.220925331115723 }, { "id": "identity_43981", "image": "DVQA/images/bar_train_00112747.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item has the least total units sold across all stores?" }, { "from": "gpt", "value": "The answer is step" } ], "clip_score": 27.96875, "loss": 4.215575218200684 }, { "id": "identity_255059", "image": "PMC-VQA/images/PMC4150633_f0005_316993.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the potential symptoms a patient might experience due to a cyst located in the left parietotemporal region, and how might these symptoms differ from a cyst located in the right parietotemporal region?" }, { "from": "gpt", "value": "The answer is Language difficulties, memory issues for left; spatial disorientation for right" } ], "clip_score": 26.828125, "loss": 4.101378917694092 }, { "id": "identity_333938", "image": "A-OKVQA/images/468956.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What skill is being practiced here?\nChoices:\n(A) tooth brushing\n(B) singing\n(C) reading\n(D) sleeping" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.90625, "loss": 4.314338684082031 }, { "id": "identity_22080", "image": "VQA2.0/images/83858.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the person's shirt?" }, { "from": "gpt", "value": "The answer is white with red" } ], "clip_score": 17.140625, "loss": 4.043017387390137 }, { "id": "identity_284059", "image": "A-OKVQA/images/354167.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following leaves imprints in the snow with every step they take? \n(A) nothing\n(B) their shadow\n(C) their shoes\n(D) their hands" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.421875, "loss": 4.640420436859131 }, { "id": "identity_336173", "image": "PMC-VQA/images/PMC4172974_Fig7_322229.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the condition observed in the animal?\nChoices:\n(A) Osteomyelitis of a bone\n(B) Osteomyelitis of a jawbone\n(C) Periapical infection of a tooth\n(D) Periapical infection of another tooth" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.1875, "loss": 4.512570858001709 }, { "id": "identity_291144", "image": "DocVQA/images/jtdg0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the outcome?" }, { "from": "gpt", "value": "The answer is OK" } ], "clip_score": 21.59375, "loss": 4.3596367835998535 }, { "id": "identity_215033", "image": "VQA-AS/images/18770.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trees are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.859375, "loss": 4.358022212982178 }, { "id": "identity_273802", "image": "ScienceQA/images/15793.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the mammal from the options below:\n(A) human\n(B) pelican" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.828125, "loss": 4.190790176391602 }, { "id": "identity_210633", "image": "VQA-AS/images/16109.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many leaves are visible on the tree?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 20.8125, "loss": 4.089787483215332 }, { "id": "identity_334305", "image": "AI2D/images/2547.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this circle represent?\nChoices:\n(A) moon's orbit\n(B) earth's orbit\n(C) moon's rotation\n(D) sun's orbit" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.1875, "loss": 4.2294464111328125 }, { "id": "identity_337253", "image": "PMC-VQA/images/PMC6126002_Fig3_364099.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What method was used to capture this image?\nChoices:\n(A) Magnetic resonance imaging\n(B) Computed tomography scan\n(C) Positron emission tomography\n(D) X-ray" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.46875, "loss": 4.498537540435791 }, { "id": "identity_333232", "image": "A-OKVQA/images/395975.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What natural element is used most extensively in a vertical manner here?\nChoices:\n(A) fur\n(B) stone\n(C) velvet\n(D) hair" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.0546875, "loss": 4.306796073913574 }, { "id": "identity_275203", "image": "DVQA/images/bar_train_00008617.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Among all the datasets, which algorithm has the highest total accuracy?" }, { "from": "gpt", "value": "The answer is manner" } ], "clip_score": 29.890625, "loss": 5.2726287841796875 }, { "id": "identity_277055", "image": "TextVQA/images/f247db5c4c259120.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which brand does the computer belong to?" }, { "from": "gpt", "value": "The answer is noc" } ], "clip_score": 26.109375, "loss": 4.246129989624023 }, { "id": "identity_33314", "image": "PMC-VQA/images/PMC3095991_F6_95603.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the positive lymph node located?\nChoices:\n(A) Left fossa obturatoria \n(B) Right fossa obturatoria \n(C) Axillary region \n(D) Neck " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.671875, "loss": 4.468695640563965 }, { "id": "identity_2402", "image": "ScienceQA/images/6837.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) sticky\n(B) colorful" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.3046875, "loss": 4.254061698913574 }, { "id": "identity_285290", "image": "A-OKVQA/images/376790.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the quickest method to move away from the dock's edge among the following options: swimming, hang gliding, unicycle, or boat?\nChoices:\n(A) swimming\n(B) hang gliding\n(C) unicycle\n(D) boat" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.984375, "loss": 4.057461738586426 }, { "id": "identity_191042", "image": "VQA2.0/images/305259.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible walking past the \"KINGS SKY\" store?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 29.4375, "loss": 4.506128787994385 }, { "id": "identity_278160", "image": "VQA-RAD/images/synpic39301.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the probable reason behind these abnormalities?" }, { "from": "gpt", "value": "The answer is PRES" } ], "clip_score": 21.578125, "loss": 4.821964740753174 }, { "id": "identity_328390", "image": "AI2D/images/2141.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the diagram, what occurs after the egg hatches inside the mother's body?\nChoices:\n(A) The egg is fertilized\n(B) The shark grows up\n(C) The pup is born\n(D) The pup swims away" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.640625, "loss": 4.040081977844238 }, { "id": "identity_293576", "image": "VQA2.0/images/361038.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the awning on the building?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 13.703125, "loss": 4.225989818572998 }, { "id": "identity_332730", "image": "PMC-VQA/images/PMC6214163_Fig1_390454.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which nodule showed up first?\nChoices:\n(A) Left posterior segment\n(B) Right posterior segment\n(C) Left lateral basal segment\n(D) Right lateral basal segment" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.0625, "loss": 4.19612455368042 }, { "id": "identity_310264", "image": "A-OKVQA/images/449634.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these overhead wires used for?\nChoices:\n(A) power trains\n(B) internet\n(C) phone lines\n(D) electrical utility" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.0390625, "loss": 4.211174964904785 }, { "id": "identity_69889", "image": "AI2D/images/3474.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What part of the salamander is labeled as 'Cute'?" }, { "from": "gpt", "value": "The answer is The nose" } ], "clip_score": 39.8125, "loss": 4.318673133850098 }, { "id": "identity_310751", "image": "PMC-VQA/images/PMC7756594_mrm28524-fig-0003_325276.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the dashed vertical line indicate?\nChoices:\n(A) Motion of the subject \n(B) Acquisition of shots \n(C) Separation between shots \n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.984375, "loss": 4.84606409072876 }, { "id": "identity_196088", "image": "PMC-VQA/images/PMC4611029_Fig1_434760.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many black arrows are present in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 11.078125, "loss": 4.229229927062988 }, { "id": "identity_285", "image": "TextVQA/images/69eddefbe18eea44.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is on the cup?" }, { "from": "gpt", "value": "The answer is dd" } ], "clip_score": 17.0, "loss": 4.935638904571533 }, { "id": "identity_316836", "image": "A-OKVQA/images/451951.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person with the drink at risk of?\nChoices:\n(A) falling hazard\n(B) fire hazard\n(C) flood hazard\n(D) slipping hazard" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.03125, "loss": 4.6053667068481445 }, { "id": "identity_7064", "image": "PMC-VQA/images/PMC5615704_materials-10-01049-f008_220472.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be seen on the left of the newly-formed bone? \nChoices:\n(A) Tissue rich in angiogenesis \n(B) Dense fibrous connective tissue \n(C) Both A and B \n(D) None of the above. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.171875, "loss": 4.4290595054626465 }, { "id": "identity_288512", "image": "PMC-VQA/images/PMC4830041_Fig1_20947.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is the large tumor situated?\nChoices:\n(A) In the pelvic cavity.\n(B) In the left ventricle.\n(C) In the left atrium.\n(D) In the right atrium." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.625, "loss": 4.145890235900879 }, { "id": "identity_135829", "image": "FigureQA/images/22257.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bar with the lowest value?" }, { "from": "gpt", "value": "The answer is Navy" } ], "clip_score": 23.0625, "loss": 5.401576995849609 }, { "id": "identity_303298", "image": "Super-CLEVR/images/superCLEVR_new_000661.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the size of the red matte object to the right of the big blue metal thing the same as the red matte thing in front of the large regular bus?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 20.734375, "loss": 5.089162349700928 }, { "id": "identity_294006", "image": "AI2D/images/3915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label that represents Molecular hydrogen from the given choices. \n(A) C\n(B) A\n(C) G\n(D) E" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.53125, "loss": 4.003819465637207 }, { "id": "identity_126559", "image": "Super-CLEVR/images/superCLEVR_new_001507.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the car closest to the bottom of the image?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 21.140625, "loss": 4.321343421936035 }, { "id": "identity_311171", "image": "IconQA/images/94192.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this shape?\nChoices:\n(A) slide\n(B) flip\n(C) turn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.546875, "loss": 4.303799629211426 }, { "id": "identity_268390", "image": "PMC-VQA/images/PMC7872619_f1-cpcem-05-125_364399.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the images provided?\nChoices:\n(A) Free air under the liver.\n(B) Dilated stomach.\n(C) Free air under the diaphragm.\n(D) Normal chest x-ray." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.09375, "loss": 4.081459999084473 }, { "id": "identity_305700", "image": "FigureQA/images/59258.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.6875, "loss": 4.505368232727051 }, { "id": "identity_321996", "image": "FigureQA/images/45875.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.046875, "loss": 4.038043022155762 }, { "id": "identity_273830", "image": "Super-CLEVR/images/superCLEVR_new_000070.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the small car located to the left of the rubber road bike in front of the tiny yellow motorbike?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 25.78125, "loss": 4.792717456817627 }, { "id": "identity_130632", "image": "A-OKVQA/images/491992.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats are visible in the background?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 19.140625, "loss": 4.426507949829102 }, { "id": "identity_135099", "image": "DVQA/images/bar_train_00005221.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest preference for 'jazz'?" }, { "from": "gpt", "value": "The answer is talent" } ], "clip_score": 34.40625, "loss": 4.058825969696045 }, { "id": "identity_286141", "image": "PMC-VQA/images/PMC8870907_diagnostics-12-00384-f001_203185.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the neuroforaminal sacral fracture, which zone of the sacrum is affected?\nChoices:\n(A) Zone I\n(B) Zone II\n(C) Zone III\n(D) Zone IV" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.546875, "loss": 4.005192756652832 }, { "id": "identity_296994", "image": "A-OKVQA/images/17474.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of establishment is Canova categorized as? \n(A) library\n(B) restaurant\n(C) retail store\n(D) grocery" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.0703125, "loss": 4.106668472290039 }, { "id": "identity_319121", "image": "FigureQA/images/92953.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the value of Dark Blue the lowest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.046875, "loss": 4.314370155334473 }, { "id": "identity_191821", "image": "A-OKVQA/images/351422.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the umbrella closest to the bottom of the image?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 17.6875, "loss": 4.1924638748168945 }, { "id": "identity_307726", "image": "AI2D/images/3058.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What provides protection to the cell?\nChoices:\n(A) chromosome\n(B) cell wall\n(C) nucleus\n(D) vacuole" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.40625, "loss": 4.280852317810059 }, { "id": "identity_302722", "image": "VQA2.0/images/361844.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the weather cloudy?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 15.3515625, "loss": 4.053760051727295 }, { "id": "identity_272613", "image": "DVQA/images/bar_train_00151878.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which element is represented by the color forest green?" }, { "from": "gpt", "value": "The answer is thing" } ], "clip_score": 21.21875, "loss": 5.178240776062012 }, { "id": "identity_103516", "image": "TextVQA/images/2b92b3328950ac14.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of event is the runner participating in?" }, { "from": "gpt", "value": "The answer is Trail Cup" } ], "clip_score": 26.09375, "loss": 5.095387935638428 }, { "id": "identity_32684", "image": "PMC-VQA/images/PMC6798454_Fig2_52632.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the scale bars represent?\nChoices:\n(A) Distance between the cells \n(B) Size of the region of interest \n(C) Magnification of the image \n(D) Time of recording " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.96875, "loss": 4.312727451324463 }, { "id": "identity_123741", "image": "A-OKVQA/images/162848.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many memory cards are there?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.609375, "loss": 4.6488776206970215 }, { "id": "identity_2885", "image": "A-OKVQA/images/112377.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the bus near the curb?\nChoices:\n(A) getting cleaned\n(B) getting passengers\n(C) refueling\n(D) changing tires" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.46875, "loss": 4.622877597808838 }, { "id": "identity_262078", "image": "VizWiz/images/VizWiz_train_00011688.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is metal tool" } ], "clip_score": 24.859375, "loss": 4.010983943939209 }, { "id": "identity_221704", "image": "Super-CLEVR/images/superCLEVR_new_000172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly behind the purple minivan?" }, { "from": "gpt", "value": "The answer is A bus" } ], "clip_score": 24.640625, "loss": 4.1378326416015625 }, { "id": "identity_304220", "image": "A-OKVQA/images/450592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these white horses used for?\nChoices:\n(A) pulling carriage\n(B) breeding\n(C) racing\n(D) tilling land" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.75, "loss": 4.068846702575684 }, { "id": "identity_130049", "image": "A-OKVQA/images/561919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the sign in the middle of the hedge?" }, { "from": "gpt", "value": "The answer is Please queue here for taxi" } ], "clip_score": 22.40625, "loss": 4.522119522094727 }, { "id": "identity_203109", "image": "TextVQA/images/57e1ac153cde6348.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What other brand of beer is visible in the background?" }, { "from": "gpt", "value": "The answer is Achel" } ], "clip_score": 25.0625, "loss": 4.097703456878662 }, { "id": "identity_5691", "image": "FigureQA/images/23033.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.515625, "loss": 4.623602867126465 }, { "id": "identity_286032", "image": "ScienceQA/images/17876.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What property does this object possess?\nChoices:\n(A) rough\n(B) stretchy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.15625, "loss": 4.062240123748779 }, { "id": "identity_336864", "image": "A-OKVQA/images/246029.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is most protected on the person in front?\nChoices:\n(A) knees\n(B) shins\n(C) chest\n(D) nose" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.2890625, "loss": 4.507071495056152 }, { "id": "identity_256707", "image": "VizWiz/images/VizWiz_train_00004665.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of objects can you identify in the image, and what are they?" }, { "from": "gpt", "value": "The answer is nine" } ], "clip_score": 20.734375, "loss": 5.736784934997559 }, { "id": "identity_262764", "image": "ScienceQA/images/20791.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of New Hampshire?\nChoices:\n(A) Newport\n(B) Concord\n(C) Boston\n(D) Albany" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.015625, "loss": 4.220102310180664 }, { "id": "identity_29267", "image": "PMC-VQA/images/PMC6032773_F5_338204.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What happened on the follow up radiograph?\nChoices:\n(A) Displacement of the radial head \n(B) Displacement of the ulnar head \n(C) Displacement of the radial notch \n(D) Displacement of the ulnar notch " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.03125, "loss": 4.004150390625 }, { "id": "identity_270409", "image": "AI2D/images/274.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is another term for a gray wolf in this food web? Is it:\n(A) prey\n(B) producer\n(C) predator\n(D) energy source" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.875, "loss": 4.062173843383789 }, { "id": "identity_35004", "image": "PMC-VQA/images/PMC6194021_Fig4_383711.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of section was used for the histological images? \nChoices:\n(A) PMMA \n(B) PVC \n(C) PEEK \n(D) PET " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.953125, "loss": 4.217763900756836 }, { "id": "identity_117584", "image": "Super-CLEVR/images/superCLEVR_new_001281.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bicycle located to the left of the purple dirtbike?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 28.296875, "loss": 4.878777980804443 }, { "id": "identity_5281", "image": "DVQA/images/bar_train_00092221.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 19.15625, "loss": 4.397716522216797 }, { "id": "identity_319761", "image": "DVQA/images/bar_train_00114788.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of one of the groups of bars from the left?" }, { "from": "gpt", "value": "The answer is sir" } ], "clip_score": 18.765625, "loss": 4.312130928039551 }, { "id": "identity_309402", "image": "PMC-VQA/images/PMC5964484_fig5_317749.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the black arrows in image (d) indicate?\nChoices:\n(A) Irrelevant information \n(B) ADC map abnormalities \n(C) FLAIR abnormalities \n(D) Increased attenuation" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.328125, "loss": 4.619021415710449 }, { "id": "identity_338295", "image": "TQA/images/abc_question_images/human_system_nervous_16189.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which one is the frontal lobe?\nChoices:\n(A) V\n(B) P\n(C) D\n(D) X" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.703125, "loss": 4.744304656982422 }, { "id": "identity_150748", "image": "AI2D/images/3648.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chambers are visible in this diagram of the heart?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 31.828125, "loss": 4.004416465759277 }, { "id": "identity_268346", "image": "ScienceQA/images/7144.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is used to make this ring?\nChoices:\n(A) metal\n(B) concrete" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.125, "loss": 4.227083683013916 }, { "id": "identity_25211", "image": "IconQA/images/96392.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a rocket. Which picture is fourth?\nChoices:\n(A) rocket\n(B) star\n(C) moon" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.859375, "loss": 4.10513973236084 }, { "id": "identity_337533", "image": "PMC-VQA/images/PMC6627429_cancers-11-00847-f004_7769.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the paraganglioma located in the image?\nChoices:\n(A) Behind something\n(B) In front of something\n(C) Next to something\n(D) Next to something else" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.921875, "loss": 4.361475944519043 }, { "id": "identity_37787", "image": "PMC-VQA/images/PMC4658764_Fig1_447501.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which lobe in the lung is affected by the ground glass opacity in the image?\nChoices:\n(A) right middle lobe \n(B) left upper lobe \n(C) right lower lobe \n(D) left lower lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.703125, "loss": 4.109710216522217 }, { "id": "identity_130668", "image": "VQA2.0/images/248194.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many street lamps are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.71875, "loss": 4.725760459899902 }, { "id": "identity_327", "image": "PMC-VQA/images/PMC6334377_fig3_423309.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which vertebrate is being described in the caption? \nChoices:\n(A) Cervical \n(B) Thoracic \n(C) Lumbar \n(D) Sacral " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.15625, "loss": 4.143875598907471 }, { "id": "identity_316749", "image": "IconQA/images/60878.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this symbol?\nChoices:\n(A) turn\n(B) slide\n(C) flip" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.546875, "loss": 4.167045593261719 }, { "id": "identity_298654", "image": "PMC-VQA/images/PMC5126392_fig4_99947.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which anatomical structure is depicted in the image provided?\nChoices:\n(A) Lumbar vertebra \n(B) Sacrum \n(C) Cervical vertebra \n(D) Thoracic vertebra" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.15625, "loss": 4.205475807189941 }, { "id": "identity_297378", "image": "A-OKVQA/images/304134.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the role of the man holding the orange stick in this scenario?\nChoices:\n(A) toll taker\n(B) traffic cop\n(C) seamstress\n(D) bus driver" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.703125, "loss": 4.715455532073975 }, { "id": "identity_56783", "image": "FigureQA/images/7575.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many colors are used in the pie chart?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 30.546875, "loss": 4.4282121658325195 }, { "id": "identity_298144", "image": "A-OKVQA/images/151371.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which company manufactures the large truck among the following options: Volvo, Peterbilt, Mack, or Daimler?\nChoices:\n(A) volvo\n(B) peterbilt\n(C) mack\n(D) daimler" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.703125, "loss": 4.030224800109863 }, { "id": "identity_113214", "image": "Super-CLEVR/images/superCLEVR_new_001172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly above the green scooter?" }, { "from": "gpt", "value": "The answer is A motorcycle" } ], "clip_score": 25.34375, "loss": 4.364855766296387 }, { "id": "identity_66303", "image": "TQA/images/question_images/cycle_rock_6728.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many processes can directly transform igneous rocks into sedimentary rocks?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.40625, "loss": 4.3123650550842285 }, { "id": "identity_279069", "image": "ScienceQA/images/10258.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of New Jersey?\nChoices:\n(A) Trenton\n(B) Phoenix\n(C) Manchester\n(D) Harrisburg" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.53125, "loss": 4.050995826721191 }, { "id": "identity_3462", "image": "PMC-VQA/images/PMC3535132_Fig1_176628.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of fracture did the patient have on their left forearm? \nChoices:\n(A) Greenstick fracture \n(B) Comminuted fracture \n(C) Oblique fracture \n(D) Spiral fracture " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.09375, "loss": 4.121173858642578 }, { "id": "identity_118693", "image": "VQA2.0/images/319201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cupcakes are visible on the table?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 18.90625, "loss": 4.944319248199463 }, { "id": "identity_270053", "image": "VizWiz/images/VizWiz_train_00010982.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide me with the name and brand of the box in this recording?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 14.7734375, "loss": 4.3588361740112305 }, { "id": "identity_183651", "image": "A-OKVQA/images/511299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.296875, "loss": 4.283134937286377 }, { "id": "identity_28658", "image": "ScienceQA/images/17944.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Mississippi?\nChoices:\n(A) Jackson\n(B) Biloxi\n(C) Cambridge\n(D) Portland" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.796875, "loss": 4.238698959350586 }, { "id": "identity_271903", "image": "AI2D/images/2959.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term for the body opening through which an animal consumes food?\nChoices:\n(A) tonsil\n(B) hard palate\n(C) mouth\n(D) tongue" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.921875, "loss": 4.3315229415893555 }, { "id": "identity_116948", "image": "VQA2.0/images/233637.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible people are wearing hats?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 13.7890625, "loss": 4.6326904296875 }, { "id": "identity_322436", "image": "PMC-VQA/images/PMC6163546_diagnostics-08-00053-f019_374966.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of view is depicted in the schematic?\nChoices:\n(A) Frontal view \n(B) Lateral view \n(C) Oblique view \n(D) Transverse view" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.3671875, "loss": 4.519016265869141 }, { "id": "identity_320966", "image": "PMC-VQA/images/PMC5415669_fig3_159996.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the arrow in the image?\nChoices:\n(A) Septal thickening \n(B) Chronic inflammatory infiltrate \n(C) Alveolar destruction \n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.96875, "loss": 4.818904876708984 }, { "id": "identity_127177", "image": "VQA-AS/images/9407.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pillows are visible on the couch?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.859375, "loss": 4.341142177581787 }, { "id": "identity_228930", "image": "ScienceQA/images/6299.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the capital of the state highlighted in the image, then determine the capital of the state directly south of it" }, { "from": "gpt", "value": "The answer is Boston" } ], "clip_score": 26.28125, "loss": 4.134217262268066 }, { "id": "identity_262058", "image": "ScienceQA/images/3357.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Minnesota?\n(A) Saint Paul\n(B) Fort Wayne\n(C) Indianapolis\n(D) Columbus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.78125, "loss": 4.150019645690918 }, { "id": "identity_30374", "image": "PMC-VQA/images/PMC4033054_F4_291575.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which brain imaging technique was used to generate this image? \nChoices:\n(A) Positron Emission Tomography (PET) \n(B) Electroencephalography (EEG) \n(C) Magnetoencephalography (MEG) \n(D) Magnetic Resonance Imaging (MRI) " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.359375, "loss": 4.131989479064941 }, { "id": "identity_338445", "image": "A-OKVQA/images/326236.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this group on the horses doing?\nChoices:\n(A) conquering\n(B) racing\n(C) touring\n(D) fighting" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.515625, "loss": 4.204789638519287 }, { "id": "identity_251643", "image": "A-OKVQA/images/572879.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background of the image, and what are they doing?" }, { "from": "gpt", "value": "The answer is Four people; sitting, standing, and near trees" } ], "clip_score": 16.03125, "loss": 4.2721381187438965 }, { "id": "identity_2084", "image": "VQA-AS/images/5562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the girl playing with?" }, { "from": "gpt", "value": "The answer is soccer all and dog" } ], "clip_score": 26.453125, "loss": 4.331493854522705 }, { "id": "identity_30944", "image": "A-OKVQA/images/425945.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What word is included in the name that is found on the bottom of the label in red?\nChoices:\n(A) loud\n(B) mouth\n(C) plumes\n(D) lime" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.296875, "loss": 4.644237995147705 }, { "id": "identity_294481", "image": "IconQA/images/78163.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on? \nOptions:\n(A) white\n(B) blue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.8125, "loss": 4.0447540283203125 }, { "id": "identity_304084", "image": "PMC-VQA/images/PMC6252907_F1_400623.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the MRI image?\nChoices:\n(A) Brain\n(B) Pituitary gland\n(C) Spine\n(D) Lungs" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.671875, "loss": 4.410402774810791 }, { "id": "identity_262182", "image": "ScienceQA/images/7573.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following fossils is more likely to be younger: a dinosaur footprint or wood?\nChoices:\n(A) dinosaur footprint\n(B) wood" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.890625, "loss": 4.172109127044678 }, { "id": "identity_276461", "image": "A-OKVQA/images/191293.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is this room situated?\nChoices:\n(A) office\n(B) school\n(C) home\n(D) store" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.375, "loss": 4.341720104217529 }, { "id": "identity_21518", "image": "DocVQA/images/kzcw0217_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the E- mail address?" }, { "from": "gpt", "value": "The answer is kmittle@dwrite.com" } ], "clip_score": 15.6484375, "loss": 4.424184799194336 }, { "id": "identity_265683", "image": "ScienceQA/images/6607.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city on the map is indicated by the marking? \nChoices:\n(A) Minneapolis\n(B) Milwaukee\n(C) Chicago\n(D) Omaha" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.765625, "loss": 4.466651916503906 }, { "id": "identity_271598", "image": "TabMWP/images/37289.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nOptions:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.796875, "loss": 4.156022071838379 }, { "id": "identity_331184", "image": "VQA2.0/images/180205.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is this person?" }, { "from": "gpt", "value": "The answer is in sports park" } ], "clip_score": 23.984375, "loss": 4.330324649810791 }, { "id": "identity_222495", "image": "VQA2.0/images/423162.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the stands?" }, { "from": "gpt", "value": "The answer is An integer number (counting required)" } ], "clip_score": 15.7890625, "loss": 4.347024917602539 }, { "id": "identity_263708", "image": "FigureQA/images/19393.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Periwinkle the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.3125, "loss": 4.634132385253906 }, { "id": "identity_262704", "image": "PMC-VQA/images/PMC5004003_fig4_67761.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this patient, where in the brain are the metastases located?\nChoices:\n(A) Left occipital lobe\n(B) Right occipital lobe\n(C) Left frontal lobe\n(D) Right frontal lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.984375, "loss": 4.027388095855713 }, { "id": "identity_297281", "image": "A-OKVQA/images/87649.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which gold object is located near the front of the train? Is it a shield, handle, bell, or helmet?\nChoices:\n(A) shield\n(B) handle\n(C) bell\n(D) helmet" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.609375, "loss": 4.325055122375488 }, { "id": "identity_307154", "image": "PMC-VQA/images/PMC4583733_Fig2_427137.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During which phase was this image captured?\nChoices:\n(A) Early Arterial phase \n(B) Hepatic Parenchymal phase \n(C) Portal venous phase \n(D) Late arterial phase" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.828125, "loss": 4.0501298904418945 }, { "id": "identity_325360", "image": "PMC-VQA/images/PMC7693558_F2_306672.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the part indicated by the star?\nChoices:\n(A) Proximal part \n(B) Caudal part \n(C) Distal part \n(D) None of the above." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.921875, "loss": 4.72330379486084 }, { "id": "identity_335904", "image": "IconQA/images/49793.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a plane. What is the third image?\nChoices:\n(A) train\n(B) plane\n(C) bear" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.296875, "loss": 4.371584892272949 }, { "id": "identity_298334", "image": "ScienceQA/images/11278.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repulsive force between them?\nChoices:\n(A) repel\n(B) attract" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.90625, "loss": 4.091668605804443 }, { "id": "identity_123747", "image": "PMC-VQA/images/PMC7148584_fig3_149588.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the area of interest located on the left or right side of the image?" }, { "from": "gpt", "value": "The answer is Right" } ], "clip_score": 20.0, "loss": 4.8268022537231445 }, { "id": "identity_133689", "image": "Super-CLEVR/images/superCLEVR_new_000048.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the airplane's propeller?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 12.8828125, "loss": 4.705236911773682 }, { "id": "identity_320619", "image": "TabMWP/images/20965.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Javier's campground is near two hiking trails with scenic endpoints. One trail leads to an underground lake at the bottom of a small cave. The other trail leads to a mountain peak. The elevation of each endpoint is shown on the campground map. Which trail's endpoint is farther from sea level?\nChoices:\n(A) underground lake\n(B) mountain peak" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.3125, "loss": 4.0291619300842285 }, { "id": "identity_264322", "image": "Super-CLEVR/images/superCLEVR_new_001379.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the other small object made of the same material as the tiny cyan thing?" }, { "from": "gpt", "value": "The answer is gray" } ], "clip_score": 25.203125, "loss": 4.136168479919434 }, { "id": "identity_308957", "image": "PMC-VQA/images/PMC7861362_pone.0244325.g005_359794.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of image was used for subtotal resection of the tumor?\nChoices:\n(A) CT images \n(B) T1-FLAIR images \n(C) T2-FLAIR images \n(D) T1 images" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.125, "loss": 4.375214576721191 }, { "id": "identity_17228", "image": "PMC-VQA/images/PMC3601997_F4_192991.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the image depict? \nChoices:\n(A) Mitral valve stenosis \n(B) Aortic valve stenosis \n(C) Ventricular Septal Defect \n(D) Tricuspid valve stenosis " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.34375, "loss": 4.144869327545166 }, { "id": "identity_291740", "image": "A-OKVQA/images/115862.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which gesture is the group currently performing? \nOptions:\n(A) wave\n(B) fist\n(C) hang ten\n(D) salute" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.5234375, "loss": 4.113439083099365 }, { "id": "identity_35764", "image": "A-OKVQA/images/289213.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of sport is he practicing?\nChoices:\n(A) team\n(B) winter\n(C) aquatic\n(D) combat" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.6171875, "loss": 4.296687602996826 }, { "id": "identity_34896", "image": "PMC-VQA/images/PMC7061816_FIG1_130555.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which structure is involved in the enhancing mass seen in the images?\nChoices:\n(A) Hippocampus \n(B) Caudate nucleus \n(C) Septum pellucidum and corpus callosum \n(D) Putamen" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.796875, "loss": 4.125326633453369 }, { "id": "identity_225106", "image": "A-OKVQA/images/175831.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: From which floor's balcony could someone get the most optimal view of the flag and the entire front facade of the building, considering the height and angle of view?" }, { "from": "gpt", "value": "The answer is fourth" } ], "clip_score": 24.84375, "loss": 4.717343330383301 }, { "id": "identity_187956", "image": "A-OKVQA/images/357572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person on the sign?" }, { "from": "gpt", "value": "The answer is Dee Hunter" } ], "clip_score": 22.1875, "loss": 4.521523952484131 }, { "id": "identity_186764", "image": "A-OKVQA/images/488990.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are crossing the street?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.71875, "loss": 4.253710746765137 }, { "id": "identity_289729", "image": "A-OKVQA/images/185334.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the bikes being covered with junk?\nChoices:\n(A) for sale\n(B) showing off\n(C) confused\n(D) find easily" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.453125, "loss": 4.559990406036377 }, { "id": "identity_267534", "image": "DocVQA/images/zhfg0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you explain what the Mailbox is?" }, { "from": "gpt", "value": "The answer is off" } ], "clip_score": 18.0, "loss": 4.770761966705322 }, { "id": "identity_323982", "image": "PMC-VQA/images/PMC5029079_Fig1_74770.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the thoracic window shown in the images?\nChoices:\n(A) Mediastinal Window\n(B) Pulmonary Window\n(C) Abdominothoracic Window\n(D) Cardiovascular Window" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.5625, "loss": 4.148409366607666 }, { "id": "identity_17673", "image": "FigureQA/images/44776.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Forest Green the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.4375, "loss": 4.021462440490723 }, { "id": "identity_313366", "image": "Super-CLEVR/images/superCLEVR_new_000123.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What size is the metal tandem bike to the left of the large purple object behind the large brown biplane?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 28.328125, "loss": 4.210806846618652 }, { "id": "identity_148013", "image": "DocVQA/images/jzhd0227_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the hotel where the workshop is organized?" }, { "from": "gpt", "value": "The answer is Disneyland Hotel" } ], "clip_score": 16.671875, "loss": 4.0491557121276855 }, { "id": "identity_252917", "image": "VQA-AS/images/2490.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the animal on the woman's lap, and how many legs of the animal are visible?" }, { "from": "gpt", "value": "The answer is White, three" } ], "clip_score": 17.375, "loss": 4.74751615524292 }, { "id": "identity_320053", "image": "A-OKVQA/images/368515.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is a baby most likely to be in this image?\nChoices:\n(A) stroller\n(B) building\n(C) car\n(D) tree" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.59375, "loss": 4.272150993347168 }, { "id": "identity_116396", "image": "Super-CLEVR/images/superCLEVR_new_001855.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is the yellow object in the image?" }, { "from": "gpt", "value": "The answer is A plane" } ], "clip_score": 20.875, "loss": 4.4212870597839355 }, { "id": "identity_38343", "image": "TQA/images/abc_question_images/human_system_nervous_16189.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which represents the frontal lobe?\nChoices:\n(A) V\n(B) P\n(C) D\n(D) X" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.734375, "loss": 5.0010857582092285 }, { "id": "identity_303013", "image": "Super-CLEVR/images/superCLEVR_new_001441.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the red bicycle that is the same size as the yellow bicycle?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 26.140625, "loss": 4.647549629211426 }, { "id": "identity_268152", "image": "ScienceQA/images/14576.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Idaho?\nChoices:\n(A) Helena\n(B) Nampa\n(C) Boise\n(D) Philadelphia" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.078125, "loss": 4.100251197814941 }, { "id": "identity_266942", "image": "AI2D/images/4149.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which process follows the formation of sediments in the geological cycle?\nChoices:\n(A) melting\n(B) metamorphism\n(C) uplift\n(D) compaction, cementation" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.59375, "loss": 4.062453269958496 }, { "id": "identity_286431", "image": "AI2D/images/3356.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the ladybug's body are the spots located?\nChoices:\n(A) Leg\n(B) Antennae\n(C) Pronotum\n(D) Elytra" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 35.9375, "loss": 4.114335060119629 }, { "id": "identity_318106", "image": "PMC-VQA/images/PMC5579036_Fig2_210075.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible in the TEM photomicrographs?\nChoices:\n(A) GBM thickness, open slit pores, and necrotic tubules\n(B) Mitochondrial cristae, necrotic tubules, and healthy tubules\n(C) GBM thickness, open slit pores, and mitochondrial cristae\n(D) Mitochondrial cristae, open slit pores, and necrotic tubules" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.046875, "loss": 4.034314155578613 }, { "id": "identity_78473", "image": "ScienceQA/images/7977.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many prominent tree trunks can be seen in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 17.9375, "loss": 4.917828559875488 }, { "id": "identity_36652", "image": "PMC-VQA/images/PMC5101592_fig-8_93353.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the position of scale bar B in relation to the image? \nChoices:\n(A) Above the image \n(B) Below the image \n(C) To the left of the image \n(D) To the right of the image " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.15625, "loss": 4.184384346008301 }, { "id": "identity_302038", "image": "A-OKVQA/images/260182.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what setting is this scene?\nChoices:\n(A) rural\n(B) urban\n(C) farm\n(D) suburban" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 9.6484375, "loss": 4.985214710235596 }, { "id": "identity_331549", "image": "FigureQA/images/1957.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this color the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.265625, "loss": 4.302106857299805 }, { "id": "identity_12894", "image": "FigureQA/images/58896.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Periwinkle the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.546875, "loss": 4.328404426574707 }, { "id": "identity_211490", "image": "A-OKVQA/images/4535.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flags are visible in the background?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 9.578125, "loss": 4.611845016479492 }, { "id": "identity_309903", "image": "ScienceQA/images/8911.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Tallahassee\n(B) Charleston\n(C) Baton Rouge\n(D) Savannah" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.53125, "loss": 4.2066426277160645 }, { "id": "identity_974", "image": "TQA/images/question_images/earth_moon_phases_6006.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which moon phase is furthest from the sun?\nChoices:\n(A) Full moon\n(B) Waxing crescent moon\n(C) New moon\n(D) Third quarter moon" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.390625, "loss": 4.156815052032471 }, { "id": "identity_322553", "image": "A-OKVQA/images/146560.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this pole part of?\nChoices:\n(A) cell tower\n(B) phone line\n(C) barber shop\n(D) polo game" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.890625, "loss": 4.320507049560547 }, { "id": "identity_335116", "image": "Super-CLEVR/images/superCLEVR_new_001706.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the shiny car to the right of the small airliner the same color as the large car behind the small cyan object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 32.71875, "loss": 5.107224464416504 }, { "id": "identity_282204", "image": "IconQA/images/71170.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this shape have a flat surface or is it solid?\nChoices:\n(A) flat\n(B) solid" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.8125, "loss": 4.195675849914551 }, { "id": "identity_325017", "image": "A-OKVQA/images/390346.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the creature in the front have?\nChoices:\n(A) stinger\n(B) wings\n(C) horns\n(D) gills" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.3125, "loss": 4.286276340484619 }, { "id": "identity_128459", "image": "Super-CLEVR/images/superCLEVR_new_000234.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the wheels on the purple bicycle?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 22.359375, "loss": 4.137035846710205 }, { "id": "identity_207751", "image": "VQA-AS/images/9851.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the person's clothing?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 20.6875, "loss": 4.540223121643066 }, { "id": "identity_12904", "image": "FigureQA/images/23375.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Tomato the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.4453125, "loss": 4.301618576049805 }, { "id": "identity_328566", "image": "IconQA/images/91801.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What shape is shown here?\nChoices:\n(A) sphere\n(B) cylinder\n(C) cone\n(D) cube" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.34375, "loss": 4.506481170654297 }, { "id": "identity_305292", "image": "A-OKVQA/images/209844.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the person want to do with the ball?\nChoices:\n(A) catch it\n(B) bounce it\n(C) hit it\n(D) dodge it" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.625, "loss": 4.615314483642578 }, { "id": "identity_16020", "image": "Super-CLEVR/images/superCLEVR_new_001107.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the number of small double buss to the right of the big red shiny thing the same as the number of small brown suvs left of the tiny cyan fighter?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 26.5625, "loss": 4.859158992767334 }, { "id": "identity_320490", "image": "ScienceQA/images/9616.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown here?\nChoices:\n(A) Maryland\n(B) Pennsylvania\n(C) Florida\n(D) Connecticut" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.5625, "loss": 4.065075874328613 }, { "id": "identity_328507", "image": "PMC-VQA/images/PMC7478461_F2_250391.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this show?\nChoices:\n(A) A sagittal view of something\n(B) A transverse view of something\n(C) A coronal view of something\n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.65625, "loss": 4.089772701263428 }, { "id": "identity_319007", "image": "AI2D/images/83.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents the part of the Earth that is experiencing night?\nChoices:\n(A) E\n(B) D\n(C) C\n(D) B" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.5625, "loss": 4.561620235443115 }, { "id": "identity_325931", "image": "ScienceQA/images/19207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the place shown?\nChoices:\n(A) Pennsylvania\n(B) Rhode Island\n(C) Massachusetts\n(D) Georgia" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.359375, "loss": 4.812037944793701 }, { "id": "identity_9849", "image": "FigureQA/images/34734.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Blue the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.390625, "loss": 4.353056907653809 }, { "id": "identity_222895", "image": "TextVQA/images/5af917d6bf6979b0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of selling petrified wood according to the sign?" }, { "from": "gpt", "value": "The answer is Eliminates worries of little importance, helps provide access to past lives" } ], "clip_score": 34.59375, "loss": 5.025140762329102 }, { "id": "identity_265765", "image": "FigureQA/images/34604.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Forest Green the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.03125, "loss": 4.476452827453613 }, { "id": "identity_290651", "image": "PMC-VQA/images/PMC6698944_acm212654-fig-0005_25069.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the patient depicted in the leftmost image:\n(A) Patient who has had surgery.\n(B) Patient with cerebrospinal fluid leakage.\n(C) Patient who has not had surgery.\n(D) Patient with normal cerebrospinal fluid." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.765625, "loss": 4.550179481506348 }, { "id": "identity_335024", "image": "A-OKVQA/images/315432.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are these stationary?\nChoices:\n(A) unseaworthy\n(B) no gas\n(C) docked\n(D) bad weather" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.1875, "loss": 4.007045269012451 }, { "id": "identity_30582", "image": "IconQA/images/85395.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) flip\n(B) turn\n(C) slide" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.6875, "loss": 4.336055755615234 }, { "id": "identity_44041", "image": "DVQA/images/bar_train_00159770.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object has the highest preference in the \"luxury\" category?" }, { "from": "gpt", "value": "The answer is The object \"school" } ], "clip_score": 25.15625, "loss": 5.002162933349609 }, { "id": "identity_264049", "image": "Super-CLEVR/images/superCLEVR_new_001955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the large gray objects in front of the cyan mountain bike equal the number of big yellow rubber road bikes?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 28.421875, "loss": 4.954188346862793 }, { "id": "identity_312141", "image": "FigureQA/images/24509.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Navy Blue?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.515625, "loss": 4.044436931610107 }, { "id": "identity_111897", "image": "A-OKVQA/images/99952.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the chairs?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 20.390625, "loss": 5.108438968658447 }, { "id": "identity_275589", "image": "Super-CLEVR/images/superCLEVR_new_000280.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other items made from the same material as the bicycle?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 22.84375, "loss": 5.091838359832764 }, { "id": "identity_329800", "image": "PMC-VQA/images/PMC8139247_f1_446974.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is observed in the endoscopic image of moderate-grade PRNN?\nChoices:\n(A) Normal lining of the digestive tract \n(B) Deep ulcer \n(C) Superficial ulcer \n(D) No abnormality is seen" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.375, "loss": 4.507904529571533 }, { "id": "identity_318456", "image": "Super-CLEVR/images/superCLEVR_new_001530.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other objects made of the same material as the tiny purple object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 20.40625, "loss": 4.982315540313721 }, { "id": "identity_27698", "image": "A-OKVQA/images/223121.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What country is the most likely destination for this airport?\nChoices:\n(A) united kingdom\n(B) australia\n(C) united states\n(D) china" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.859375, "loss": 4.237441062927246 }, { "id": "identity_334004", "image": "Super-CLEVR/images/superCLEVR_new_000278.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the large brown object made of the same material as the tiny object in front of the small shiny bicycle?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 26.71875, "loss": 4.944697856903076 }, { "id": "identity_34910", "image": "Super-CLEVR/images/superCLEVR_new_000091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a small matte object behind the metallic truck; does it have the same shape as the brown object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 24.078125, "loss": 5.272345066070557 }, { "id": "identity_291063", "image": "IconQA/images/98765.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture is the sixth in the sequence, following the first picture of a rocket?\nChoices:\n(A) star\n(B) moon\n(C) rocket" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.859375, "loss": 4.494532585144043 }, { "id": "identity_64128", "image": "ScienceQA/images/6644.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the bird's feathers closest to its beak?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 27.328125, "loss": 4.448570728302002 }, { "id": "identity_274865", "image": "FigureQA/images/91101.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Maroon or Dark Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.0078125, "loss": 4.68687629699707 }, { "id": "identity_13907", "image": "ScienceQA/images/8631.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Pennsylvania?\nChoices:\n(A) Harrisburg\n(B) Pittsburgh\n(C) Philadelphia\n(D) Trenton" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.296875, "loss": 4.125533103942871 }, { "id": "identity_273825", "image": "PMC-VQA/images/PMC8961915_Fig3_237822.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color in the image represents the contact area?\nChoices:\n(A) Red\n(B) White\n(C) Green\n(D) Blue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.5, "loss": 4.136547565460205 }, { "id": "identity_238888", "image": "Super-CLEVR/images/superCLEVR_new_001437.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more brown rubber fighters in front of the big regular bus than the total number of tiny brown matte fighters and blue metal motorbikes behind the big regular bus?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 29.265625, "loss": 4.7112274169921875 }, { "id": "identity_297010", "image": "A-OKVQA/images/451893.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What types of trees are present in the background? Are they:\n(A) evergreen\n(B) tropical\n(C) deciduous\n(D) conifers" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.3671875, "loss": 4.446045875549316 }, { "id": "identity_193198", "image": "TextVQA/images/034ab6f9af333146.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the official partner displayed on the scoreboard?" }, { "from": "gpt", "value": "The answer is Xbox" } ], "clip_score": 21.0, "loss": 4.175727367401123 }, { "id": "identity_287068", "image": "A-OKVQA/images/403862.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which character does the face on the train resemble? \n(A) choo\n(B) thomas\n(C) old yeller\n(D) choo" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.234375, "loss": 4.099220275878906 }, { "id": "identity_273397", "image": "VizWiz/images/VizWiz_train_00010275.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify this medication?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 6.71484375, "loss": 4.113940715789795 }, { "id": "identity_329450", "image": "FigureQA/images/96921.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this have the minimum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.40625, "loss": 4.333011627197266 }, { "id": "identity_142617", "image": "DocVQA/images/jrcy0227_34.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person or entity that received the money?" }, { "from": "gpt", "value": "The answer is Atzigi Comm on Unicefstan" } ], "clip_score": 21.578125, "loss": 7.231995105743408 }, { "id": "identity_34991", "image": "PMC-VQA/images/PMC7435020_F13_236787.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of cells are shown in the confocal microscopy image? \nChoices:\n(A) Red blood cells \n(B) Bone marrow cells \n(C) Muscle cells \n(D) Brain cells " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.171875, "loss": 4.452606201171875 }, { "id": "identity_335150", "image": "ScienceQA/images/21193.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Raleigh\n(B) Charlotte\n(C) Jackson\n(D) Atlanta" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.078125, "loss": 4.2936859130859375 }, { "id": "identity_322905", "image": "DocVQA/images/pqll0006_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name mentioned in this form?" }, { "from": "gpt", "value": "The answer is Alan B. Norman" } ], "clip_score": 19.796875, "loss": 4.78030252456665 }, { "id": "identity_122617", "image": "TextVQA/images/241bb0b26cbd4680.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrian crossing markings are visible on the road?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 26.75, "loss": 4.471564769744873 }, { "id": "identity_271005", "image": "TQA/images/question_images/glaciers_6938.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the diagram represents a rounded hollow carved in the side of a mountain by a glacier? Is it:\n(A) arete\n(B) cirque\n(C) truncated spur\n(D) horn" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.78125, "loss": 4.402010917663574 }, { "id": "identity_265776", "image": "IconQA/images/71909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the shape open or closed? Select the correct option:\n(A) open\n(B) closed" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.765625, "loss": 4.267352104187012 }, { "id": "identity_12879", "image": "TextVQA/images/8c70eac1edc9b8be.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is written on the lanyard?" }, { "from": "gpt", "value": "The answer is demo" } ], "clip_score": 15.7578125, "loss": 4.486253261566162 }, { "id": "identity_25631", "image": "PMC-VQA/images/PMC7666285_Fig2_298397.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique was used for the scan? \nChoices:\n(A) X-Ray \n(B) MRI \n(C) CT Scan \n(D) Ultrasound " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.421875, "loss": 4.347716808319092 }, { "id": "identity_265368", "image": "TextVQA/images/4cf1cb2759735961.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the home port of the sailboat on the left?" }, { "from": "gpt", "value": "The answer is cicago" } ], "clip_score": 27.28125, "loss": 4.390231609344482 }, { "id": "identity_227371", "image": "A-OKVQA/images/34597.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the skaters' current positions and the setup of the ramp, what are the sequential steps they need to perform to successfully execute the maneuver \"dropping in\"?" }, { "from": "gpt", "value": "The answer is Position, commit, balance, control speed, transition" } ], "clip_score": 22.65625, "loss": 4.544421195983887 }, { "id": "identity_32241", "image": "PMC-VQA/images/PMC8747250_polymers-14-00038-f007_160910.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the white stars in the image represent? \nChoices:\n(A) blood vessels \n(B) dental plaque \n(C) smoke particles \n(D) water droplets " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.578125, "loss": 5.059182643890381 }, { "id": "identity_278769", "image": "FigureQA/images/12526.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Gold the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.671875, "loss": 4.492114543914795 }, { "id": "identity_279588", "image": "TextVQA/images/165bdc1b0193a27a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me what is written on the street sign?" }, { "from": "gpt", "value": "The answer is boteler st" } ], "clip_score": 11.984375, "loss": 5.278849124908447 }, { "id": "identity_210980", "image": "TextVQA/images/8027f76e05dd8a97.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times does the word \"KITSCH\" appear in red color?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 19.953125, "loss": 4.0108184814453125 }, { "id": "identity_191845", "image": "PMC-VQA/images/PMC5432911_Fig8_165195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flower buds can be seen on the stem in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 27.21875, "loss": 4.990602493286133 }, { "id": "identity_148634", "image": "DocVQA/images/phvd0227_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the subject of this document?" }, { "from": "gpt", "value": "The answer is PPIDS" } ], "clip_score": 21.890625, "loss": 4.894002914428711 }, { "id": "identity_4301", "image": "A-OKVQA/images/31923.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which woman will get soaked the least?\nChoices:\n(A) middle\n(B) any\n(C) left\n(D) right" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.2734375, "loss": 4.353493690490723 }, { "id": "identity_299959", "image": "PMC-VQA/images/PMC7684285_Fig4_303752.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the action shown in this image?\nChoices:\n(A) Bone growth \n(B) Bone fracture \n(C) Bone dislocation \n(D) Bone inflammation" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.53125, "loss": 4.484667778015137 }, { "id": "identity_11452", "image": "TQA/images/abc_question_images/nuclear_energy_17094.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label represents the reactor?\nChoices:\n(A) C\n(B) V\n(C) S\n(D) W" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.28125, "loss": 4.6449432373046875 }, { "id": "identity_261376", "image": "FigureQA/images/25657.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Blue Violet the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.921875, "loss": 4.689103603363037 }, { "id": "identity_333721", "image": "A-OKVQA/images/546877.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What effect do the tight ropes next to the blue boat have on it?\nChoices:\n(A) secure it\n(B) play games\n(C) signal\n(D) nothing" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.2109375, "loss": 4.231551170349121 }, { "id": "identity_4116", "image": "PMC-VQA/images/PMC4049752_f1-ol-07-06-1895_295889.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of image indicates that the lesion is hyperintense? \nChoices:\n(A) T1 images \n(B) FLAIR images \n(C) postcontrast images \n(D) T2 images " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.53125, "loss": 4.195871829986572 }, { "id": "identity_941", "image": "AI2D/images/2493.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If A is Larvae, what is point F?\nChoices:\n(A) Larvae\n(B) Adult\n(C) Egg\n(D) Pupae" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.765625, "loss": 4.168010234832764 }, { "id": "identity_277757", "image": "Super-CLEVR/images/superCLEVR_new_000831.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the material of the object behind the small purple matte thing the same as that of the brown bicycle?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 34.125, "loss": 5.358522891998291 }, { "id": "identity_135427", "image": "DocVQA/images/ftxv0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the effect of herbicide treatments on kochia growth after thinning according to the study?" }, { "from": "gpt", "value": "The answer is Suppresses growth or less than predicted value" } ], "clip_score": 19.171875, "loss": 5.069849014282227 }, { "id": "identity_310867", "image": "AI2D/images/898.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If insects were removed from the web, who might go hungry?\nChoices:\n(A) Hawks\n(B) Rabbits\n(C) Birds\n(D) Squirrels" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.15625, "loss": 4.270163059234619 }, { "id": "identity_22313", "image": "DVQA/images/bar_train_00035512.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the fifth bar from the left?" }, { "from": "gpt", "value": "The answer is degree" } ], "clip_score": 18.9375, "loss": 4.512856483459473 }, { "id": "identity_318041", "image": "A-OKVQA/images/402887.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are these items arranged and displayed?\nChoices:\n(A) to donate\n(B) to clean\n(C) to sell\n(D) as art" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.765625, "loss": 4.286636829376221 }, { "id": "identity_313152", "image": "A-OKVQA/images/479595.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this device located?\nChoices:\n(A) fire house\n(B) church\n(C) school\n(D) cemetery" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.8125, "loss": 4.366398334503174 }, { "id": "identity_3453", "image": "PMC-VQA/images/PMC6799394_F1_52713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which artery is shown in the images? \nChoices:\n(A) Right Vertebral Artery \n(B) Left Vertebral Artery \n(C) Femoral Artery \n(D) Carotid Artery " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.234375, "loss": 4.106015205383301 }, { "id": "identity_272376", "image": "A-OKVQA/images/373266.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the content that the old man is currently viewing?\nChoices:\n(A) movie\n(B) operation\n(C) reality show\n(D) drama" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.5, "loss": 4.761218547821045 }, { "id": "identity_333328", "image": "A-OKVQA/images/484666.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where might someone purchase a second-hand appliance around here?\nChoices:\n(A) off curb\n(B) pawn shop\n(C) 7 11\n(D) garbage can" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.96875, "loss": 4.121909141540527 }, { "id": "identity_105610", "image": "VQA-AS/images/10277.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pillows are visible on the sofa?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 26.71875, "loss": 4.421402931213379 }, { "id": "identity_283475", "image": "ScienceQA/images/15905.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) Kentucky\n(B) Florida\n(C) Connecticut\n(D) Rhode Island" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.703125, "loss": 4.714909553527832 }, { "id": "identity_51331", "image": "DVQA/images/bar_train_00015868.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the Y-axis?" }, { "from": "gpt", "value": "The answer is savage, rail" } ], "clip_score": 28.640625, "loss": 4.41790771484375 }, { "id": "identity_269966", "image": "Super-CLEVR/images/superCLEVR_new_000369.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do any fighters have the same material as the large brown motorbike?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 18.609375, "loss": 4.777726650238037 }, { "id": "identity_68160", "image": "TQA/images/question_images/parts_skeleton_7267.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many phalanges are labeled in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.53125, "loss": 4.595338821411133 }, { "id": "identity_336383", "image": "A-OKVQA/images/405271.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How might people here move forward if they aren't going downhill?\nChoices:\n(A) taxi\n(B) using poles\n(C) uber\n(D) wind" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.9296875, "loss": 4.225435733795166 }, { "id": "identity_283663", "image": "A-OKVQA/images/567287.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the umbrellas being different colors?\nChoices:\n(A) privacy\n(B) random colors\n(C) being colorful\n(D) identification" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.40625, "loss": 4.10541296005249 }, { "id": "identity_286288", "image": "VQA2.0/images/225104.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Against what are the spectators leaning?" }, { "from": "gpt", "value": "The answer is wall" } ], "clip_score": 20.359375, "loss": 4.327754974365234 }, { "id": "identity_61089", "image": "DocVQA/images/rmch0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many legible copies of the abstracts are required?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 20.96875, "loss": 4.931212425231934 }, { "id": "identity_283649", "image": "PMC-VQA/images/PMC6440287_F1_459012.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging method was utilized in the study? \nOptions:\n(A) Computed Tomography (CT) scan\n(B) Magnetic Resonance Imaging (MRI) scan\n(C) Positron Emission Tomography (PET) scan\n(D) Ultrasound scan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.8125, "loss": 4.468157768249512 }, { "id": "identity_295555", "image": "FigureQA/images/24549.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gold the highest possible option?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.5, "loss": 4.613670825958252 }, { "id": "identity_155682", "image": "TQA/images/question_images/convection_of_air_8046.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct steps are shown in the air movement cycle in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 24.3125, "loss": 4.204018592834473 }, { "id": "identity_14292", "image": "IconQA/images/9892.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of fish even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.015625, "loss": 4.014509677886963 }, { "id": "identity_335900", "image": "A-OKVQA/images/20133.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What will the pink and white object provide?\nChoices:\n(A) power\n(B) music\n(C) games\n(D) cell service" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.890625, "loss": 4.4940900802612305 }, { "id": "identity_303092", "image": "VQA2.0/images/336077.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this place crowded?" }, { "from": "gpt", "value": "The answer is baseball game" } ], "clip_score": 27.09375, "loss": 4.726668834686279 }, { "id": "identity_226548", "image": "A-OKVQA/images/441168.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of players engaged in the game if each player is playing a different character on each screen, and how many characters are visible on the screen?" }, { "from": "gpt", "value": "The answer is two characters" } ], "clip_score": 21.453125, "loss": 4.735471725463867 }, { "id": "identity_286688", "image": "PMC-VQA/images/PMC7714745_FIG5_314705.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is the hepatic metastasis found?\nChoices:\n(A) Left lobe\n(B) Spleen\n(C) Right lobe\n(D) Undetermined" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.515625, "loss": 4.472414493560791 }, { "id": "identity_330936", "image": "A-OKVQA/images/147879.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is up there?\nChoices:\n(A) balloons\n(B) lights\n(C) bats\n(D) squid" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.46875, "loss": 4.3033952713012695 }, { "id": "identity_254854", "image": "TextVQA/images/8fffa9f900e37e77.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what are the names on the front of the person on the left and the back of the person on the right?" }, { "from": "gpt", "value": "The answer is orco, no name" } ], "clip_score": 19.90625, "loss": 5.03924560546875 }, { "id": "identity_292687", "image": "PMC-VQA/images/PMC5839822_F1_284420.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of MRI scan was performed?\nChoices:\n(A) CT scan\n(B) Diffusion-weighted imaging (DWI) scan\n(C) Magnetic resonance spectroscopy (MRS) scan\n(D) Apparent diffusion coefficient (ADC) map scan" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.140625, "loss": 4.15578556060791 }, { "id": "identity_292948", "image": "IconQA/images/94422.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine if the number of hearts is even or odd.\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.984375, "loss": 4.087433815002441 }, { "id": "identity_265171", "image": "AI2D/images/4107.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What gas is represented by D in the process of photosynthesis?\nChoices:\n(A) Carbon monoxide\n(B) Carbon dioxide\n(C) Oxygen\n(D) Water" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.53125, "loss": 4.417604923248291 }, { "id": "identity_74529", "image": "TQA/images/question_images/aquifers_6522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term used to describe the boundary between fresh groundwater and sea water?" }, { "from": "gpt", "value": "The answer is Normal interface" } ], "clip_score": 34.125, "loss": 4.6972975730896 }, { "id": "identity_284752", "image": "A-OKVQA/images/93894.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose is the large rectangular object on the table with a screen typically used?\nChoices:\n(A) ordering\n(B) cooking\n(C) wiping\n(D) eating" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.09375, "loss": 4.492886543273926 }, { "id": "identity_338090", "image": "Super-CLEVR/images/superCLEVR_new_000367.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the matte object to the right of the small yellow bicycle the same color as the large object in front of the large yellow matte school bus?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 27.34375, "loss": 5.047848701477051 }, { "id": "identity_191170", "image": "VQA2.0/images/367357.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of accessory is the person on the right holding?" }, { "from": "gpt", "value": "The answer is A bag" } ], "clip_score": 18.59375, "loss": 4.019477844238281 }, { "id": "identity_283092", "image": "IconQA/images/18872.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine if the number of diamonds is even or odd.\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.625, "loss": 4.0050272941589355 }, { "id": "identity_325288", "image": "A-OKVQA/images/392083.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the individual in the elevated position observing?\nChoices:\n(A) sunset\n(B) swimmers\n(C) boats\n(D) sea slugs" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.328125, "loss": 4.344696998596191 }, { "id": "identity_103832", "image": "Super-CLEVR/images/superCLEVR_new_001701.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have two visible wheels?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.875, "loss": 4.618405818939209 }, { "id": "identity_307999", "image": "AI2D/images/3502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the substance outside the nucleus of a cell?\nChoices:\n(A) centriole\n(B) cell membrane\n(C) cytoplasm\n(D) lysosome" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.921875, "loss": 4.142917156219482 }, { "id": "identity_223150", "image": "TextVQA/images/03e3bb402e4c2640.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What pattern is on the woman's skirt in the poster?" }, { "from": "gpt", "value": "The answer is Diamond or harlequin" } ], "clip_score": 20.6875, "loss": 4.044021129608154 }, { "id": "identity_288963", "image": "ScienceQA/images/10585.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following correctly identifies the direction of thermal energy transfer during this time: from each battery to the surroundings or from the surroundings to each battery?\nChoices:\n(A) each battery . . . the surroundings\n(B) the surroundings . . . each battery" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.046875, "loss": 4.665924549102783 }, { "id": "identity_197771", "image": "TextVQA/images/53b23a5b81600a08.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main activity depicted on the sign to the left of the 'B.V.D.' sign?" }, { "from": "gpt", "value": "The answer is Running" } ], "clip_score": 17.828125, "loss": 5.2101030349731445 }, { "id": "identity_283033", "image": "ScienceQA/images/6812.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Utah?\n(A) Salt Lake City\n(B) Wichita\n(C) Provo\n(D) Cheyenne" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.578125, "loss": 4.017973899841309 }, { "id": "identity_192241", "image": "A-OKVQA/images/112922.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible field players are wearing white jerseys?" }, { "from": "gpt", "value": "The answer is Zero" } ], "clip_score": 22.265625, "loss": 4.003607749938965 }, { "id": "identity_272259", "image": "A-OKVQA/images/328935.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the gathering of all the people? Is it for free food, traveling, a concert, or shopping?\nChoices:\n(A) free food\n(B) traveling\n(C) concert\n(D) shopping" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.671875, "loss": 4.423613548278809 }, { "id": "identity_316086", "image": "VizWiz/images/VizWiz_train_00008542.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does this say?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 21.4375, "loss": 4.283737659454346 }, { "id": "identity_337153", "image": "A-OKVQA/images/97097.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the risk of engaging in this activity without a jacket?\nChoices:\n(A) starvation\n(B) hypothermia\n(C) bear attack\n(D) dehydration" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.4609375, "loss": 4.784343242645264 }, { "id": "identity_324822", "image": "A-OKVQA/images/159987.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is abundant here?\nChoices:\n(A) sand\n(B) dust\n(C) traffic\n(D) snow" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 8.921875, "loss": 4.661546230316162 }, { "id": "identity_244967", "image": "VQA2.0/images/218406.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different patterns are present on the bed and the wall behind it?" }, { "from": "gpt", "value": "The answer is four" } ], "clip_score": 26.5625, "loss": 4.058660984039307 }, { "id": "identity_269737", "image": "FigureQA/images/79558.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Turquoise the color with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.375, "loss": 4.030993461608887 }, { "id": "identity_254034", "image": "TextVQA/images/f8030210d0801c9b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the main benefit communicated by Colgate Max Fresh in the challenge section?" }, { "from": "gpt", "value": "The answer is creating fresh breath" } ], "clip_score": 23.421875, "loss": 4.491782188415527 }, { "id": "identity_116488", "image": "PMC-VQA/images/PMC7913723_diagnostics-11-00230-f003_376942.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiopacity seen in the center of the image indicative of?" }, { "from": "gpt", "value": "The answer is Heart" } ], "clip_score": 26.703125, "loss": 4.73824405670166 }, { "id": "identity_307138", "image": "ScienceQA/images/10341.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the highlighted place?\nChoices:\n(A) Saint Vincent and the Grenadines\n(B) Grenada\n(C) Trinidad and Tobago\n(D) Dominica" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.640625, "loss": 4.0850958824157715 }, { "id": "identity_18695", "image": "Super-CLEVR/images/superCLEVR_new_001223.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the cyan truck and the green fighter have the same size?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 27.828125, "loss": 5.265903472900391 }, { "id": "identity_22566", "image": "Super-CLEVR/images/superCLEVR_new_000634.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there the same number of tiny yellow metal tandem bikes that are to the left of the gray bicycle and cars behind the gray rubber bus?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 31.359375, "loss": 4.912509918212891 }, { "id": "identity_320672", "image": "PMC-VQA/images/PMC7267678_fig0020_182999.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was observed on the previous MRI?\nChoices:\n(A) No evident pathologies \n(B) Macro level siliconomas \n(C) Lateral lymph node \n(D) MRI report not mentioned." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.765625, "loss": 4.047798156738281 }, { "id": "identity_288992", "image": "Super-CLEVR/images/superCLEVR_new_001616.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the number of gray items to the right of the small metal cruiser equal to the number of green objects?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 24.078125, "loss": 5.203711986541748 }, { "id": "identity_5537", "image": "A-OKVQA/images/99952.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the umbrella being used for?\nChoices:\n(A) keeping dry\n(B) decoration\n(C) lighting\n(D) to dance" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.71875, "loss": 4.42565393447876 }, { "id": "identity_326691", "image": "PMC-VQA/images/PMC6455102_f4_462834.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the likely cause of the focal uptakes seen in this image?\nChoices:\n(A) Osteoarthritis\n(B) Rheumatoid arthritis\n(C) Soft tissue injury\n(D) Cancer metastasis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.671875, "loss": 4.576967239379883 }, { "id": "identity_304165", "image": "DVQA/images/bar_train_00044584.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item had the highest number of units sold?" }, { "from": "gpt", "value": "The answer is honor" } ], "clip_score": 26.453125, "loss": 4.156457424163818 }, { "id": "identity_298881", "image": "PMC-VQA/images/PMC1283984_F4_3833.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color are the eosinophils depicted in the image?\nChoices:\n(A) Blue\n(B) Yellow\n(C) Green\n(D) Red." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.4609375, "loss": 4.640079975128174 }, { "id": "identity_138751", "image": "DocVQA/images/qkjk0079_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the position title of the individual for whom this performance plan is created?" }, { "from": "gpt", "value": "The answer is Director Promotion" } ], "clip_score": 22.328125, "loss": 4.204773426055908 }, { "id": "identity_263299", "image": "TextVQA/images/a2d519124a10d966.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the name of the restaurant that is depicted in the image?" }, { "from": "gpt", "value": "The answer is penny lane restaurant " } ], "clip_score": 21.40625, "loss": 4.43224573135376 }, { "id": "identity_107207", "image": "VQA2.0/images/212838.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the position of the person's thumb?" }, { "from": "gpt", "value": "The answer is Over the can's tab" } ], "clip_score": 15.15625, "loss": 4.4781975746154785 }, { "id": "identity_31266", "image": "PMC-VQA/images/PMC6475549_fig1_468927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the abnormality found in the CT scan? \nChoices:\n(A) Sphingomyelin deposition \n(B) Septal enhancement \n(C) Bronchitis \n(D) Asthma. " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.921875, "loss": 4.439345836639404 }, { "id": "identity_129574", "image": "A-OKVQA/images/61997.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many paintings are visible on the display stand to the left?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 24.171875, "loss": 4.054287910461426 }, { "id": "identity_2243", "image": "Super-CLEVR/images/superCLEVR_new_000505.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the metallic object that is the same color as the tiny metallic jet?" }, { "from": "gpt", "value": "The answer is fighter" } ], "clip_score": 19.765625, "loss": 5.0291972160339355 }, { "id": "identity_21876", "image": "A-OKVQA/images/37678.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What operation should be done with the numbers to arrive at the years of steam power?\nChoices:\n(A) division\n(B) multiplication\n(C) subtraction\n(D) addition" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.953125, "loss": 4.217726707458496 }, { "id": "identity_25541", "image": "VQA-AS/images/11888.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the windows have the same dimensions?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 20.953125, "loss": 4.154921054840088 }, { "id": "identity_303633", "image": "PMC-VQA/images/PMC5811017_pone.0192863.g001_275148.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What staining method was applied in this research?\nChoices:\n(A) Von Kossa staining\n(B) H&E staining\n(C) Sirius red staining\n(D) Congo red staining" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.84375, "loss": 4.035933494567871 }, { "id": "identity_297009", "image": "IconQA/images/23355.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are the number of triangles considered even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.09375, "loss": 4.200186729431152 }, { "id": "identity_14859", "image": "ScienceQA/images/14878.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following best describes an ecosystem in a California tide pool?\nChoices:\n(A) a school of fluffy sculpins\n(B) the giant green anemones, the ochre sea stars, and the red octopuses\n(C) the rocks, the salt water, and the California mussels" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.671875, "loss": 4.080624103546143 }, { "id": "identity_314304", "image": "Super-CLEVR/images/superCLEVR_new_000280.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there anything else made of the same material as this?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 16.09375, "loss": 4.6666975021362305 }, { "id": "identity_10756", "image": "PMC-VQA/images/PMC7756594_mrm28524-fig-0003_325276.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the dashed vertical line represent? \nChoices:\n(A) Motion of the subject \n(B) Acquisition of shots \n(C) Separation between shots \n(D) None of the above " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.421875, "loss": 4.858710289001465 }, { "id": "identity_126743", "image": "A-OKVQA/images/90196.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shirt worn by the person standing closest to the elephant's head?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 22.78125, "loss": 4.260833740234375 }, { "id": "identity_111067", "image": "VizWiz/images/VizWiz_train_00013282.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the petals of the flowers?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 22.9375, "loss": 4.115889549255371 }, { "id": "identity_282719", "image": "A-OKVQA/images/376246.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of jacket is commonly worn by people when skiing?\nChoices:\n(A) comforter\n(B) vest\n(C) parka\n(D) sweater" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.9375, "loss": 4.230887413024902 }, { "id": "identity_109752", "image": "A-OKVQA/images/197959.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pendant lights are hanging above the kitchen island?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 27.71875, "loss": 4.363391876220703 }, { "id": "identity_319865", "image": "A-OKVQA/images/100516.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What aspect of tennis is occurring?\nChoices:\n(A) serve\n(B) side spin\n(C) block\n(D) backhand" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.328125, "loss": 4.64127254486084 }, { "id": "identity_119100", "image": "PMC-VQA/images/PMC8550605_Fig37_97814.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct light sources can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.46875, "loss": 4.399364471435547 }, { "id": "identity_313642", "image": "ScienceQA/images/19083.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which better describes this forest ecosystem?\nChoices:\n(A) It has cold, wet winters. It also has soil that is poor in nutrients.\n(B) It has cold, wet winters. It also has only a few types of trees." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.0625, "loss": 4.25222635269165 }, { "id": "identity_288079", "image": "PMC-VQA/images/PMC6262857_BIO038661F3_402705.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options is displayed in green?\n(A) Gsh2\n(B) Pax6\n(C) DAPI\n(D) None of the above." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.3984375, "loss": 5.040102958679199 }, { "id": "identity_314949", "image": "FigureQA/images/93775.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cornflower's value the lowest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.09375, "loss": 4.132420063018799 }, { "id": "identity_204510", "image": "Super-CLEVR/images/superCLEVR_new_001726.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly behind the purple and orange vehicle?" }, { "from": "gpt", "value": "The answer is A yellow bus" } ], "clip_score": 25.09375, "loss": 4.746926784515381 }, { "id": "identity_304912", "image": "TextVQA/images/6ebfd89c13a6894a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What number should I dial to reach the Grand Canal?" }, { "from": "gpt", "value": "The answer is nationals" } ], "clip_score": 22.171875, "loss": 6.119075775146484 }, { "id": "identity_27890", "image": "A-OKVQA/images/125117.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What area of a city would this be called?\nChoices:\n(A) outskirts\n(B) campus\n(C) downtown\n(D) suburbs" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.140625, "loss": 4.641135215759277 }, { "id": "identity_21173", "image": "A-OKVQA/images/387857.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Using the long handled pan shown is handy in what situation of cooking?\nChoices:\n(A) fireplace\n(B) electric oven\n(C) hot plate\n(D) microwave" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.390625, "loss": 4.284177303314209 }, { "id": "identity_306016", "image": "AI2D/images/15.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If there were a sudden decrease in the number of large sharks, which organism would see a population increase?\nChoices:\n(A) shrimp\n(B) tuna\n(C) squid\n(D) none of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.53125, "loss": 4.375918388366699 }, { "id": "identity_311558", "image": "AI2D/images/841.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter indicates the larvae?\nChoices:\n(A) B\n(B) F\n(C) G\n(D) C" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.734375, "loss": 4.370077610015869 }, { "id": "identity_9668", "image": "TextVQA/images/bba20d711a767294.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the first line on the top of left page?" }, { "from": "gpt", "value": "The answer is on a watch sent home to" } ], "clip_score": 18.28125, "loss": 4.397832870483398 }, { "id": "identity_304899", "image": "FigureQA/images/30958.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Violet Red the lower median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.625, "loss": 4.32436466217041 }, { "id": "identity_271832", "image": "Super-CLEVR/images/superCLEVR_new_000604.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the large object located in front of the green dirtbike and to the left of the gray aeroplane?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 30.34375, "loss": 4.09935188293457 }, { "id": "identity_186749", "image": "PMC-VQA/images/PMC4631848_fig3_440391.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the condition of the bladder in the given scenario?" }, { "from": "gpt", "value": "The answer is Normal condition" } ], "clip_score": 23.21875, "loss": 4.838625907897949 }, { "id": "identity_17237", "image": "PMC-VQA/images/PMC4993752_F4_65235.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the image?\nChoices:\n(A) Hyphal growth in hair roots \n(B) Hyphal growth in control roots \n(C) No hyphal growth in hair roots \n(D) No hyphal growth in control roots" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.140625, "loss": 4.5199761390686035 }, { "id": "identity_23016", "image": "TabMWP/images/17812.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is the function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.25, "loss": 4.165904998779297 }, { "id": "identity_304016", "image": "A-OKVQA/images/490511.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this blue protective device help protect?\nChoices:\n(A) chest\n(B) head\n(C) knees\n(D) elbows" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 8.171875, "loss": 4.467133522033691 }, { "id": "identity_38578", "image": "PMC-VQA/images/PMC7360306_F2_213981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the location of the new cavity on the left lower lobe?\nChoices:\n(A) posterior basal segment \n(B) medial segment \n(C) anterior basal segment \n(D) superior segment " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.90625, "loss": 4.463911056518555 }, { "id": "identity_132822", "image": "TextVQA/images/c05a0b7d0166ea5a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many subdials are present on the watch face?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.9375, "loss": 4.371207237243652 }, { "id": "identity_11372", "image": "PMC-VQA/images/PMC5265783_F1_118893.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was used to capture the images? \nChoices:\n(A) CT Scan \n(B) Ultrasound \n(C) Magnetic Resonance Imaging \n(D) X-ray " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.71875, "loss": 4.005679130554199 }, { "id": "identity_26892", "image": "A-OKVQA/images/85081.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is this man taking part in?\nChoices:\n(A) hiking\n(B) roller skating\n(C) skiboarding\n(D) skiing" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.875, "loss": 4.2643351554870605 }, { "id": "identity_322723", "image": "IconQA/images/81862.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the shape of this?\nChoices:\n(A) sphere\n(B) cylinder\n(C) cone\n(D) cube" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.484375, "loss": 4.492746829986572 }, { "id": "identity_336527", "image": "A-OKVQA/images/412764.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What species is participating here?\nChoices:\n(A) feline\n(B) canine\n(C) bovine\n(D) ovine" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 8.609375, "loss": 4.337818622589111 }, { "id": "identity_282534", "image": "A-OKVQA/images/289569.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the woman depicted on the purple sign engaged in? Is it dancing, praying, eating, or singing?\nChoices:\n(A) dancing\n(B) praying\n(C) eating\n(D) singing" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.546875, "loss": 4.413613319396973 }, { "id": "identity_213680", "image": "VQA2.0/images/450621.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of structure are the people on?" }, { "from": "gpt", "value": "The answer is Bridge" } ], "clip_score": 22.28125, "loss": 4.203070163726807 }, { "id": "identity_10543", "image": "VQA2.0/images/247190.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this animal a mammal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 18.265625, "loss": 4.014972686767578 }, { "id": "identity_329977", "image": "Super-CLEVR/images/superCLEVR_new_000846.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the small bicycle that is left of a large bicycle behind a road bike that is on the right side of a yellow bicycle made of?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 22.984375, "loss": 4.660922050476074 }, { "id": "identity_330041", "image": "PMC-VQA/images/PMC6812851_pone.0224138.g004_55964.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the staining shown in this sample?\nChoices:\n(A) H&E stain\n(B) Ki67 stain\n(C) CD44 stain\n(D) TRPV5 and CaSR stain." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.34375, "loss": 4.752476692199707 }, { "id": "identity_311569", "image": "FigureQA/images/531.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.96875, "loss": 4.045895576477051 }, { "id": "identity_290413", "image": "VQA2.0/images/576082.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the woman wearing boots?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 24.875, "loss": 4.192786693572998 }, { "id": "identity_276136", "image": "FigureQA/images/44947.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Pale Green the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.4375, "loss": 4.558161735534668 }, { "id": "identity_26518", "image": "A-OKVQA/images/97379.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What missing items allows kites to be easily flown here?\nChoices:\n(A) kids\n(B) power lines\n(C) trains\n(D) parents" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.640625, "loss": 4.856544494628906 }, { "id": "identity_28074", "image": "A-OKVQA/images/416521.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How do the people know each other?\nChoices:\n(A) rivals\n(B) coworkers\n(C) siblings\n(D) neighbors" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 9.84375, "loss": 4.5710320472717285 }, { "id": "identity_327340", "image": "IconQA/images/31236.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, how likely is it that you will get a black one?\nChoices:\n(A) probable\n(B) unlikely\n(C) impossible\n(D) certain" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.015625, "loss": 4.18923807144165 }, { "id": "identity_14188", "image": "A-OKVQA/images/292444.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the job of the men on the horses?\nChoices:\n(A) doctors\n(B) judges\n(C) waiters\n(D) officers" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.71875, "loss": 4.188795566558838 }, { "id": "identity_198760", "image": "PMC-VQA/images/PMC6721794_fig2s1_32467.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label \"NPFM\" indicate about the location of the structures within the boxes?" }, { "from": "gpt", "value": "The answer is A specific protein or gene marker location" } ], "clip_score": 31.4375, "loss": 4.591144561767578 }, { "id": "identity_313327", "image": "AI2D/images/3452.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What paired structures extend from its head?\nChoices:\n(A) antennae\n(B) legs\n(C) none of the above\n(D) wing pads" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.421875, "loss": 4.096108436584473 }, { "id": "identity_323043", "image": "FigureQA/images/47152.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Forest Green the highest median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.34375, "loss": 4.437185764312744 }, { "id": "identity_298315", "image": "TextVQA/images/034ab6f9af333146.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which sport are they currently watching?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 21.53125, "loss": 4.475566387176514 }, { "id": "identity_283358", "image": "ScienceQA/images/4925.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Arizona?\nChoices:\n(A) Tucson\n(B) Salt Lake City\n(C) Phoenix\n(D) Carson City" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.859375, "loss": 4.957512855529785 }, { "id": "identity_305200", "image": "TQA/images/abc_question_images/earth_eclipses_10684.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents the penumbra?\nChoices:\n(A) M\n(B) R\n(C) W\n(D) U" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.953125, "loss": 4.907569885253906 }, { "id": "identity_110930", "image": "VQA2.0/images/562488.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many colors are present on the tie?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.4375, "loss": 4.504819869995117 }, { "id": "identity_108123", "image": "TextVQA/images/5174c4a7a13965fc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the object placed on the top right corner inside the tray?" }, { "from": "gpt", "value": "The answer is A transparent circular container" } ], "clip_score": 21.703125, "loss": 5.048520088195801 }, { "id": "identity_323679", "image": "TextVQA/images/cd27c26641aad71f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the city shown in the image?" }, { "from": "gpt", "value": "The answer is chicago " } ], "clip_score": 15.9921875, "loss": 5.247714042663574 }, { "id": "identity_267396", "image": "AI2D/images/3541.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following structures serves as protection for the cell membrane? \n(A) Cell wall\n(B) Vacuole\n(C) Membrane\n(D) Pore" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.40625, "loss": 4.011153697967529 }, { "id": "identity_331953", "image": "TabMWP/images/36014.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which day were the most pies made according to the baker's record over the past 4 days?\n\nChoices:\n(A) Saturday\n(B) Sunday\n(C) Monday\n(D) Tuesday" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 38.78125, "loss": 4.018151760101318 }, { "id": "identity_337274", "image": "AI2D/images/2455.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What term best describes E in this diagram?\nChoices:\n(A) Heartworm\n(B) Mosquito\n(C) Infective Larva\n(D) Microfilaria" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 38.03125, "loss": 4.376951217651367 }, { "id": "identity_288315", "image": "TextVQA/images/dd281aa1b2204a96.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which brand is being displayed?" }, { "from": "gpt", "value": "The answer is aquintus" } ], "clip_score": 18.828125, "loss": 4.572070598602295 }, { "id": "identity_220474", "image": "VQA2.0/images/226018.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible windows are there on the tower?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.421875, "loss": 4.8167290687561035 }, { "id": "identity_282818", "image": "ScienceQA/images/311.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the material used to make this belt?\nChoices:\n(A) ceramic\n(B) leather" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.078125, "loss": 4.3657402992248535 }, { "id": "identity_130338", "image": "A-OKVQA/images/406710.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 14.3671875, "loss": 4.449877738952637 }, { "id": "identity_215393", "image": "TextVQA/images/8cee9d4db49290e1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the company advertised on the left field wall?" }, { "from": "gpt", "value": "The answer is FANCL" } ], "clip_score": 18.265625, "loss": 4.72573184967041 }, { "id": "identity_285176", "image": "VizWiz/images/VizWiz_train_00002901.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify this location?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 18.40625, "loss": 4.376188278198242 }, { "id": "identity_267658", "image": "FigureQA/images/92778.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Light Slate or Gold?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.25, "loss": 4.692200660705566 }, { "id": "identity_206012", "image": "VQA-AS/images/12834.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trees are in the background?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 17.578125, "loss": 4.503116607666016 }, { "id": "identity_318558", "image": "FigureQA/images/7620.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Dark Violet?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.59375, "loss": 4.0629563331604 }, { "id": "identity_249573", "image": "A-OKVQA/images/490629.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Considering the environment and activities shown in the image, what are the possible reasons for the woman wearing a triangular hat, and how does each reason relate to the context of the image?" }, { "from": "gpt", "value": "The answer is protection" } ], "clip_score": 20.53125, "loss": 6.876614570617676 }, { "id": "identity_297137", "image": "PMC-VQA/images/PMC4459984_pone.0128306.g007_394562.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the white arrow in the photomicrographs?\nChoices:\n(A) nucleus \n(B) cytoplasm \n(C) storage vesicles \n(D) extracellular matrix" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.796875, "loss": 4.945304870605469 }, { "id": "identity_334439", "image": "A-OKVQA/images/35473.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this wire for?\nChoices:\n(A) power streetcars\n(B) guides streetcar\n(C) electric utility\n(D) phone lines" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.078125, "loss": 4.296815395355225 }, { "id": "identity_309164", "image": "PMC-VQA/images/PMC4381505_F4_373880.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the labeled protein shown here?\nChoices:\n(A) pUbq10\n(B) TGN\n(C) G\n(D) CW" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.359375, "loss": 4.083810329437256 }, { "id": "identity_332288", "image": "TextVQA/images/951d338fd366d398.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this a place where items are sold?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 21.609375, "loss": 4.891757488250732 }, { "id": "identity_16928", "image": "FigureQA/images/70410.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Medium Orchid have the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.609375, "loss": 4.017863750457764 }, { "id": "identity_265451", "image": "AI2D/images/3703.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options is both elastic and used to collect liquid wastes for excretion? \n(A) a\n(B) k\n(C) c\n(D) d" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.3359375, "loss": 4.213000297546387 }, { "id": "identity_25103", "image": "A-OKVQA/images/54304.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the red object in the food the man is eating?\nChoices:\n(A) apple\n(B) pepper\n(C) cherry\n(D) tomato" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.75, "loss": 4.026975154876709 }, { "id": "identity_192183", "image": "TextVQA/images/d6e8bc5b8ee5d66e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are standing on the podium?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 12.71875, "loss": 4.636722087860107 }, { "id": "identity_310423", "image": "TQA/images/abc_question_images/human_system_muscular_16164.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the deltoid muscle?\nChoices:\n(A) E\n(B) Y\n(C) M\n(D) K" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.1875, "loss": 4.298622131347656 }, { "id": "identity_307553", "image": "Super-CLEVR/images/superCLEVR_new_001116.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more rubber aeroplanes in front of this than tiny blue choppers?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 24.703125, "loss": 4.91962194442749 }, { "id": "identity_71083", "image": "ScienceQA/images/19193.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What role does the copepod play in this food web?" }, { "from": "gpt", "value": "The answer is Primary consumer" } ], "clip_score": 31.578125, "loss": 4.257974147796631 }, { "id": "identity_30709", "image": "PMC-VQA/images/PMC6142763_fig2_368840.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the ultrasound images show? \nChoices:\n(A) Posteriorly located and contracted cervix and lower uterine segment. \n(B) Anteriorly located and stretched cervix and lower uterine segment. \n(C) Laterally located and expanded cervix and lower uterine segment. \n(D) Anteriorly located and contracted cervix and upper uterine segment. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.359375, "loss": 4.291150093078613 }, { "id": "identity_281008", "image": "FigureQA/images/77216.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Firebrick or Purple?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.34375, "loss": 4.9022088050842285 }, { "id": "identity_318586", "image": "ScienceQA/images/1847.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What property is associated with this item?\nChoices:\n(A) sticky\n(B) shiny" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.734375, "loss": 4.413538932800293 }, { "id": "identity_10447", "image": "A-OKVQA/images/402715.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do these animals have?\nChoices:\n(A) wings\n(B) long necks\n(C) wool\n(D) quills" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.703125, "loss": 4.429559230804443 }, { "id": "identity_2587", "image": "AI2D/images/3316.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part does K represent ?\nChoices:\n(A) Peroxisomes\n(B) Plasmodesmata\n(C) Mitochondrion\n(D) Nucleus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.3125, "loss": 4.107028007507324 }, { "id": "identity_214511", "image": "VizWiz/images/VizWiz_train_00015061.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many candies can be seen through the jar?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 26.5, "loss": 4.6774821281433105 }, { "id": "identity_291967", "image": "IconQA/images/102290.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there a triangle as a face in this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.96875, "loss": 4.405222415924072 }, { "id": "identity_297000", "image": "A-OKVQA/images/250194.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is the name of the street:\n(A) suzuran\n(B) mulberry\n(C) yancy\n(D) green" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.90625, "loss": 4.342035293579102 }, { "id": "identity_232285", "image": "VQA2.0/images/69856.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are three visual indicators in the image that suggest it is winter, and how do they collectively confirm the season?" }, { "from": "gpt", "value": "The answer is Snow, heavy clothing, steam from buildings" } ], "clip_score": 26.953125, "loss": 4.420994281768799 }, { "id": "identity_335617", "image": "A-OKVQA/images/259567.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are these individuals situated?\nChoices:\n(A) work\n(B) hospital\n(C) library\n(D) home" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 8.5859375, "loss": 4.161040306091309 }, { "id": "identity_209936", "image": "A-OKVQA/images/237919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the umpire's shirt?" }, { "from": "gpt", "value": "The answer is Dark green" } ], "clip_score": 17.953125, "loss": 4.193276882171631 }, { "id": "identity_17517", "image": "AI2D/images/3446.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is just above the vagina?\nChoices:\n(A) rectum\n(B) heart\n(C) ovary\n(D) crop" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.90625, "loss": 4.532044410705566 }, { "id": "identity_26314", "image": "Super-CLEVR/images/superCLEVR_new_000382.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The car behind the small rubber car that is in front of the small purple motorbike is what color?" }, { "from": "gpt", "value": "The answer is cyan" } ], "clip_score": 34.65625, "loss": 4.096130847930908 }, { "id": "identity_199671", "image": "A-OKVQA/images/84162.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many naval ships are docked in the harbor?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.875, "loss": 4.391598224639893 }, { "id": "identity_125374", "image": "PMC-VQA/images/PMC4101485_brainsci-04-00405-f002_306677.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct colors are used to represent different tracts in this image?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 24.15625, "loss": 4.391633987426758 }, { "id": "identity_276766", "image": "A-OKVQA/images/356010.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of bears are depicted in the foreground? \n(A) black\n(B) polar\n(C) grizzly\n(D) panda" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.15625, "loss": 4.49130916595459 }, { "id": "identity_202752", "image": "A-OKVQA/images/27041.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bags can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 13.4296875, "loss": 4.63616418838501 }, { "id": "identity_330425", "image": "PMC-VQA/images/PMC7409484_Fig2_229591.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where do the arrows indicate the leakage of contrast agents?\nChoices:\n(A) Heart\n(B) Brain\n(C) Spine\n(D) Thorax" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.0625, "loss": 4.031999588012695 }, { "id": "identity_202048", "image": "A-OKVQA/images/377589.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the blanket or covering on the person in the center of the image?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 18.375, "loss": 4.083199977874756 }, { "id": "identity_35298", "image": "A-OKVQA/images/545022.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the guy wearing a black hat doing?\nChoices:\n(A) singing\n(B) talking\n(C) resting\n(D) listening" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.953125, "loss": 4.323550224304199 }, { "id": "identity_22370", "image": "TQA/images/question_images/earth_moon_phases_2554.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A few days after the new moon phase, what do you call when the Moon will be visible again in a phase that lasts until the first quarter?\nChoices:\n(A) First Quarter\n(B) Waxing Gibbous\n(C) Waning Crescent\n(D) Waxing Crescent" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 36.09375, "loss": 4.22952938079834 }, { "id": "identity_73615", "image": "ScienceQA/images/12610.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the glue spill?" }, { "from": "gpt", "value": "The answer is Irregular shape" } ], "clip_score": 26.84375, "loss": 4.051152229309082 }, { "id": "identity_312064", "image": "Super-CLEVR/images/superCLEVR_new_000239.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What size is the car on the left side of the sedan behind the large cyan matte object?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 30.859375, "loss": 4.160061359405518 }, { "id": "identity_31442", "image": "PMC-VQA/images/PMC8069140_ijms-22-03867-f007_425738.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of imaging was used to observe tumor activity?\nChoices:\n(A) CT scan \n(B) MRI \n(C) X-ray \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.5, "loss": 4.6619954109191895 }, { "id": "identity_268264", "image": "FigureQA/images/45383.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Green the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.0, "loss": 4.444853782653809 }, { "id": "identity_327987", "image": "TQA/images/abc_question_images/layers_of_atmosphere_17069.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which one is the troposphere?\nChoices:\n(A) C\n(B) P\n(C) L\n(D) X" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.171875, "loss": 4.025148868560791 }, { "id": "identity_27424", "image": "PMC-VQA/images/PMC3835082_cancers-02-00364-f001_244962.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of imaging was used to detect hemorrhages? \nChoices:\n(A) MRI \n(B) CT scan \n(C) Ultrasound \n(D) X-Ray " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.140625, "loss": 4.650054931640625 }, { "id": "identity_214649", "image": "VQA2.0/images/107353.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are walking on the street?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 19.796875, "loss": 4.496853828430176 }, { "id": "identity_251624", "image": "TextVQA/images/068601131e743b00.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary message of the sign, and how does the imagery of the two figures below the sign contribute to reinforcing this message?" }, { "from": "gpt", "value": "The answer is No parking enforced by authority figures during funerals" } ], "clip_score": 32.34375, "loss": 4.035630702972412 }, { "id": "identity_22783", "image": "AI2D/images/3860.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the plant leaf part indicated by alphabet C in the diagram?\nChoices:\n(A) margin\n(B) vein\n(C) leaf stalk\n(D) lamina" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.8125, "loss": 4.471470355987549 }, { "id": "identity_202206", "image": "PMC-VQA/images/PMC4933353_ppat.1005739.g001_47959.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many red arrows are present in the bottom row of the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 20.453125, "loss": 4.583619117736816 }, { "id": "identity_222104", "image": "A-OKVQA/images/528707.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many animals are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.96875, "loss": 4.3432135581970215 }, { "id": "identity_317015", "image": "PMC-VQA/images/PMC3056371_F0008_89900.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the 3DCT volume rendered images indicate?\nChoices:\n(A) Microcephaly\n(B) Trigonocephaly\n(C) Plagiocephaly\n(D) Brachycephaly" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.984375, "loss": 4.387391090393066 }, { "id": "identity_5969", "image": "AI2D/images/1475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Pyroclastic is composed of\nChoices:\n(A) none of above\n(B) rainwater\n(C) rock\n(D) salt water" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.40625, "loss": 4.042134761810303 }, { "id": "identity_2584", "image": "A-OKVQA/images/247576.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the marking on the road?\nChoices:\n(A) bike lane\n(B) crosswalk\n(C) railroad crossing\n(D) two lanes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.65625, "loss": 4.662734031677246 }, { "id": "identity_334070", "image": "PMC-VQA/images/PMC3865638_fig1_252301.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was observed in this case?\nChoices:\n(A) Subarachnoid hemorrhage\n(B) Cerebral microbleeds in corpus callosum\n(C) Hemorrhage in the frontal lobe\n(D) Normal brain imaging results." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.0625, "loss": 4.196043968200684 }, { "id": "identity_38608", "image": "FigureQA/images/29061.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Coral the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.859375, "loss": 4.028387546539307 }, { "id": "identity_305154", "image": "IconQA/images/10595.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this shape?\nChoices:\n(A) flip\n(B) turn\n(C) slide" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.25, "loss": 4.475159645080566 }, { "id": "identity_322990", "image": "A-OKVQA/images/37514.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the oval object on the bike tire for?\nChoices:\n(A) protection\n(B) esthetics\n(C) carrying things\n(D) mud flap" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.3125, "loss": 4.529117584228516 }, { "id": "identity_9709", "image": "TQA/images/abc_question_images/human_system_eye_16134.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is the cornea?\nChoices:\n(A) R\n(B) L\n(C) W\n(D) J" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.0625, "loss": 4.316150188446045 }, { "id": "identity_10147", "image": "ScienceQA/images/6296.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of rock is chalk?\nChoices:\n(A) sedimentary\n(B) metamorphic\n(C) igneous" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.328125, "loss": 4.034698486328125 }, { "id": "identity_127377", "image": "TextVQA/images/dbc24c8e16427b94.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many Superman costumes are displayed in the glass case?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 38.5625, "loss": 4.303644180297852 }, { "id": "identity_3742", "image": "A-OKVQA/images/42953.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The helicopter assists which type of sports participants?\nChoices:\n(A) bowlers\n(B) none\n(C) skiers\n(D) sledders" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.046875, "loss": 4.290890216827393 }, { "id": "identity_324153", "image": "PMC-VQA/images/PMC7291422_Fig2_191427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the white arrow in the image indicate?\nChoices:\n(A) The liver of the foetus\n(B) The heart of the foetus\n(C) The dilated VGAM\n(D) The straight sinus" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.859375, "loss": 4.3243327140808105 }, { "id": "identity_336821", "image": "IconQA/images/87512.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a star. Which image is seventh?\nChoices:\n(A) star\n(B) rocket\n(C) moon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.765625, "loss": 4.264122009277344 }, { "id": "identity_31330", "image": "A-OKVQA/images/158713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on top of the front wheel of the motorcycle?\nChoices:\n(A) basket\n(B) fruit\n(C) vegetables\n(D) bird" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.2265625, "loss": 4.362387180328369 }, { "id": "identity_145734", "image": "DocVQA/images/jsnh0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the code for the Loveland factory?" }, { "from": "gpt", "value": "The answer is V" } ], "clip_score": 12.4765625, "loss": 4.524837970733643 }, { "id": "identity_210156", "image": "A-OKVQA/images/337675.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the tennis court?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 21.5, "loss": 4.004677772521973 }, { "id": "identity_270487", "image": "PMC-VQA/images/PMC6102916_Fig1_357108.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the primary observation in the image?\nOptions:\n(A) Necrosis;\n(B) Tubular injury;\n(C) Glomerulitis;\n(D) Interstitial fibrosis." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.75, "loss": 5.2731852531433105 }, { "id": "identity_32016", "image": "PMC-VQA/images/PMC3166290_pone-0024187-g001_107299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What region of the brain is being shown in the image? \nChoices:\n(A) Cerebellum \n(B) Thalamus \n(C) Hypothalamus \n(D) Medulla " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.046875, "loss": 4.032203674316406 }, { "id": "identity_281295", "image": "PMC-VQA/images/PMC6946360_F1_95413.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible in the image provided? \nChoices:\n(A) Swelling of septum \n(B) Septum perforation \n(C) Normal septum \n(D) Septum deviation" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.484375, "loss": 4.072701454162598 }, { "id": "identity_66589", "image": "TQA/images/question_images/erosion_6862.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which feature in the image is formed by the collapse of a sea cave?" }, { "from": "gpt", "value": "The answer is arch" } ], "clip_score": 30.546875, "loss": 4.3679962158203125 }, { "id": "identity_251749", "image": "A-OKVQA/images/400592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many electronic devices are visible on the messy desk, and what are they?" }, { "from": "gpt", "value": "The answer is five" } ], "clip_score": 28.84375, "loss": 5.471255302429199 }, { "id": "identity_4406", "image": "PMC-VQA/images/PMC8019334_FIG3_409135.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible in the X-ray?\nChoices:\n(A) Anterior spinal fusion \n(B) Posterior spinal fusion \n(C) No visible spinal fusion \n(D) Fusion in the cervical spine" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.609375, "loss": 4.480703353881836 }, { "id": "identity_323723", "image": "PMC-VQA/images/PMC8470686_brainsci-11-01169-f002_69305.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the two processes indicated in the fluoroscopic image?\nChoices:\n(A) Medial and Lateral processes\n(B) Inferior and Superior processes\n(C) Anterior and Posterior processes\n(D) Radial and Ulnar processes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.546875, "loss": 4.496763706207275 }, { "id": "identity_327003", "image": "PMC-VQA/images/PMC6186532_Fig1_381772.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of images are shown in these rows?\nChoices:\n(A) coronal fusion slices \n(B) sagittal fusion slices \n(C) transaxial fusion slices \n(D) PET/CT images" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.8125, "loss": 5.047061920166016 }, { "id": "identity_222323", "image": "A-OKVQA/images/174766.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are holding cameras?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 12.75, "loss": 4.9972991943359375 }, { "id": "identity_201035", "image": "TextVQA/images/b724d50821bcc3ec.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct numbers can be seen within the artwork?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.921875, "loss": 4.037569046020508 }, { "id": "identity_288548", "image": "A-OKVQA/images/43344.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which individual is seated in the middle among the following options: Rafael Nadal, Jack Perry, Bo Jackson, or Djimon Hounsou?\nChoices:\n(A) rafael nadal\n(B) jack perry\n(C) bo jackson\n(D) djimon hounsou" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.53125, "loss": 4.044363498687744 }, { "id": "identity_37599", "image": "A-OKVQA/images/114414.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is number 2 going to do now?\nChoices:\n(A) sit\n(B) return home\n(C) hit someone\n(D) run" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.2421875, "loss": 4.0348334312438965 }, { "id": "identity_279744", "image": "VizWiz/images/VizWiz_train_00014628.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of speaker is this, and what is the quantity of speakers present?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 23.40625, "loss": 4.495781421661377 }, { "id": "identity_337879", "image": "IconQA/images/78797.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first one is a bear. What is the third one?\nChoices:\n(A) bear\n(B) plane\n(C) train" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.734375, "loss": 4.571621417999268 }, { "id": "identity_36655", "image": "A-OKVQA/images/448851.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The cats on the desk are illuminated by what source of light?\nChoices:\n(A) sun\n(B) desk lamp\n(C) moon\n(D) overhead light" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.046875, "loss": 4.389829635620117 }, { "id": "identity_193756", "image": "PMC-VQA/images/PMC3621383_fig6_197579.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structures are indicated by the arrows in the image?" }, { "from": "gpt", "value": "The answer is Blood vessels" } ], "clip_score": 24.390625, "loss": 4.268692970275879 }, { "id": "identity_128990", "image": "TextVQA/images/8cd6412c8f6ad3fa.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the aircraft?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 22.203125, "loss": 4.012970924377441 }, { "id": "identity_51115", "image": "PlotQA/images/107265.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the Female enrollment rate?" }, { "from": "gpt", "value": "The answer is Light blue" } ], "clip_score": 24.421875, "loss": 4.009922504425049 }, { "id": "identity_104", "image": "ScienceQA/images/14119.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Connecticut?\nChoices:\n(A) Boston\n(B) Hartford\n(C) New Haven\n(D) Bridgeport" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.6875, "loss": 4.206075191497803 }, { "id": "identity_302227", "image": "IconQA/images/21404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, how likely is it that you will get a black one?\nChoices:\n(A) unlikely\n(B) probable\n(C) impossible\n(D) certain" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.21875, "loss": 4.341488838195801 }, { "id": "identity_325068", "image": "PMC-VQA/images/PMC5807339_F1_274149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the left part of the image?\nChoices:\n(A) A spectrum in a single voxel extracted from the multivoxel data at the location of the tumor.\n(B) An X-ray image of the tumor\n(C) A CT scan of the tumor\n(D) The location of the tumor on a brain diagram" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.96875, "loss": 4.630767822265625 }, { "id": "identity_194617", "image": "TextVQA/images/0a1312ef780d8d53.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many books are stacked on the desk?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 20.5625, "loss": 4.590357303619385 }, { "id": "identity_23707", "image": "Super-CLEVR/images/superCLEVR_new_001966.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a big thing that is right of the big regular bus; is it the same shape as the big yellow matte object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 21.5, "loss": 5.066391944885254 }, { "id": "identity_28409", "image": "A-OKVQA/images/117690.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the dog flying toward the frisbee?\nChoices:\n(A) is diving\n(B) to catch\n(C) was thrown\n(D) is confused" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.84375, "loss": 4.154224395751953 }, { "id": "identity_304065", "image": "TQA/images/abc_question_images/aquifers_16532.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the part of the diagram labeled F?\nChoices:\n(A) CONFINED AQUIFER \n(B) WATER TABLE WELL\n(C) FLOWING ARTESIAN WELL\n(D) UNCONFINED AQUIFER" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 39.03125, "loss": 4.244899272918701 }, { "id": "identity_206172", "image": "A-OKVQA/images/163047.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 17.09375, "loss": 4.346362590789795 }, { "id": "identity_41806", "image": "DocVQA/images/jnjm0223_59.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which country is associated with the study on \"Amitriptyline comparator in Cancer-Pain--full spectrum of cancer pain\"?" }, { "from": "gpt", "value": "The answer is UK" } ], "clip_score": 14.609375, "loss": 4.079787254333496 }, { "id": "identity_275425", "image": "A-OKVQA/images/496399.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where will the gentleman in the suit make a call to his sweetheart?\nChoices:\n(A) inside building\n(B) phone booth\n(C) taxi\n(D) standing there" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.375, "loss": 4.381382942199707 }, { "id": "identity_71328", "image": "ScienceQA/images/14391.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tubes are connected to the top of the bioreactor?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 26.078125, "loss": 4.179014205932617 }, { "id": "identity_69387", "image": "AI2D/images/1851.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trophic levels are present in this food web?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 32.3125, "loss": 4.065363883972168 }, { "id": "identity_263530", "image": "TextVQA/images/a1058ccb3c5ae651.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: From where does the middle beer originate?" }, { "from": "gpt", "value": "The answer is usquena" } ], "clip_score": 24.453125, "loss": 5.45785665512085 }, { "id": "identity_261852", "image": "DVQA/images/bar_train_00051688.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the bars run horizontally?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 20.875, "loss": 4.196059226989746 }, { "id": "identity_29213", "image": "DocVQA/images/hzxd0228_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first title in the document?" }, { "from": "gpt", "value": "The answer is Woodard Research Corporation" } ], "clip_score": 22.703125, "loss": 4.135406494140625 }, { "id": "identity_265007", "image": "PMC-VQA/images/PMC6301901_Fig1_413476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which area(s) of the body was there abnormal uptake of Fluorodeoxyglucose observed? \nChoices:\n(A) Only in the left hilar lymph nodes \n(B) Only in the right hilar lymph nodes \n(C) In both the left and right hilar lymph nodes \n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.234375, "loss": 4.32724666595459 }, { "id": "identity_75054", "image": "TQA/images/abc_question_images/parts_telescope_18154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does the light travel through the Newtonian Reflector as shown in the diagram?" }, { "from": "gpt", "value": "The answer is Enters, reflects off S, off X, through D" } ], "clip_score": 37.3125, "loss": 4.143209934234619 }, { "id": "identity_264807", "image": "FigureQA/images/70459.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Periwinkle the smoothest?\n(A) No\n(B) Yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.8359375, "loss": 4.142037391662598 }, { "id": "identity_299434", "image": "TextVQA/images/4fcb44082d79b46f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me the name of the white wine?" }, { "from": "gpt", "value": "The answer is cheap white whine" } ], "clip_score": 20.703125, "loss": 4.064720630645752 }, { "id": "identity_217350", "image": "A-OKVQA/images/45222.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.328125, "loss": 4.3011579513549805 }, { "id": "identity_263719", "image": "AI2D/images/1084.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the subject matter represented by this diagram?\nChoices:\n(A) brain\n(B) eyeball\n(C) atom\n(D) cell" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.8125, "loss": 4.036837100982666 }, { "id": "identity_17979", "image": "DVQA/images/bar_train_00120037.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of fall larger than trail?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 21.71875, "loss": 4.0461530685424805 }, { "id": "identity_126262", "image": "TextVQA/images/8f693bc2dd576300.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the red wine in the bottom left corner?" }, { "from": "gpt", "value": "The answer is Stowells" } ], "clip_score": 22.09375, "loss": 4.202389240264893 }, { "id": "identity_36762", "image": "ScienceQA/images/1922.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Ohio?\nChoices:\n(A) Columbus\n(B) Houston\n(C) Lincoln\n(D) Carson City" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.859375, "loss": 4.297056198120117 }, { "id": "identity_302579", "image": "IconQA/images/84769.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there a square face on this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.875, "loss": 4.446081638336182 }, { "id": "identity_274850", "image": "ScienceQA/images/19314.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the organism that serves as the tertiary consumer in this food web:\nChoices:\n(A) grizzly bear\n(B) snowy owl\n(C) parasitic jaeger" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.109375, "loss": 4.0419602394104 }, { "id": "identity_300038", "image": "A-OKVQA/images/431738.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who is the person in black seated on the court?\nChoices:\n(A) host\n(B) official\n(C) relative\n(D) announcer" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.578125, "loss": 4.6556291580200195 }, { "id": "identity_193781", "image": "Super-CLEVR/images/superCLEVR_new_000078.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is positioned closest to the bottom edge of the image?" }, { "from": "gpt", "value": "The answer is Motorcycle" } ], "clip_score": 21.453125, "loss": 4.175324440002441 }, { "id": "identity_186219", "image": "VizWiz/images/VizWiz_train_00019764.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows are visible in the room?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 20.953125, "loss": 4.506649971008301 }, { "id": "identity_14612", "image": "DocVQA/images/hglg0078_36.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written in the Grade Field ?" }, { "from": "gpt", "value": "The answer is CG3" } ], "clip_score": 20.1875, "loss": 4.300159454345703 }, { "id": "identity_280973", "image": "FigureQA/images/75264.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Seafoam the color with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.6875, "loss": 4.000123023986816 }, { "id": "identity_315796", "image": "FigureQA/images/29892.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.4375, "loss": 4.6306962966918945 }, { "id": "identity_271761", "image": "TextVQA/images/006f378c1ca56fd9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the speaker of the quote shown in the picture?" }, { "from": "gpt", "value": "The answer is pedro obispo" } ], "clip_score": 11.953125, "loss": 4.211523056030273 }, { "id": "identity_289584", "image": "TQA/images/abc_question_images/human_system_excretory_16111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is urine produced? \nChoices:\n(A) T\n(B) H\n(C) M\n(D) E" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.5, "loss": 4.465124607086182 }, { "id": "identity_31967", "image": "PMC-VQA/images/PMC5471892_Fig4_179489.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the Maltese dog was imaged in the histological sections? \nChoices:\n(A) Right tibia \n(B) Left humerus \n(C) Right ear \n(D) Left eye " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.390625, "loss": 4.339035511016846 }, { "id": "identity_320987", "image": "Super-CLEVR/images/superCLEVR_new_000371.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the object that is both in front of a large rubber bus and on the left side of a tiny school bus?" }, { "from": "gpt", "value": "The answer is regular bus" } ], "clip_score": 26.421875, "loss": 5.01339864730835 }, { "id": "identity_219611", "image": "VQA2.0/images/231320.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the clock with the latest time displayed?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 25.4375, "loss": 4.172177314758301 }, { "id": "identity_217691", "image": "A-OKVQA/images/508985.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly in front of the motorcycle in the foreground?" }, { "from": "gpt", "value": "The answer is A car" } ], "clip_score": 24.796875, "loss": 4.0765767097473145 }, { "id": "identity_239391", "image": "AI2D/images/2511.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the letters corresponding to the winter solstice and the spring equinox, and then determine the letter that is directly opposite to the summer solstice" }, { "from": "gpt", "value": "The answer is J" } ], "clip_score": 29.03125, "loss": 4.720658779144287 }, { "id": "identity_5819", "image": "Super-CLEVR/images/superCLEVR_new_000949.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a tiny brown aeroplane made of the same material as the tiny brown fighter?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 21.5625, "loss": 5.371110916137695 }, { "id": "identity_199827", "image": "VQA2.0/images/320875.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the mixer on the counter?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 22.5, "loss": 4.057930946350098 }, { "id": "identity_275471", "image": "A-OKVQA/images/320978.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which vegetable is expected to be the most costly item per piece or per pound among the following options: artichoke, celery, corn, or turnips?\nChoices:\n(A) artichoke\n(B) celery\n(C) corn\n(D) turnips" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.359375, "loss": 4.168100357055664 }, { "id": "identity_322663", "image": "A-OKVQA/images/53975.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How is the waiting area divided?\nChoices:\n(A) race\n(B) intelligence\n(C) gender\n(D) age" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.609375, "loss": 4.308540344238281 }, { "id": "identity_182060", "image": "PMC-VQA/images/PMC5306347_Fig7_127088.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct lesions can be identified in this image?" }, { "from": "gpt", "value": "The answer is Two lesions" } ], "clip_score": 28.03125, "loss": 4.027326583862305 }, { "id": "identity_30349", "image": "PMC-VQA/images/PMC5265193_Fig7_118640.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the suspected diagnosis for the patient based on the MRI findings? \nChoices:\n(A) Meningioma \n(B) Metastasis \n(C) Stroke \n(D) Parkinson's disease " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 36.40625, "loss": 4.481455326080322 }, { "id": "identity_9590", "image": "DocVQA/images/zhpc0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the sender of this document?" }, { "from": "gpt", "value": "The answer is MARTIN J. FORMAN , MR/FFP" } ], "clip_score": 23.625, "loss": 4.293883800506592 }, { "id": "identity_245111", "image": "A-OKVQA/images/333434.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the language seen on these signs, identify the country where this beach is likely located and provide a brief explanation of your reasoning" }, { "from": "gpt", "value": "The answer is Japan" } ], "clip_score": 31.453125, "loss": 4.295558929443359 }, { "id": "identity_37372", "image": "A-OKVQA/images/244795.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is coming out of the clock?\nChoices:\n(A) vapor\n(B) water\n(C) steam\n(D) smoke" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 6.28125, "loss": 4.084975242614746 }, { "id": "identity_25101", "image": "ScienceQA/images/3474.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Kentucky\n(B) New Hampshire\n(C) Pennsylvania\n(D) Maryland" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.03125, "loss": 4.058869361877441 }, { "id": "identity_17541", "image": "VQA2.0/images/119458.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the sky clear?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 13.7109375, "loss": 4.380341529846191 }, { "id": "identity_17275", "image": "AI2D/images/3368.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the brightly colored parts of a flower immediately surrounding the reproductive organs; a division of the corolla called?\nChoices:\n(A) Anthers\n(B) Pistil\n(C) Petal\n(D) Sepal" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.03125, "loss": 4.8244123458862305 }, { "id": "identity_89", "image": "ScienceQA/images/10111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) rough\n(B) transparent" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.328125, "loss": 4.735735893249512 }, { "id": "identity_308835", "image": "TQA/images/abc_question_images/layers_of_atmosphere_17073.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the stratosphere?\nChoices:\n(A) F\n(B) J\n(C) R\n(D) U" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.453125, "loss": 4.443638324737549 }, { "id": "identity_267825", "image": "A-OKVQA/images/97427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the source of heat for the stovetop burners? Is it:\n(A) natural gas\n(B) electricity\n(C) wood\n(D) coal" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.703125, "loss": 4.484846591949463 }, { "id": "identity_247810", "image": "A-OKVQA/images/54039.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the possible reasons for covering vehicles in an urban environment, and which reason is most likely based on the image provided?" }, { "from": "gpt", "value": "The answer is protection" } ], "clip_score": 28.96875, "loss": 5.779053688049316 }, { "id": "identity_310225", "image": "PMC-VQA/images/PMC6026116_Fig4_335593.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of image is depicted in (a)?\nChoices:\n(A) PA joint image \n(B) LED image \n(C) Combined image \n(D) MRI image" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.46875, "loss": 4.55730676651001 }, { "id": "identity_128222", "image": "Super-CLEVR/images/superCLEVR_new_000404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the vehicle located directly behind the cyan bicycle?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 28.578125, "loss": 4.1579437255859375 }, { "id": "identity_320020", "image": "A-OKVQA/images/365618.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of lights are on on the vehicle?\nChoices:\n(A) headlights\n(B) siren\n(C) brake\n(D) turn signal" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.21875, "loss": 4.2496843338012695 }, { "id": "identity_26103", "image": "PMC-VQA/images/PMC7010826_Fig4_114094.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term used to describe the border of the lesion? \nChoices:\n(A) stratum \n(B) meandering \n(C) spherical \n(D) dispersed " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.859375, "loss": 4.968293190002441 }, { "id": "identity_363", "image": "ScienceQA/images/19733.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following organisms is the secondary consumer in this food web?\nChoices:\n(A) persimmon tree\n(B) silver maple\n(C) pine vole\n(D) swallowtail caterpillar" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.890625, "loss": 4.178217887878418 }, { "id": "identity_122182", "image": "VQA2.0/images/293741.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many carriages are attached to the steam engine?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.0625, "loss": 4.802619457244873 }, { "id": "identity_6070", "image": "PMC-VQA/images/PMC4150439_fig1_316870.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the injury located? \nChoices:\n(A) Medial proximal femoral avulsion fracture \n(B) Lateral distal femoral avulsion fracture \n(C) Medial proximal tibial avulsion fracture \n(D) Lateral distal tibial avulsion fracture " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.125, "loss": 4.368042469024658 }, { "id": "identity_223593", "image": "VizWiz/images/VizWiz_train_00019600.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the logo on the cup?" }, { "from": "gpt", "value": "The answer is A cup with steam" } ], "clip_score": 26.796875, "loss": 4.5214715003967285 }, { "id": "identity_565", "image": "TQA/images/question_images/human_system_digestive_6089.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What connects the caecum and the rectum?\nChoices:\n(A) esophagus\n(B) small intestine\n(C) colon\n(D) stomach" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.640625, "loss": 4.154755592346191 }, { "id": "identity_295844", "image": "PMC-VQA/images/PMC4013614_f1-ijms-15-06046_286675.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in image b?\nOptions:\n(A) Positive cancer tissue.\n(B) Negative cancer tissue.\n(C) Positive bone tissue.\n(D) Negative bone tissue." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.5625, "loss": 4.191897869110107 }, { "id": "identity_118254", "image": "TextVQA/images/5a6da5b6ce10733a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arches are visible under the white building?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 19.546875, "loss": 4.684334754943848 }, { "id": "identity_290111", "image": "A-OKVQA/images/580082.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of outfit is the person wearing while behaving unusually? \nChoices:\n(A) white shirt\n(B) black suit\n(C) tennis dress\n(D) black dress" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.59375, "loss": 4.208944797515869 }, { "id": "identity_284010", "image": "DVQA/images/bar_train_00155441.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset is represented by the color red?" }, { "from": "gpt", "value": "The answer is mean" } ], "clip_score": 26.25, "loss": 4.884254455566406 }, { "id": "identity_71458", "image": "ScienceQA/images/5560.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of prey does the Arctic fox have in this food web?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 30.828125, "loss": 4.090980529785156 }, { "id": "identity_105500", "image": "A-OKVQA/images/98322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many ski poles are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.484375, "loss": 4.254899024963379 }, { "id": "identity_35691", "image": "PMC-VQA/images/PMC7274469_pntd.0008141.g002_184814.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the mass? \nChoices:\n(A) Left lobe \n(B) Right lobe \n(C) Middle lobe \n(D) Entire lung " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.109375, "loss": 4.625916481018066 }, { "id": "identity_24109", "image": "IconQA/images/83639.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) flip\n(B) turn\n(C) slide" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.96875, "loss": 4.519982814788818 }, { "id": "identity_273938", "image": "TQA/images/question_images/human_system_respiratory_3765.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: After passing through the larynx, what is the next part that the air travels through in the body?\nChoices:\n(A) bronchi\n(B) trachea\n(C) lung\n(D) nose" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 36.59375, "loss": 4.157268524169922 }, { "id": "identity_307776", "image": "AI2D/images/3979.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the broad part of this called?\nChoices:\n(A) petiole\n(B) blade\n(C) midrib\n(D) stipule" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.625, "loss": 4.856330394744873 }, { "id": "identity_191436", "image": "PMC-VQA/images/PMC7756166_rcr2705-fig-0001_325202.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image with respect to the patient's body?" }, { "from": "gpt", "value": "The answer is Axial orientation, patient supine" } ], "clip_score": 28.0625, "loss": 4.482510089874268 }, { "id": "identity_316327", "image": "FigureQA/images/23841.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Purple the largest section?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.75, "loss": 4.699324607849121 }, { "id": "identity_335728", "image": "PMC-VQA/images/PMC4592919_fig9_429543.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of MRI image is depicted in this figure?\nChoices:\n(A) T1-weighted MRI\n(B) T2-weighted MRI\n(C) Diffusion-weighted MRI\n(D) Magnetic resonance angiography (MRA)" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.96875, "loss": 4.4031267166137695 }, { "id": "identity_22395", "image": "PMC-VQA/images/PMC8044999_F2_417480.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the image represent? \nChoices:\n(A) Results of a study on mice \n(B) Results of a study on humans \n(C) Results of a study on pigs \n(D) Results of a study on rats " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.4375, "loss": 4.036904811859131 }, { "id": "identity_332825", "image": "ScienceQA/images/7116.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic fits this item?\nChoices:\n(A) bouncy\n(B) blue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.0625, "loss": 4.218008995056152 }, { "id": "identity_24171", "image": "PMC-VQA/images/PMC3751478_F3_226867.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of image was used to confirm the atrophy of the vermis and the cerebellar hemispheres? \nChoices:\n(A) PET image \n(B) CT scan image \n(C) T2-weighted image \n(D) MRI image " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.0, "loss": 4.256689548492432 }, { "id": "identity_32547", "image": "IconQA/images/84501.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there enough pencils for every piece of paper?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.3125, "loss": 4.081462383270264 }, { "id": "identity_264503", "image": "ScienceQA/images/4551.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the material used to make this box?\nChoices:\n(A) porcelain\n(B) cardboard" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.5625, "loss": 4.2056965827941895 }, { "id": "identity_209246", "image": "VQA2.0/images/288827.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many birds are in flight in this image?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 27.671875, "loss": 4.48723840713501 }, { "id": "identity_317461", "image": "TQA/images/abc_question_images/cycle_water_16000.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the snow?\nChoices:\n(A) L\n(B) P\n(C) J\n(D) A" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.71875, "loss": 4.921009063720703 }, { "id": "identity_314403", "image": "ScienceQA/images/11610.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Tallahassee\n(B) Honolulu\n(C) Frankfort\n(D) Tampa" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.5, "loss": 4.365207195281982 }, { "id": "identity_301200", "image": "PMC-VQA/images/PMC8815180_Fig2_184364.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being ligated here?\nChoices:\n(A) Left hepatic vein \n(B) Right hepatic vein \n(C) Short hepatic veins \n(D) Hepatic pedicle" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.6875, "loss": 4.079835414886475 }, { "id": "identity_301271", "image": "A-OKVQA/images/11737.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this person located?\nChoices:\n(A) stands\n(B) dugout\n(C) first base\n(D) pitcher's mound" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.140625, "loss": 4.2927069664001465 }, { "id": "identity_279508", "image": "DVQA/images/bar_train_00193347.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Among all the datasets, which algorithm has the lowest total accuracy?" }, { "from": "gpt", "value": "The answer is sphere" } ], "clip_score": 31.34375, "loss": 4.709578514099121 }, { "id": "identity_276984", "image": "TQA/images/abc_question_images/parts_fish_12852.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the diagram is represented by label F? Is it:\n(A) DORSAL FIN\n(B) CAUDAL FIN\n(C) PECTORAL FIN\n(D) VENTRAL FIN" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.75, "loss": 4.287947654724121 }, { "id": "identity_273954", "image": "ScienceQA/images/7599.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Maryland?\n(A) Boston\n(B) Baltimore\n(C) Annapolis\n(D) Honolulu" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.0, "loss": 4.142009735107422 }, { "id": "identity_191649", "image": "VQA2.0/images/269997.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bags can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 15.984375, "loss": 4.192912578582764 }, { "id": "identity_41390", "image": "FigureQA/images/33567.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is The title is \"title" } ], "clip_score": 36.625, "loss": 4.138132095336914 }, { "id": "identity_126484", "image": "Super-CLEVR/images/superCLEVR_new_001862.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the motorbike that is located behind the blue bicycle?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 24.828125, "loss": 4.202629089355469 }, { "id": "identity_275073", "image": "TextVQA/images/631571cd72d6d688.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the producer of the whisky in the white box?" }, { "from": "gpt", "value": "The answer is suntory" } ], "clip_score": 27.15625, "loss": 4.322049140930176 }, { "id": "identity_108695", "image": "VizWiz/images/VizWiz_train_00015553.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the buttons on the shirt?" }, { "from": "gpt", "value": "The answer is Dark green or black" } ], "clip_score": 23.9375, "loss": 4.458158493041992 }, { "id": "identity_31553", "image": "ScienceQA/images/2630.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter marks the Louisiana territory?\nChoices:\n(A) A\n(B) D\n(C) C\n(D) B" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.40625, "loss": 4.025464057922363 }, { "id": "identity_321964", "image": "PMC-VQA/images/PMC4674562_F1_452428.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the lesion?\nChoices:\n(A) In the gray matter anterior to the Sylvian fissure.\n(B) In the gray matter posterior to the Sylvian fissure.\n(C) In the white matter anterior to the Sylvian fissure.\n(D) In the white matter posterior to the Sylvian fissure." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.46875, "loss": 4.088508129119873 }, { "id": "identity_337347", "image": "A-OKVQA/images/577835.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of vehicle is this?\nChoices:\n(A) model\n(B) ladder\n(C) commercial\n(D) passenger" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.6875, "loss": 4.228453159332275 }, { "id": "identity_335017", "image": "A-OKVQA/images/67131.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which United States president was born in the state shown on this license plate?\nChoices:\n(A) lincoln\n(B) reagan\n(C) obama\n(D) jefferson" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.046875, "loss": 4.260105133056641 }, { "id": "identity_129295", "image": "Super-CLEVR/images/superCLEVR_new_001559.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the large object that is in front of the small shiny bicycle and to the right of the purple shiny motorbike?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 24.21875, "loss": 5.02410888671875 }, { "id": "identity_186155", "image": "VQA2.0/images/426551.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bananas are visible on the counter?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 16.40625, "loss": 5.3677778244018555 }, { "id": "identity_267621", "image": "TQA/images/question_images/skin_cross_section_7586.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the outer layer of the skin?\nChoices:\n(A) fatty tissue\n(B) melanocytes\n(C) Epidermis\n(D) oil gland" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.53125, "loss": 4.062946319580078 }, { "id": "identity_44687", "image": "DocVQA/images/tznp0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many scientists visited the Foundation in March of 1978?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.78125, "loss": 4.187996864318848 }, { "id": "identity_309413", "image": "PMC-VQA/images/PMC6525736_Fig3_482783.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of visualization is shown in this content?\nChoices:\n(A) Scatter plot\n(B) Bar chart\n(C) Images\n(D) Surface plot" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.2890625, "loss": 4.561923027038574 }, { "id": "identity_129392", "image": "PMC-VQA/images/PMC7952901_Fig2_389219.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the MRI image show any signs of compression on the spinal cord?" }, { "from": "gpt", "value": "The answer is Possible compression indicated by the arrow" } ], "clip_score": 32.625, "loss": 4.689104080200195 }, { "id": "identity_301149", "image": "A-OKVQA/images/352127.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of transportation is depicted?\nChoices:\n(A) rail\n(B) road\n(C) air\n(D) water" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.40625, "loss": 4.398746013641357 }, { "id": "identity_211167", "image": "PMC-VQA/images/PMC8085189_Fig1_431007.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of lung collapse or atelectasis?" }, { "from": "gpt", "value": "The answer is No signs of collapse" } ], "clip_score": 29.671875, "loss": 4.214149475097656 }, { "id": "identity_25945", "image": "VizWiz/images/VizWiz_train_00023929.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is this?" }, { "from": "gpt", "value": "The answer is blood orange red" } ], "clip_score": 19.125, "loss": 4.565242290496826 }, { "id": "identity_22521", "image": "VizWiz/images/VizWiz_train_00009791.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is on this shirt?" }, { "from": "gpt", "value": "The answer is t" } ], "clip_score": 20.265625, "loss": 4.961498260498047 }, { "id": "identity_43359", "image": "DVQA/images/bar_train_00177802.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest value for \"couple\"?" }, { "from": "gpt", "value": "The answer is blind" } ], "clip_score": 28.53125, "loss": 4.170616626739502 }, { "id": "identity_261654", "image": "TextVQA/images/081e736f68b9cefb.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the current time?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 16.4375, "loss": 4.336376667022705 }, { "id": "identity_319104", "image": "ScienceQA/images/12087.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared property of these items?\nChoices:\n(A) scratchy\n(B) flexible\n(C) fragile" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.7265625, "loss": 4.71267557144165 }, { "id": "identity_283203", "image": "VizWiz/images/VizWiz_train_00004599.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what is shown in the image?" }, { "from": "gpt", "value": "The answer is man holding gun" } ], "clip_score": 23.25, "loss": 4.42417049407959 }, { "id": "identity_20667", "image": "Super-CLEVR/images/superCLEVR_new_000190.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there anything else that has the same size as the red rubber object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 18.0, "loss": 5.086810111999512 }, { "id": "identity_193572", "image": "TextVQA/images/aa4424dbe9b49bef.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many basketball hoops are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.515625, "loss": 4.39329719543457 }, { "id": "identity_40028", "image": "DocVQA/images/xnvd0227_25.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the document title?" }, { "from": "gpt", "value": "The answer is Biochemical Data" } ], "clip_score": 23.390625, "loss": 4.009832382202148 }, { "id": "identity_294421", "image": "ScienceQA/images/10683.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Hawaii?\nChoices:\n(A) Santa Fe\n(B) Jefferson City\n(C) Honolulu\n(D) Salem" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.421875, "loss": 4.2287445068359375 }, { "id": "identity_295515", "image": "DocVQA/images/zrlp0227_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What duty is scheduled for April 28th?" }, { "from": "gpt", "value": "The answer is swanson center" } ], "clip_score": 20.640625, "loss": 4.480442047119141 }, { "id": "identity_310954", "image": "FigureQA/images/15704.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Seafoam the highest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.15625, "loss": 4.497872829437256 }, { "id": "identity_15359", "image": "VQA-AS/images/12912.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is on the table?" }, { "from": "gpt", "value": "The answer is wine food" } ], "clip_score": 21.625, "loss": 4.911271095275879 }, { "id": "identity_30144", "image": "Super-CLEVR/images/superCLEVR_new_001877.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a tiny object that is behind the gray metal truck; does it have the same shape as the rubber thing that is right of the tiny blue bus?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 31.8125, "loss": 4.968464374542236 }, { "id": "identity_261879", "image": "TQA/images/question_images/seasons_2633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What event follows the winter solstice in the seasonal cycle?\nChoices:\n(A) autumnal equinox\n(B) vernal equinox\n(C) summer solstice\n(D) vernal solstice" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.1875, "loss": 4.186746120452881 }, { "id": "identity_31052", "image": "A-OKVQA/images/440037.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How is this craft propelled along the water?\nChoices:\n(A) motor\n(B) foot paddles\n(C) paddle\n(D) wind" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.75, "loss": 4.016312599182129 }, { "id": "identity_315159", "image": "FigureQA/images/40536.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Blue?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.734375, "loss": 4.00724458694458 }, { "id": "identity_22159", "image": "PMC-VQA/images/PMC7847261_DMM046995F3_356415.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the bold arrows indicate in the image? \nChoices:\n(A) Destruction of crypts \n(B) Fibrosis \n(C) Influx of inflammatory cells \n(D) Edema. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.84375, "loss": 4.352860927581787 }, { "id": "identity_316551", "image": "TextVQA/images/61408ab56f2fb616.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What will help you take dreams further?" }, { "from": "gpt", "value": "The answer is quickmemo" } ], "clip_score": 19.03125, "loss": 4.640946388244629 }, { "id": "identity_4277", "image": "ScienceQA/images/289.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Pennsylvania?\nChoices:\n(A) Harrisburg\n(B) Trenton\n(C) Pittsburgh\n(D) Philadelphia" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.140625, "loss": 4.294569969177246 }, { "id": "identity_327440", "image": "IconQA/images/48170.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, which color are you less likely to get?\nChoices:\n(A) neither; black and pink are equally likely\n(B) black\n(C) pink" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.1875, "loss": 4.341556549072266 }, { "id": "identity_334076", "image": "A-OKVQA/images/514191.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time of year is this?\nChoices:\n(A) summer\n(B) autumn\n(C) winter\n(D) spring" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.484375, "loss": 4.178605079650879 }, { "id": "identity_111519", "image": "PMC-VQA/images/PMC9304965_F3_347375.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can we determine the patient's orientation during the scan from this image?" }, { "from": "gpt", "value": "The answer is Yes, frontal orientation" } ], "clip_score": 29.203125, "loss": 4.088298797607422 }, { "id": "identity_287626", "image": "ScienceQA/images/3341.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nChoices:\n(A) Massachusetts\n(B) South Carolina\n(C) Connecticut\n(D) Maine" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.015625, "loss": 4.808475494384766 }, { "id": "identity_249553", "image": "ScienceQA/images/12921.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which property do these two objects have in common, and how does this property benefit their respective uses?" }, { "from": "gpt", "value": "The answer is Stretchiness benefits bubble gum by allowing bubbles and springs by storing energy" } ], "clip_score": 36.9375, "loss": 4.006762504577637 }, { "id": "identity_286188", "image": "VQA2.0/images/236677.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color do the buildings in the lower right corner have?" }, { "from": "gpt", "value": "The answer is white" } ], "clip_score": 12.515625, "loss": 4.519144058227539 }, { "id": "identity_265358", "image": "Super-CLEVR/images/superCLEVR_new_000540.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the brown rubber object that is the same size as the metal bus?" }, { "from": "gpt", "value": "The answer is biplane" } ], "clip_score": 26.46875, "loss": 4.499793529510498 }, { "id": "identity_211958", "image": "PMC-VQA/images/PMC6209937_sensors-18-03475-f008_388972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What might be the reason for conducting this ultrasound?" }, { "from": "gpt", "value": "The answer is Diagnostic examination" } ], "clip_score": 31.015625, "loss": 4.419110298156738 }, { "id": "identity_239298", "image": "A-OKVQA/images/200597.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the possible ways the audience might receive the speaker's message, and which method is most likely based on the visual evidence provided?" }, { "from": "gpt", "value": "The answer is through speakers" } ], "clip_score": 17.0625, "loss": 5.870950222015381 }, { "id": "identity_193309", "image": "A-OKVQA/images/300753.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible from the pitching team?" }, { "from": "gpt", "value": "The answer is Two players are visible" } ], "clip_score": 25.734375, "loss": 4.1335344314575195 }, { "id": "identity_281907", "image": "PMC-VQA/images/PMC3844447_F11_246859.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the arrowheads in the image?\nChoices:\n(A) No visible damage, \n(B) Visible damage, \n(C) Brain activity, \n(D) Magnetic resonance." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.53125, "loss": 4.96282958984375 }, { "id": "identity_125087", "image": "TextVQA/images/796d909a9addb189.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bands are listed on the right side of the image under the title \"brain damage\"?" }, { "from": "gpt", "value": "The answer is Seven bands" } ], "clip_score": 23.109375, "loss": 4.480459213256836 }, { "id": "identity_271153", "image": "VizWiz/images/VizWiz_train_00003746.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me what that says?" }, { "from": "gpt", "value": "The answer is llbean" } ], "clip_score": 25.375, "loss": 4.116624355316162 }, { "id": "identity_324697", "image": "A-OKVQA/images/328216.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are they hoping to cross?\nChoices:\n(A) train tracks\n(B) pirate swords\n(C) rooftops\n(D) river" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.640625, "loss": 4.02399206161499 }, { "id": "identity_154528", "image": "AI2D/images/1528.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many orbits are shown around the nucleus in the diagram?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 29.71875, "loss": 4.409552574157715 }, { "id": "identity_111580", "image": "TextVQA/images/4b16ce5e64999340.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of award is being presented in this image?" }, { "from": "gpt", "value": "The answer is Challenge coin" } ], "clip_score": 22.890625, "loss": 4.124423980712891 }, { "id": "identity_287723", "image": "ScienceQA/images/11929.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which question is most suitable for Larry's experiment to answer?\nChoices:\n(A) Do pennies have the capacity to hold more drops of pure water or water mixed with hand soap?\n(B) Do pennies have the capacity to hold more drops of water mixed with dish soap or water mixed with hand soap?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.703125, "loss": 4.098365306854248 }, { "id": "identity_279550", "image": "A-OKVQA/images/70636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is located above the horses?\nChoices:\n(A) fly\n(B) kite\n(C) sky\n(D) dragon" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.515625, "loss": 4.373208999633789 }, { "id": "identity_124788", "image": "A-OKVQA/images/358576.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the price tags on the apples?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 24.125, "loss": 4.0012006759643555 }, { "id": "identity_326676", "image": "A-OKVQA/images/372554.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the green area here need?\nChoices:\n(A) water\n(B) salt\n(C) gas\n(D) electricity" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 2.7109375, "loss": 4.595654487609863 }, { "id": "identity_296259", "image": "PMC-VQA/images/PMC6194021_Fig4_383711.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material was utilized for the histological images section?\nChoices:\n(A) PMMA\n(B) PVC\n(C) PEEK\n(D) PET" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.6875, "loss": 4.3345255851745605 }, { "id": "identity_305087", "image": "ScienceQA/images/5276.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Nashville\n(B) Knoxville\n(C) Boulder\n(D) Memphis" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.828125, "loss": 4.532153129577637 }, { "id": "identity_36202", "image": "PMC-VQA/images/PMC7744532_Fig3_322190.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the blue areas represent in the graph? \nChoices:\n(A) Longer durations \n(B) Shorter durations \n(C) Higher volume in the lower MI group \n(D) Lower volume in the lower MI group. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.0859375, "loss": 4.784241676330566 }, { "id": "identity_334795", "image": "PMC-VQA/images/PMC6918282_polymers-11-01860-f001_85802.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of fibers are depicted in the images?\nChoices:\n(A) Cotton fibers\n(B) Carbon fibers\n(C) ES PLA/TiO2 fibers\n(D) Nylon fibers" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.28125, "loss": 4.230738639831543 }, { "id": "identity_128903", "image": "A-OKVQA/images/150685.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many burners are visible on the stove?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.453125, "loss": 4.567255973815918 }, { "id": "identity_270871", "image": "VizWiz/images/VizWiz_train_00014605.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the water bottle?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 29.828125, "loss": 4.311590671539307 }, { "id": "identity_337911", "image": "FigureQA/images/22930.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Blue Violet?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.984375, "loss": 4.01093053817749 }, { "id": "identity_295415", "image": "ScienceQA/images/15537.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which animal has skin that is adapted for survival in cold places?\nChoices:\n(A) Eurasian lynx\n(B) fire salamander" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.515625, "loss": 4.088460445404053 }, { "id": "identity_336947", "image": "IconQA/images/43748.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this symbol?\nChoices:\n(A) flip\n(B) turn\n(C) slide" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.078125, "loss": 4.454245567321777 }, { "id": "identity_217768", "image": "A-OKVQA/images/501995.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many condiment bottles are visible on the counter?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 8.953125, "loss": 4.598365783691406 }, { "id": "identity_211210", "image": "TextVQA/images/72b49f8ba04ed2ce.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of device is mounted on the tripod?" }, { "from": "gpt", "value": "The answer is Camera" } ], "clip_score": 14.46875, "loss": 4.532951831817627 }, { "id": "identity_23290", "image": "A-OKVQA/images/431854.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is facing down?\nChoices:\n(A) tennis racquet\n(B) sword\n(C) axe\n(D) laser beam" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.578125, "loss": 4.275118350982666 }, { "id": "identity_276942", "image": "Super-CLEVR/images/superCLEVR_new_000518.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer cyan objects in front of the metal aeroplane compared to the number of rubber dirtbikes on the left side of the big green rubber aeroplane?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 22.140625, "loss": 5.075296878814697 }, { "id": "identity_13585", "image": "AI2D/images/2342.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label in the diagram represents the zygote?\nChoices:\n(A) F\n(B) G\n(C) E\n(D) I" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.125, "loss": 4.276287078857422 }, { "id": "identity_208442", "image": "TextVQA/images/963efab1d2158942.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the artist who designed the label?" }, { "from": "gpt", "value": "The answer is Alex Liberati" } ], "clip_score": 12.8046875, "loss": 4.263566017150879 }, { "id": "identity_25674", "image": "PMC-VQA/images/PMC5455666_materials-08-04287-f005_173123.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the material of the fractured surface shown in SEM images? \nChoices:\n(A) Copper \n(B) Aluminum \n(C) Titanium \n(D) Iron " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.109375, "loss": 4.010062217712402 }, { "id": "identity_264977", "image": "FigureQA/images/19909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Purple the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.734375, "loss": 4.695499420166016 }, { "id": "identity_314714", "image": "TQA/images/question_images/cycle_nitrogen_6711.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What produces some organic matter?\nChoices:\n(A) fertilizers\n(B) plants and animals\n(C) farms and runoff\n(D) Fossil fuel emissions" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.953125, "loss": 4.29364538192749 }, { "id": "identity_140078", "image": "DocVQA/images/nryf0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the destination city for the flight?" }, { "from": "gpt", "value": "The answer is CLGA" } ], "clip_score": 16.46875, "loss": 5.93397331237793 }, { "id": "identity_182266", "image": "A-OKVQA/images/61354.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of fruit can be seen in the wooden crates?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.984375, "loss": 4.303717613220215 }, { "id": "identity_15451", "image": "A-OKVQA/images/198057.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the big animals called?\nChoices:\n(A) elephants\n(B) tigers\n(C) pelican\n(D) eels" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.1015625, "loss": 4.1921586990356445 }, { "id": "identity_34275", "image": "PMC-VQA/images/PMC6757074_Fig5_40839.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the radiological finding in this image? \nChoices:\n(A) Fluid in the uterus \n(B) Free air in the abdomen \n(C) Gas in the uterine defect \n(D) Calcified mass in the pelvis " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.0, "loss": 4.9085493087768555 }, { "id": "identity_333874", "image": "TQA/images/question_images/optics_ray_diagrams_9174.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why does this ray pass straight through the middle of the lens?\n\nChoices:\n(A) For reference\n(B) Because the lens has no thickness\n(C) Because it's a convex lens\n(D) Because it's a concave lens" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.53125, "loss": 4.251782417297363 }, { "id": "identity_14098", "image": "TQA/images/abc_question_images/human_system_respiratory_11403.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents the Larynx?\nChoices:\n(A) T\n(B) A\n(C) V\n(D) X" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.265625, "loss": 4.077448844909668 }, { "id": "identity_312761", "image": "FigureQA/images/82778.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.859375, "loss": 4.0072832107543945 }, { "id": "identity_300491", "image": "IconQA/images/46552.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on?\nChoices:\n(A) neither; white and pink are equally likely\n(B) white\n(C) pink" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.0, "loss": 4.346285343170166 }, { "id": "identity_41063", "image": "DocVQA/images/phwk0226_31.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of service is the booklet described as?" }, { "from": "gpt", "value": "The answer is Public service" } ], "clip_score": 23.484375, "loss": 4.133311748504639 }, { "id": "identity_271639", "image": "A-OKVQA/images/176519.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for their choice of wearing wetsuits?\nChoices:\n(A) for fun\n(B) cold water\n(C) easier finding\n(D) showing off" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.59375, "loss": 4.787408351898193 }, { "id": "identity_126289", "image": "VQA-AS/images/9934.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary activity the playground equipment is designed for?" }, { "from": "gpt", "value": "The answer is Seesaw play" } ], "clip_score": 31.96875, "loss": 4.246788024902344 }, { "id": "identity_332550", "image": "DVQA/images/bar_train_00105380.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the bar at the bottom?" }, { "from": "gpt", "value": "The answer is score" } ], "clip_score": 18.421875, "loss": 4.072990894317627 }, { "id": "identity_2736", "image": "PMC-VQA/images/PMC4383027_f3-ijo-46-05-1913_374392.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of microscope was used to capture the images of the colon cancer spheres? \nChoices:\n(A) Bright field \n(B) Fluorescence \n(C) Confocal \n(D) Electron " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.953125, "loss": 4.5651445388793945 }, { "id": "identity_38417", "image": "A-OKVQA/images/374966.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the area the boys are skating in called?\nChoices:\n(A) arena\n(B) pipe\n(C) bowl\n(D) ramp" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.484375, "loss": 4.215083122253418 }, { "id": "identity_278457", "image": "FigureQA/images/87855.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Gold the curve with the minimum area?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.453125, "loss": 4.076930522918701 }, { "id": "identity_189863", "image": "TextVQA/images/939c816006f61921.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are crossing the street?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 16.796875, "loss": 4.567670822143555 }, { "id": "identity_293551", "image": "A-OKVQA/images/471188.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which company possesses vehicles that resemble the ones on the street among the following options: (A) Tesla, (B) Dunkin Donuts, (C) McDonald's, or (D) Greyhound?" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.46875, "loss": 4.160456657409668 }, { "id": "identity_50163", "image": "FigureQA/images/53855.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color represents the second highest value on the graph?" }, { "from": "gpt", "value": "The answer is Orange Red" } ], "clip_score": 27.28125, "loss": 4.586866855621338 }, { "id": "identity_321465", "image": "IconQA/images/6539.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on?\nChoices:\n(A) white\n(B) green" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.5, "loss": 4.07503080368042 }, { "id": "identity_30706", "image": "ScienceQA/images/8376.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) rough\n(B) colorful" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.046875, "loss": 4.3939714431762695 }, { "id": "identity_274719", "image": "FigureQA/images/89411.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Violet Red the color with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.421875, "loss": 4.024241924285889 }, { "id": "identity_32069", "image": "A-OKVQA/images/70744.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: This scene takes place during when?\nChoices:\n(A) summer\n(B) vietnam war\n(C) winter\n(D) korean war" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.71875, "loss": 4.414230823516846 }, { "id": "identity_330968", "image": "PMC-VQA/images/PMC3605910_pone-0059740-g006_193981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the green fluorescence indicate in this image?\nChoices:\n(A) Necrosis\n(B) Autofluorescence\n(C) Favorable labeling section\n(D) Unfavorable labeling section" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.21875, "loss": 4.54452657699585 }, { "id": "identity_290374", "image": "PMC-VQA/images/PMC8792417_f0005_177857.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which region of the brain is there a slight hyperperfusion observed?\nChoices:\n(A) Right frontal region\n(B) Left frontal region\n(C) Occipital region\n(D) Parietal region" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.625, "loss": 4.19201135635376 }, { "id": "identity_214918", "image": "A-OKVQA/images/43202.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trees are there in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.046875, "loss": 4.226816654205322 }, { "id": "identity_184015", "image": "VQA2.0/images/514457.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bear cutouts are displayed in front of the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.59375, "loss": 4.28176736831665 }, { "id": "identity_33308", "image": "A-OKVQA/images/165862.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of hat does the man wearing jeans have on his head?\nChoices:\n(A) fedora\n(B) bucket hat\n(C) safari hat\n(D) top hat" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.796875, "loss": 4.197824954986572 }, { "id": "identity_275382", "image": "AI2D/images/4148.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of A in the rock cycle diagram?\nChoices:\n(A) metamorphic rocks\n(B) sedimentary rocks\n(C) magma\n(D) igneous rocks" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.15625, "loss": 4.649433135986328 }, { "id": "identity_12416", "image": "ScienceQA/images/18622.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Arizona?\nChoices:\n(A) Phoenix\n(B) Sacramento\n(C) Olympia\n(D) Denver" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.5, "loss": 4.292041778564453 }, { "id": "identity_335725", "image": "A-OKVQA/images/289213.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of sport is being practiced here?\nChoices:\n(A) team\n(B) winter\n(C) aquatic\n(D) combat" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.0859375, "loss": 4.418036460876465 }, { "id": "identity_274752", "image": "Super-CLEVR/images/superCLEVR_new_001545.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer shiny airplanes in front of the yellow airplane compared to the small objects behind the red object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 25.015625, "loss": 5.205313205718994 }, { "id": "identity_130167", "image": "A-OKVQA/images/65063.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct white markings are visible on the horse's front legs?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 29.640625, "loss": 4.509468078613281 }, { "id": "identity_108756", "image": "VQA2.0/images/28698.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object hanging to the left of the black cat plush?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 33.375, "loss": 4.95279598236084 }, { "id": "identity_327508", "image": "TQA/images/question_images/velocity_time_graphs_8218.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the highest velocity reached?\nChoices:\n(A) 60\n(B) 40\n(C) 100\n(D) 80" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.0, "loss": 4.008723258972168 }, { "id": "identity_261670", "image": "TextVQA/images/8d1c1f6ffab661d6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On this page, what is the color of the words \"the social landscape\"?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 21.1875, "loss": 4.44844388961792 }, { "id": "identity_289932", "image": "AI2D/images/10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the image that corresponds to Fagonia indica:\n(A) D\n(B) E\n(C) A\n(D) B" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.9375, "loss": 4.290731430053711 }, { "id": "identity_270651", "image": "DVQA/images/bar_train_00051944.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the second bar from the left in every group?" }, { "from": "gpt", "value": "The answer is band" } ], "clip_score": 18.046875, "loss": 4.272453308105469 }, { "id": "identity_270172", "image": "IconQA/images/60431.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture corresponds to the ninth position in the sequence, given that the first picture is a plane? Choices are:\n(A) bear\n(B) train\n(C) plane" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.6875, "loss": 4.18595027923584 }, { "id": "identity_22740", "image": "ScienceQA/images/5784.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Illinois?\nChoices:\n(A) Springfield\n(B) Provo\n(C) Chicago\n(D) Madison" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.28125, "loss": 4.188827037811279 }, { "id": "identity_119323", "image": "VQA2.0/images/71855.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs are visible in the dining area?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 20.015625, "loss": 4.634518146514893 }, { "id": "identity_30192", "image": "VizWiz/images/VizWiz_train_00008514.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the greyest clouds this morning look like storm clouds, or just regular clouds? " }, { "from": "gpt", "value": "The answer is all white regular clouds" } ], "clip_score": 23.890625, "loss": 4.30344295501709 }, { "id": "identity_328591", "image": "PMC-VQA/images/PMC3239364_F1_119442.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the left lane in this image?\nChoices:\n(A) To show the results of immunofluorescent labeling \n(B) To indicate the upper layer of synovial membranes \n(C) To show the magnification of the image \n(D) To display the results of immunohistochemistry" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.796875, "loss": 4.075663089752197 }, { "id": "identity_33058", "image": "PMC-VQA/images/PMC4304244_F8_352529.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of deformation is shown in the results? \nChoices:\n(A) Forward deformation \n(B) Backward deformation \n(C) Lateral deformation \n(D) Radial deformation " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.09375, "loss": 4.2314653396606445 }, { "id": "identity_281001", "image": "A-OKVQA/images/326583.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which mode of transportation is depicted in the image? \nOptions:\n(A) air\n(B) water\n(C) rail\n(D) road" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.3515625, "loss": 4.613744258880615 }, { "id": "identity_11882", "image": "A-OKVQA/images/499951.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What medium was the art on the wall done with?\nChoices:\n(A) spray paint\n(B) pencil\n(C) crayons\n(D) etchings" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.2734375, "loss": 4.210526943206787 }, { "id": "identity_276052", "image": "FigureQA/images/95784.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Violet Red the color with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.640625, "loss": 4.180392742156982 }, { "id": "identity_281356", "image": "PMC-VQA/images/PMC7648378_Fig1_293222.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of view is utilized to display the cross-table view of the acetabular component?\nOptions:\n(A) anteroposterior view\n(B) oblique view\n(C) lateral view\n(D) ultrasound view" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.15625, "loss": 4.37426233291626 }, { "id": "identity_329895", "image": "PMC-VQA/images/PMC6857131_Fig2_68959.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of imaging was used in this case?\nChoices:\n(A) CT scan\n(B) X-ray\n(C) MRI\n(D) PET scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.453125, "loss": 4.295739650726318 }, { "id": "identity_296515", "image": "A-OKVQA/images/125120.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the probable content of the silver containers among the options of milk, juice, coffee, and water?\nChoices:\n(A) milk\n(B) juice\n(C) coffee\n(D) water" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 7.09765625, "loss": 4.137518405914307 }, { "id": "identity_21092", "image": "PMC-VQA/images/PMC5281579_F5_120924.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the image?\nChoices:\n(A) Nerves and muscles \n(B) Blood and lymph vessels \n(C) Bones and joints \n(D) Connective tissue and fat " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.453125, "loss": 4.2251362800598145 }, { "id": "identity_138147", "image": "DocVQA/images/yrvw0217_58.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the proposed pricing strategy for Premarin/TMG in the US as mentioned in the Pricing section?" }, { "from": "gpt", "value": "The answer is parity to existing Prempro formulations" } ], "clip_score": 27.875, "loss": 4.338240623474121 }, { "id": "identity_298089", "image": "PMC-VQA/images/PMC5965189_f1-cpcem-01-268_318156.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the shadows cast by the endometrium in the image?\nChoices:\n(A) Air in the uterus \n(B) Normal tissue \n(C) Irregular echogenic endometrial debris \n(D) Cancer" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.796875, "loss": 4.060328960418701 }, { "id": "identity_316388", "image": "IconQA/images/94992.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a rocket. What is the eighth image?\nChoices:\n(A) moon\n(B) rocket\n(C) star" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.40625, "loss": 4.488804817199707 }, { "id": "identity_280354", "image": "AI2D/images/3429.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the flower is located below the ovary? \n(A) Petal \n(B) Penduncle \n(C) Sepal \n(D) Stamen" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.0, "loss": 4.020120620727539 }, { "id": "identity_306797", "image": "A-OKVQA/images/155811.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What alerts individuals here of a safe crossing time?\nChoices:\n(A) policeman\n(B) crossing guard\n(C) walk light\n(D) horses" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.125, "loss": 4.216177463531494 }, { "id": "identity_272297", "image": "ScienceQA/images/20291.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly identifies the direction of thermal energy transfer from () to (): \n(A) each cake to the surroundings \n(B) the surroundings to each cake" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.953125, "loss": 4.577646255493164 }, { "id": "identity_118701", "image": "Super-CLEVR/images/superCLEVR_new_000524.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the green object to the right of the yellow bus?" }, { "from": "gpt", "value": "The answer is Irregular" } ], "clip_score": 29.125, "loss": 4.5356926918029785 }, { "id": "identity_286107", "image": "ScienceQA/images/9849.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What property does this object possess?\nChoices:\n(A) sour\n(B) salty" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.890625, "loss": 4.325766086578369 }, { "id": "identity_25863", "image": "VizWiz/images/VizWiz_train_00006101.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this photo describing?" }, { "from": "gpt", "value": "The answer is this photo describing news stories" } ], "clip_score": 17.8125, "loss": 4.174149513244629 }, { "id": "identity_36636", "image": "A-OKVQA/images/150091.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of area is in the background?\nChoices:\n(A) forest\n(B) urban\n(C) rural\n(D) mountain" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 2.28515625, "loss": 4.868213653564453 }, { "id": "identity_332927", "image": "FigureQA/images/73182.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this have the minimum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.9453125, "loss": 4.487515926361084 }, { "id": "identity_148364", "image": "DocVQA/images/tyjf0226_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the city where the sponsor's address is located?" }, { "from": "gpt", "value": "The answer is Lincolnshire" } ], "clip_score": 21.4375, "loss": 4.834864139556885 }, { "id": "identity_26623", "image": "ScienceQA/images/19833.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) Jamaica\n(B) Cuba\n(C) Antigua and Barbuda\n(D) Haiti" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.015625, "loss": 4.019640922546387 }, { "id": "identity_314705", "image": "Super-CLEVR/images/superCLEVR_new_001518.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the number of purple motorbikes on the left side of the purple scooter and the number of cyan cruisers in front of the tiny cruiser the same?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 29.84375, "loss": 5.0430378913879395 }, { "id": "identity_223410", "image": "A-OKVQA/images/145447.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the roof of the building on the left side of the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 19.578125, "loss": 4.333189487457275 }, { "id": "identity_283850", "image": "PMC-VQA/images/PMC6647078_Fig2_12408.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique is utilized to visualize ADC mapping?\nOptions:\n(A) DCE-MRI\n(B) DWI\n(C) Mammography\n(D) PET-CT" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.984375, "loss": 4.599435329437256 }, { "id": "identity_223304", "image": "VizWiz/images/VizWiz_train_00000536.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many potted plants are visible on the porch?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 19.21875, "loss": 4.533797740936279 }, { "id": "identity_333755", "image": "A-OKVQA/images/362309.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which country is this bus located?\nChoices:\n(A) dominica\n(B) philippines\n(C) india\n(D) thailand" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.9375, "loss": 4.225694179534912 }, { "id": "identity_106539", "image": "VQA2.0/images/291717.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 20.453125, "loss": 4.206284523010254 }, { "id": "identity_65290", "image": "AI2D/images/1920.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many primary consumers can you identify in this food web?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 30.9375, "loss": 4.995286464691162 }, { "id": "identity_8876", "image": "PMC-VQA/images/PMC6189648_F1_382390.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the femur were the trabecular bone imaged? \nChoices:\n(A) Proximal femoral metaphysis \n(B) Distal femoral metaphysis \n(C) Femoral head \n(D) Femoral shaft. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.75, "loss": 4.323861598968506 }, { "id": "identity_305904", "image": "PMC-VQA/images/PMC3847501_F2_247545.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of stain was applied to this image?\nChoices:\n(A) Immunohistochemistry stain \n(B) Fluorescence stain \n(C) Hematoxylin and eosin stain \n(D) Gram stain" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.046875, "loss": 4.7151875495910645 }, { "id": "identity_279628", "image": "A-OKVQA/images/374685.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What items are typically found in the cages?\nChoices:\n(A) animal\n(B) pocket book\n(C) food\n(D) bottle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.578125, "loss": 4.250817775726318 }, { "id": "identity_268136", "image": "A-OKVQA/images/156652.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of shirt is the man on the right wearing, with options including plaid, tie dye, flannel, and hippie special?\nChoices:\n(A) plaid\n(B) tie dye\n(C) flannel\n(D) hippie special" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.28125, "loss": 4.279516220092773 }, { "id": "identity_270196", "image": "PMC-VQA/images/PMC7539492_Fig1_267303.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the finding of the chest computed tomography scan? \nChoices:\n(A) Left-sided massive pleural effusion with pneumothorax \n(B) Both-sided massive pleural effusion with pneumothorax \n(C) Right-sided massive pleural effusion with pneumothorax \n(D) No pleural effusion or pneumothorax detected." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.21875, "loss": 4.265987396240234 }, { "id": "identity_303781", "image": "Super-CLEVR/images/superCLEVR_new_000898.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the green object that is the same shape as the large gray thing?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 16.328125, "loss": 4.338455677032471 }, { "id": "identity_1687", "image": "Super-CLEVR/images/superCLEVR_new_001985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the rubber thing that is the same size as the gray metallic scooter?" }, { "from": "gpt", "value": "The answer is truck" } ], "clip_score": 24.65625, "loss": 4.3754425048828125 }, { "id": "identity_277532", "image": "FigureQA/images/96630.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Turquoise or Light Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.7734375, "loss": 4.590190410614014 }, { "id": "identity_266947", "image": "AI2D/images/48.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which stage of development is represented at point A in the diagram?\nOptions:\n(A) Pupa\n(B) Larva\n(C) Adult\n(D) Egg" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.40625, "loss": 4.384984970092773 }, { "id": "identity_298995", "image": "PMC-VQA/images/PMC5175298_Fig2_106912.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which areas of the brain exhibit hyperintensity on the MRI image?\nChoices:\n(A) Subcapsular regions \n(B) Basal ganglia \n(C) Hippocampus \n(D) Cerebellum" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.171875, "loss": 4.39557409286499 }, { "id": "identity_289489", "image": "TabMWP/images/20374.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.296875, "loss": 4.636317253112793 }, { "id": "identity_281979", "image": "TextVQA/images/617ce40fdfb99184.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What name is printed on his shirt?" }, { "from": "gpt", "value": "The answer is hilingdon" } ], "clip_score": 20.5, "loss": 5.317550182342529 }, { "id": "identity_485", "image": "AI2D/images/3528.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which region in the diagram represents the hemicellulose?\nChoices:\n(A) c\n(B) b\n(C) e\n(D) d" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.140625, "loss": 4.248831272125244 }, { "id": "identity_49807", "image": "DocVQA/images/ksjw0081_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the nature of the complaint as per the form?" }, { "from": "gpt", "value": "The answer is last 3-4 purchases have all contained cigts. that fire falls off 3/4 way down after lighting" } ], "clip_score": 26.40625, "loss": 4.668329238891602 }, { "id": "identity_188849", "image": "PMC-VQA/images/PMC6920455_Fig3_86881.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the heart in this image?" }, { "from": "gpt", "value": "The answer is Transverse orientation" } ], "clip_score": 26.46875, "loss": 4.15339994430542 }, { "id": "identity_270568", "image": "Super-CLEVR/images/superCLEVR_new_000777.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the number of blue shiny aeroplanes behind the large shiny aeroplane equal to the number of blue shiny airliners?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 22.765625, "loss": 4.893431663513184 }, { "id": "identity_5767", "image": "AI2D/images/73.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What organism benefits from carbon dioxide given out by fish?\nChoices:\n(A) animals\n(B) People\n(C) Plants\n(D) dirt" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.5625, "loss": 4.027670383453369 }, { "id": "identity_6265", "image": "AI2D/images/1078.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is the outermost layer of plant cell?\nChoices:\n(A) Amylosplast\n(B) Cell Membrane\n(C) Cell wall\n(D) Cytoplasm" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.421875, "loss": 4.019984245300293 }, { "id": "identity_264108", "image": "ScienceQA/images/7382.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repelling effect on each other?\nChoices:\n(A) repel\n(B) attract" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.203125, "loss": 4.061864852905273 }, { "id": "identity_7924", "image": "AI2D/images/3976.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: what is the above type of figure?\nChoices:\n(A) aster flower head\n(B) part of bird\n(C) parts of human\n(D) part of animal" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.09375, "loss": 4.374575614929199 }, { "id": "identity_268284", "image": "VQA2.0/images/539283.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term for the chest area of a bird?" }, { "from": "gpt", "value": "The answer is feather" } ], "clip_score": 20.671875, "loss": 4.236293315887451 }, { "id": "identity_271731", "image": "PMC-VQA/images/PMC7287056_Fig2_189661.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does Figure A depict?\nOptions:\n(A) The distribution of the eruption materials\n(B) The feeder dikes\n(C) The ground surface of the eruption fissure\n(D) The location of the volcano" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.875, "loss": 5.163477897644043 }, { "id": "identity_291163", "image": "PMC-VQA/images/PMC4940967_Fig1_50726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the CT scan image?\nChoices:\n(A) Blood vessels that are swollen \n(B) Lymph nodes that are swollen \n(C) Swollen abdominal muscles \n(D) Swollen Kidneys." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.796875, "loss": 4.489579200744629 }, { "id": "identity_276153", "image": "PMC-VQA/images/PMC7838310_Fig6_354011.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the magnified box in option (B)?\nChoices:\n(A) A closer look at the polyp tissues of O. faveolata \n(B) The distribution of Symbiodiniaceae in coral tissues \n(C) The role of calmodulin in coral bleaching \n(D) The abundance of carbonic anhydrase in coral mucus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.859375, "loss": 4.232486248016357 }, { "id": "identity_190457", "image": "A-OKVQA/images/305667.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is At least six people" } ], "clip_score": 21.0625, "loss": 4.205835342407227 }, { "id": "identity_272435", "image": "ScienceQA/images/16136.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What common property do these four objects share among the options of being transparent, sweet, or sour?\nChoices:\n(A) transparent\n(B) sweet\n(C) sour" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.140625, "loss": 4.5710577964782715 }, { "id": "identity_307103", "image": "PMC-VQA/images/PMC7872619_f1-cpcem-05-125_364399.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the images?\nChoices:\n(A) Free air under an organ.\n(B) Enlarged stomach.\n(C) Free air under a part of the body.\n(D) Normal x-ray." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.4375, "loss": 4.53163480758667 }, { "id": "identity_309117", "image": "DVQA/images/bar_train_00169832.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the fourth bar from the left in each category?" }, { "from": "gpt", "value": "The answer is pause" } ], "clip_score": 17.53125, "loss": 4.7738938331604 }, { "id": "identity_295128", "image": "TQA/images/abc_question_images/cycle_water_14915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options represents the term \"runoff\"? \n(A) S \n(B) U \n(C) F \n(D) V" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.734375, "loss": 4.014717102050781 }, { "id": "identity_36993", "image": "IconQA/images/43748.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) flip\n(B) turn\n(C) slide" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.078125, "loss": 4.650631427764893 }, { "id": "identity_197258", "image": "PMC-VQA/images/PMC4650277_Fig1_445147.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is primarily being visualized in this image?" }, { "from": "gpt", "value": "The answer is The heart" } ], "clip_score": 21.671875, "loss": 4.1877946853637695 }, { "id": "identity_268307", "image": "TQA/images/question_images/human_system_digestive_3702.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the human anatomy, as shown in the diagram, is responsible for producing bile?\nChoices:\n(A) stomach\n(B) gallbladder\n(C) kidneys\n(D) liver" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.5, "loss": 4.233977317810059 }, { "id": "identity_31964", "image": "TextVQA/images/24bec4d1080a9171.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the first letter on the boys shirt?" }, { "from": "gpt", "value": "The answer is t" } ], "clip_score": 14.4453125, "loss": 4.376936912536621 }, { "id": "identity_19852", "image": "ScienceQA/images/12238.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these three objects have in common?\nChoices:\n(A) rough\n(B) blue\n(C) bouncy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.90625, "loss": 4.361838340759277 }, { "id": "identity_126268", "image": "A-OKVQA/images/410779.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many whole carrots can be seen on the table?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 19.875, "loss": 4.399270057678223 }, { "id": "identity_285847", "image": "A-OKVQA/images/52966.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What distinguishing feature does the man in the middle possess?\nChoices:\n(A) goatee\n(B) horns\n(C) third eye\n(D) triple chin" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.5703125, "loss": 4.908430576324463 }, { "id": "identity_19534", "image": "A-OKVQA/images/147971.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What animal mimics the pattern of the plane to the left?\nChoices:\n(A) cheetah\n(B) snow leopard\n(C) frog\n(D) dog" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.09375, "loss": 4.102944374084473 }, { "id": "identity_59729", "image": "MapQA/images/map_1169.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have inquiries in the range of 79048-95949?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 32.9375, "loss": 4.236658573150635 }, { "id": "identity_156879", "image": "TQA/images/question_images/aquifers_6517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the treated water pumped in from supply works?" }, { "from": "gpt", "value": "The answer is For storage and supply during high demand" } ], "clip_score": 28.25, "loss": 4.004055976867676 }, { "id": "identity_274020", "image": "FigureQA/images/67969.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Purple the curve with the maximum area?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.5625, "loss": 4.375175476074219 }, { "id": "identity_310880", "image": "A-OKVQA/images/453819.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is above the surface?\nChoices:\n(A) boat\n(B) swimmer\n(C) surfer\n(D) dolphin" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.703125, "loss": 4.380859375 }, { "id": "identity_223515", "image": "VizWiz/images/VizWiz_train_00017646.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main color of the tiles on the wall?" }, { "from": "gpt", "value": "The answer is Light blue" } ], "clip_score": 21.46875, "loss": 4.033115386962891 }, { "id": "identity_33872", "image": "PMC-VQA/images/PMC7207249_F2_165556.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the final angiogram?\nChoices:\n(A) Blockage of the ureters \n(B) Perfect placement of stent \n(C) Nephritis \n(D) Both A & C" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.875, "loss": 4.167099952697754 }, { "id": "identity_15579", "image": "A-OKVQA/images/153834.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The cow belongs to which genus?\nChoices:\n(A) bovinae\n(B) bovidae\n(C) bos\n(D) cattle" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.171875, "loss": 4.223693370819092 }, { "id": "identity_334280", "image": "A-OKVQA/images/102461.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the people here observe?\nChoices:\n(A) party\n(B) horse race\n(C) candled cake\n(D) protest" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.640625, "loss": 4.25985860824585 }, { "id": "identity_299502", "image": "VizWiz/images/VizWiz_train_00018691.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Was I informed correctly that this is a strawberry ice cream, or is someone holding the ice cream, as this appears to be an Avon book?" }, { "from": "gpt", "value": "The answer is no not being held" } ], "clip_score": 30.140625, "loss": 5.132472991943359 }, { "id": "identity_12558", "image": "IconQA/images/45498.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you select a marble without looking, how likely is it that you will pick a black one?\nChoices:\n(A) impossible\n(B) unlikely\n(C) probable\n(D) certain" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.59375, "loss": 4.108453750610352 }, { "id": "identity_321912", "image": "A-OKVQA/images/75581.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this person doing?\nChoices:\n(A) showing off\n(B) taking shortcut\n(C) tricks\n(D) falling" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.96875, "loss": 4.557523250579834 }, { "id": "identity_274019", "image": "FigureQA/images/52684.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.5625, "loss": 4.670333385467529 }, { "id": "identity_257769", "image": "PMC-VQA/images/PMC4144387_fig1_315613.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the possible clinical implications of a hypointense image at the birth of the MCA as observed in the given image?" }, { "from": "gpt", "value": "The answer is Potential stroke or reduced blood supply" } ], "clip_score": 32.15625, "loss": 4.502999782562256 }, { "id": "identity_280300", "image": "A-OKVQA/images/161738.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For which city are the signs showing their love?\nChoices:\n(A) Manchester\n(B) Moscow\n(C) Milan\n(D) Manila" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.265625, "loss": 4.080140590667725 }, { "id": "identity_322834", "image": "PMC-VQA/images/PMC7370341_Fig2_216498.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where does the left OA originate from in this image?\nChoices:\n(A) Superior mesenteric artery \n(B) Splenic artery \n(C) Celiac trunk \n(D) Renal artery" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.21875, "loss": 4.652395725250244 }, { "id": "identity_306873", "image": "FigureQA/images/92049.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.484375, "loss": 4.002252101898193 }, { "id": "identity_106428", "image": "TextVQA/images/aac56de71155f98e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the app located second from the left in the dock?" }, { "from": "gpt", "value": "The answer is Tools" } ], "clip_score": 24.03125, "loss": 4.230493068695068 }, { "id": "identity_304216", "image": "VizWiz/images/VizWiz_train_00012687.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the code on this image?" }, { "from": "gpt", "value": "The answer is gfdg" } ], "clip_score": 22.421875, "loss": 4.306957721710205 }, { "id": "identity_117567", "image": "VizWiz/images/VizWiz_train_00001138.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the objects placed behind the creamer container in the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 33.3125, "loss": 4.181375026702881 }, { "id": "identity_286574", "image": "A-OKVQA/images/316336.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is prohibited in this establishment? \n(A) alcohol\n(B) smoking\n(C) children\n(D) shouting" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.8671875, "loss": 4.600646018981934 }, { "id": "identity_315803", "image": "PMC-VQA/images/PMC4947334_Fig2_52439.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What condition is depicted in the CT angiography?\nChoices:\n(A) Agenesis of right pulmonary artery,\n(B) Agenesis of left pulmonary artery,\n(C) Agenesis of both pulmonary arteries,\n(D) N/A." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.734375, "loss": 4.165847301483154 }, { "id": "identity_337006", "image": "A-OKVQA/images/322771.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the grey object on top of the book used for?\nChoices:\n(A) wrestling\n(B) painting\n(C) exercising\n(D) gaming" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.6171875, "loss": 4.4233527183532715 }, { "id": "identity_273822", "image": "IconQA/images/45498.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the probability of selecting a black marble without looking?\nChoices:\n(A) impossible\n(B) unlikely\n(C) probable\n(D) certain" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.53125, "loss": 4.416289329528809 }, { "id": "identity_5100", "image": "A-OKVQA/images/222251.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The woman on the left has what above her nose?\nChoices:\n(A) jewelry\n(B) glasses\n(C) hat\n(D) dirt" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.171875, "loss": 4.607126712799072 }, { "id": "identity_307887", "image": "DVQA/images/bar_train_00105722.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many groups of bars have at least one bar with a value greater than -1?" }, { "from": "gpt", "value": "The answer is seven" } ], "clip_score": 21.03125, "loss": 4.288699626922607 }, { "id": "identity_109362", "image": "Super-CLEVR/images/superCLEVR_new_001151.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly to the right of the green minivan?" }, { "from": "gpt", "value": "The answer is Motorbike" } ], "clip_score": 27.484375, "loss": 4.147264003753662 }, { "id": "identity_64283", "image": "TQA/images/question_images/waves_interactions_interference_9300.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does the amplitude of the resulting wave in constructive interference compare to the original waves?" }, { "from": "gpt", "value": "The answer is Greater amplitude" } ], "clip_score": 33.9375, "loss": 4.268545150756836 }, { "id": "identity_273821", "image": "Super-CLEVR/images/superCLEVR_new_000920.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the color of the large matte chopper the same as the sedan?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 22.671875, "loss": 5.190769672393799 }, { "id": "identity_297911", "image": "A-OKVQA/images/448851.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: By what light source are the cats on the desk illuminated?\nChoices:\n(A) sun\n(B) desk lamp\n(C) moon\n(D) overhead light" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.140625, "loss": 4.275563716888428 }, { "id": "identity_264617", "image": "VQA-AS/images/11789.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What design is featured on the drapes?" }, { "from": "gpt", "value": "The answer is tagged" } ], "clip_score": 27.75, "loss": 5.124391078948975 }, { "id": "identity_328523", "image": "A-OKVQA/images/42805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who use these big umbrellas?\nChoices:\n(A) hawkers\n(B) shoppers\n(C) residents\n(D) shop owners" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.375, "loss": 4.440484046936035 }, { "id": "identity_11051", "image": "ScienceQA/images/21188.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these two objects have in common?\nChoices:\n(A) stretchy\n(B) yellow" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.296875, "loss": 4.918549060821533 }, { "id": "identity_7639", "image": "PMC-VQA/images/PMC6218971_Fig2_391723.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of scan was used to detect the cysts? \nChoices:\n(A) Vertical scanning image \n(B) Magnetic resonance imaging \n(C) Horizontal scanning image \n(D) X-ray " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.296875, "loss": 4.360109806060791 }, { "id": "identity_31836", "image": "PMC-VQA/images/PMC5797361_Fig1_271410.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the second column show? \nChoices:\n(A) CT tomographic images of the preimplant plan \n(B) Locations of needles before implantation \n(C) Images of the postimplant plan \n(D) Dosimetric evaluations of GTV and adjacent OARs " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.421875, "loss": 4.551845073699951 }, { "id": "identity_337275", "image": "A-OKVQA/images/127788.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of transportation is depicted?\nChoices:\n(A) air\n(B) rail\n(C) road\n(D) water" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.671875, "loss": 4.527114391326904 }, { "id": "identity_261637", "image": "A-OKVQA/images/114288.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the bus door open at this location?\nChoices:\n(A) loading passengers\n(B) stop light\n(C) driving\n(D) collecting donations" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.90625, "loss": 4.515895366668701 }, { "id": "identity_325544", "image": "PMC-VQA/images/PMC2410174_fig1_23925.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of images are shown?\nChoices:\n(A) T1W\n(B) FLAIR\n(C) T2W-FSE\n(D) PET" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.578125, "loss": 4.810931205749512 }, { "id": "identity_18984", "image": "PMC-VQA/images/PMC5920263_fig01_306710.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the lateral ankle radiographs show? \nChoices:\n(A) Only a fracture of the medial malleolus. \n(B) A fracture of the lateral malleolus. \n(C) A suspicious avulsion fracture on the anterior part of lateral malleolus. \n(D) A suspicious avulsion fracture on posterior part of lateral malleolus. " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.40625, "loss": 4.150771141052246 }, { "id": "identity_289895", "image": "PMC-VQA/images/PMC3784059_Fig3_233800.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the images provided?\nChoices:\n(A) MRI scan \n(B) CT scan \n(C) X-ray \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.296875, "loss": 4.7671990394592285 }, { "id": "identity_295884", "image": "A-OKVQA/images/391887.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which profession is visible among the following options? \n(A) garbage man\n(B) rancher\n(C) firefighter\n(D) cowboy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.125, "loss": 4.462658405303955 }, { "id": "identity_208912", "image": "PMC-VQA/images/PMC4357181_Fig2_366923.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the dashed blue lines in the image?" }, { "from": "gpt", "value": "The answer is Indicate internal division or boundary" } ], "clip_score": 19.875, "loss": 5.159468650817871 }, { "id": "identity_27679", "image": "PMC-VQA/images/PMC4689804_F1_455978.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is displayed in the color-coded image? \nChoices:\n(A) Fiber orientation \n(B) Anisotropy \n(C) Staining intensity \n(D) All of the above. " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.453125, "loss": 4.122892379760742 }, { "id": "identity_129215", "image": "A-OKVQA/images/557944.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the batting helmet?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 25.28125, "loss": 4.745095252990723 }, { "id": "identity_291046", "image": "FigureQA/images/97127.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly describes the smoothness of Dark Salmon? \n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.859375, "loss": 4.4325151443481445 }, { "id": "identity_308544", "image": "TextVQA/images/da7af4d7278cb999.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is this laptop?" }, { "from": "gpt", "value": "The answer is xps" } ], "clip_score": 22.578125, "loss": 4.090166091918945 }, { "id": "identity_106424", "image": "TextVQA/images/912997c3cb1b39d9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many authors contributed to this book?" }, { "from": "gpt", "value": "The answer is Two authors" } ], "clip_score": 11.921875, "loss": 5.31229305267334 }, { "id": "identity_110737", "image": "A-OKVQA/images/50431.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the carriage?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.421875, "loss": 4.255285739898682 }, { "id": "identity_319173", "image": "TextVQA/images/cf12b0d2b6c33fe8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is one of the items at the top of the page?" }, { "from": "gpt", "value": "The answer is features" } ], "clip_score": 14.0859375, "loss": 5.3266401290893555 }, { "id": "identity_228795", "image": "Super-CLEVR/images/superCLEVR_new_000260.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image are larger than the cyan minivan that is left of the green minivan?" }, { "from": "gpt", "value": "The answer is three" } ], "clip_score": 29.4375, "loss": 4.472627639770508 }, { "id": "identity_119430", "image": "TextVQA/images/8924f92be9064c4c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of container is on the counter under the clock?" }, { "from": "gpt", "value": "The answer is Metal" } ], "clip_score": 16.203125, "loss": 5.120458126068115 }, { "id": "identity_297735", "image": "A-OKVQA/images/313948.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of building is depicted in the image?\nOptions:\n(A) expo hall\n(B) barn\n(C) school\n(D) kitchen furnishing" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.046875, "loss": 4.424618721008301 }, { "id": "identity_193490", "image": "A-OKVQA/images/264919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the person wearing on their lower body?" }, { "from": "gpt", "value": "The answer is Nothing visible" } ], "clip_score": 16.015625, "loss": 4.7323384284973145 }, { "id": "identity_29512", "image": "Super-CLEVR/images/superCLEVR_new_000323.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the big yellow object that is behind the big yellow matte bicycle have the same shape as the small green object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 26.3125, "loss": 5.329531669616699 }, { "id": "identity_284799", "image": "ScienceQA/images/931.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which ocean is being emphasized in the following options?\n(A) the Southern Ocean\n(B) the Pacific Ocean\n(C) the Atlantic Ocean\n(D) the Arctic Ocean" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.1875, "loss": 4.0262370109558105 }, { "id": "identity_207852", "image": "A-OKVQA/images/363845.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.34375, "loss": 4.226894378662109 }, { "id": "identity_335576", "image": "A-OKVQA/images/71582.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most apparent thing that has been done to the unusual hair?\nChoices:\n(A) shaved\n(B) glittered\n(C) dyed\n(D) cut" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 8.578125, "loss": 4.736846923828125 }, { "id": "identity_6079", "image": "A-OKVQA/images/507966.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who provided these umbrellas?\nChoices:\n(A) beach owner\n(B) homeless people\n(C) salvation army\n(D) beach goers" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.84375, "loss": 4.077764987945557 }, { "id": "identity_286348", "image": "PMC-VQA/images/PMC5807339_F1_274149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the left panel of the image?\nChoices:\n(A) A spectrum in a single voxel extracted from the multivoxel data at the location of the tumor. \n(B) An X-ray image of the tumor \n(C) A CT scan of the tumor \n(D) The location of the tumor on a brain diagram" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.53125, "loss": 4.384240627288818 }, { "id": "identity_7868", "image": "PMC-VQA/images/PMC7960067_fig1_391626.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the lesion located?\nChoices:\n(A) Left frontal scalp \n(B) Right frontal scalp \n(C) Left occipital scalp \n(D) Right occipital scalp " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.390625, "loss": 4.430109977722168 }, { "id": "identity_28115", "image": "AI2D/images/4145.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What goes into the plant?\nChoices:\n(A) respiration\n(B) water\n(C) milk\n(D) oxygen" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.3125, "loss": 4.006619930267334 }, { "id": "identity_155607", "image": "ScienceQA/images/15905.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the geographical feature that forms the southern border of the highlighted country?" }, { "from": "gpt", "value": "The answer is Connecticut River" } ], "clip_score": 29.734375, "loss": 4.27767276763916 }, { "id": "identity_330568", "image": "PMC-VQA/images/PMC6682236_f3-cpcem-3-243_19654.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was used to capture this image?\nChoices:\n(A) MRI\n(B) X-ray\n(C) CT\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.828125, "loss": 4.427163600921631 }, { "id": "identity_293192", "image": "IconQA/images/40591.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) turn\n(B) flip\n(C) slide" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.15625, "loss": 4.064219951629639 }, { "id": "identity_299328", "image": "PMC-VQA/images/PMC5911611_cam41363-fig-0001_305036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which technology is utilized to capture the image depicted in the figure?\nChoices:\n(A) BCT \n(B) EM \n(C) FET \n(D) PET" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.796875, "loss": 4.423512935638428 }, { "id": "identity_135320", "image": "DVQA/images/bar_train_00184771.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group has the highest 'cost' value?" }, { "from": "gpt", "value": "The answer is plate" } ], "clip_score": 27.296875, "loss": 4.011570453643799 }, { "id": "identity_328797", "image": "DVQA/images/bar_train_00010402.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group of bars has the smallest valued bar in the entire chart?" }, { "from": "gpt", "value": "The answer is speed" } ], "clip_score": 28.59375, "loss": 4.114786624908447 }, { "id": "identity_153447", "image": "TQA/images/abc_question_images/ocean_zones_18126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label indicates the area closest to the shore?" }, { "from": "gpt", "value": "The answer is Neritic zone" } ], "clip_score": 33.0625, "loss": 4.175119400024414 }, { "id": "identity_283316", "image": "DocVQA/images/kydh0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is Prof. Nelson Chaves currently located?" }, { "from": "gpt", "value": "The answer is Director" } ], "clip_score": 21.625, "loss": 4.918766975402832 }, { "id": "identity_210571", "image": "PMC-VQA/images/PMC3971497_fig2_277808.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the dark area labeled with \"L\" in the ultrasound image?" }, { "from": "gpt", "value": "The answer is Liver" } ], "clip_score": 35.96875, "loss": 4.886044502258301 }, { "id": "identity_290558", "image": "PMC-VQA/images/PMC6326132_gf0100_420639.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which specific body part did the imaging procedure concentrate?\nChoices:\n(A) Right upper limb \n(B) Left lower limb \n(C) Abdomen \n(D) Head" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.1875, "loss": 4.830112934112549 }, { "id": "identity_111493", "image": "A-OKVQA/images/340454.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrian crossing signs are visible in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 22.578125, "loss": 4.392769813537598 }, { "id": "identity_271545", "image": "Super-CLEVR/images/superCLEVR_new_001754.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do any small cyan metallic objects have the same shape as the tiny cyan matte object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 22.0, "loss": 5.305307865142822 }, { "id": "identity_115388", "image": "TextVQA/images/10e0f8abcdbd76e2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of numeral style is used on the clock face in the image?" }, { "from": "gpt", "value": "The answer is Roman" } ], "clip_score": 21.5, "loss": 4.098935127258301 }, { "id": "identity_11764", "image": "A-OKVQA/images/293895.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What in the image provides shade?\nChoices:\n(A) towers\n(B) towels\n(C) umbrellas\n(D) trees" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.0703125, "loss": 4.171499729156494 }, { "id": "identity_315052", "image": "ScienceQA/images/4853.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of rock is this?\nChoices:\n(A) sedimentary\n(B) igneous\n(C) metamorphic" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.28125, "loss": 4.2141242027282715 }, { "id": "identity_5467", "image": "AI2D/images/3492.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the image is marked in dark purple colour?\nChoices:\n(A) A\n(B) C\n(C) J\n(D) F" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.578125, "loss": 4.126242637634277 }, { "id": "identity_277744", "image": "ScienceQA/images/16378.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do moss plants have the ability to produce seeds?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.578125, "loss": 4.092111587524414 }, { "id": "identity_325776", "image": "PMC-VQA/images/PMC4506094_pone.0128386.g002_407093.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of lesions are depicted in this image?\nChoices:\n(A) T2*w\n(B) T1\n(C) Phase\n(D) None of the above." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.859375, "loss": 4.05507755279541 }, { "id": "identity_207933", "image": "VQA2.0/images/469248.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many red and white umbrellas can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 14.9140625, "loss": 4.503281593322754 }, { "id": "identity_196526", "image": "VQA2.0/images/179586.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the door to the left of the person?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 18.078125, "loss": 4.114589214324951 }, { "id": "identity_274070", "image": "FigureQA/images/63325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Light Salmon or Dark Magenta?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.296875, "loss": 4.528557300567627 }, { "id": "identity_298561", "image": "VizWiz/images/VizWiz_train_00023875.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Could you please provide me with the number displayed on the CAPTCHA screen?" }, { "from": "gpt", "value": "The answer is subleave" } ], "clip_score": 30.453125, "loss": 5.166523456573486 }, { "id": "identity_327945", "image": "A-OKVQA/images/143764.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is typically shown on the chalkboard in these places?\nChoices:\n(A) taxes\n(B) specials\n(C) tips\n(D) phone numbers" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.015625, "loss": 4.671111583709717 }, { "id": "identity_34937", "image": "PMC-VQA/images/PMC5475293_F0002_180333.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the white arrow correspond to in image B? \nChoices:\n(A) LUS zone 2 \n(B) LUS zone 3 \n(C) LUS zone 1 \n(D) LUS zone 4 " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.21875, "loss": 4.219707012176514 }, { "id": "identity_305501", "image": "TextVQA/images/17f22d6d1632458b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is displayed on this?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 21.09375, "loss": 4.413622856140137 }, { "id": "identity_292459", "image": "ScienceQA/images/19570.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What common property do these three objects share among the following choices: soft, salty, or slippery?\nChoices:\n(A) soft\n(B) salty\n(C) slippery" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.2734375, "loss": 4.2411322593688965 }, { "id": "identity_215991", "image": "A-OKVQA/images/51674.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name written on the side of the boat on the left?" }, { "from": "gpt", "value": "The answer is Ghosts" } ], "clip_score": 20.109375, "loss": 4.504141330718994 }, { "id": "identity_204832", "image": "A-OKVQA/images/5947.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape are the markings on the tail of the planes?" }, { "from": "gpt", "value": "The answer is Roundel" } ], "clip_score": 19.65625, "loss": 4.984710693359375 }, { "id": "identity_269102", "image": "ScienceQA/images/17533.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which description more accurately characterizes the ecosystem of the Steigerwald Forest?\nChoices:\n(A) It experiences cold, wet winters and has soil that lacks nutrients.\n(B) It has nutrient-rich soil and a limited variety of tree species." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.703125, "loss": 4.153397560119629 }, { "id": "identity_330679", "image": "PMC-VQA/images/PMC7329434_Fig1_203237.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the optical coherence tomography indicating?\nChoices:\n(A) Macular edema\n(B) Capillary nonperfusion areas\n(C) Retinal telangiectasias\n(D) Aneurysmal dilatations" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.09375, "loss": 4.70742130279541 }, { "id": "identity_275205", "image": "ScienceQA/images/9727.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Maryland?\nChoices:\n(A) Annapolis\n(B) Montpelier\n(C) Harrisburg\n(D) Baltimore" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.328125, "loss": 4.089727401733398 }, { "id": "identity_312039", "image": "VQA2.0/images/67407.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are present?" }, { "from": "gpt", "value": "The answer is lot" } ], "clip_score": 11.640625, "loss": 4.44785737991333 }, { "id": "identity_185146", "image": "A-OKVQA/images/305206.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible on the field?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 16.15625, "loss": 4.097898006439209 }, { "id": "identity_319493", "image": "VQA2.0/images/544325.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color shirt is the person on the right wearing?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 16.484375, "loss": 4.704335689544678 }, { "id": "identity_22794", "image": "PMC-VQA/images/PMC7467987_Figure1_247139.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the sagittal reconstruction of the lung show? \nChoices:\n(A) the tumor invades the left atrium \n(B) the tumor invades the right pulmonary artery \n(C) the tumor invades the left pulmonary artery \n(D) the tumor invades the right atrium " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.828125, "loss": 4.118645668029785 }, { "id": "identity_121228", "image": "VQA2.0/images/297462.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many poles are visible on the ferry dock?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 27.609375, "loss": 4.487771034240723 }, { "id": "identity_148625", "image": "DocVQA/images/qlyn0081_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the subject of the letter?" }, { "from": "gpt", "value": "The answer is Weekly highlights" } ], "clip_score": 18.859375, "loss": 4.748709678649902 }, { "id": "identity_262643", "image": "A-OKVQA/images/571950.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What lies beneath this man?\nChoices:\n(A) water\n(B) shadow\n(C) grass\n(D) sand" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.9609375, "loss": 4.474636077880859 }, { "id": "identity_260454", "image": "TQA/images/abc_question_images/photosynthesis_17481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the label that represents the gas absorbed by the plant for photosynthesis and explain the role of this gas in the process" }, { "from": "gpt", "value": "The answer is F" } ], "clip_score": 28.53125, "loss": 4.298799991607666 }, { "id": "identity_131498", "image": "TextVQA/images/bbc3c318765994c5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of structure is depicted in the letter \"N\"?" }, { "from": "gpt", "value": "The answer is A bridge" } ], "clip_score": 8.53125, "loss": 4.102320671081543 }, { "id": "identity_35543", "image": "PMC-VQA/images/PMC7814755_hbm25261-fig-0002_345124.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the main point depicted in the top right panel? \nChoices:\n(A) The diagram of the spinal trigeminal nucleus (sTN) \n(B) The midline of the brainstem \n(C) The overview of deterministic bundle-specific tractography \n(D) The production of streamlines in the AP. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.015625, "loss": 4.497815132141113 }, { "id": "identity_16041", "image": "PMC-VQA/images/PMC4684858_fig2_454668.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the soft tissue mass observed in the CT scan? \nChoices:\n(A) Carotid space \n(B) Buccal space \n(C) Pterygopalatine fossa \n(D) Nasopharynx area. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.15625, "loss": 4.432344436645508 }, { "id": "identity_296455", "image": "ScienceQA/images/20901.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of New Jersey?\nChoices:\n(A) Trenton\n(B) Providence\n(C) Baton Rouge\n(D) Burlington" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.546875, "loss": 4.140979290008545 }, { "id": "identity_328101", "image": "TQA/images/question_images/ozone_formation_9202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is produced when UV radiation interacts with oxygen molecules?\nChoices:\n(A) ozone molecule\n(B) oxygen atoms\n(C) hydrogen atoms\n(D) helium atoms" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.53125, "loss": 4.259025573730469 }, { "id": "identity_331824", "image": "TextVQA/images/1ce8e44536df5739.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this a large electronic device?" }, { "from": "gpt", "value": "The answer is samsung" } ], "clip_score": 15.9609375, "loss": 5.676380634307861 }, { "id": "identity_38167", "image": "PMC-VQA/images/PMC6988233_Fig1_107589.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are some characteristics of the pulmonary fibrosis shown in the images?\nChoices:\n(A) Decreased reticulation and honeycombing, \n(B) Lower lobe predominant and air trapping, \n(C) Traction bronchiectasis and honeycombing, \n(D) Emphysema and blebs " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.96875, "loss": 4.035122871398926 }, { "id": "identity_275831", "image": "TQA/images/question_images/lewis_dot_diagrams_9134.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of this molecule? (A) bfff (B) BF3 (C) FBFF (D) 3FB" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.546875, "loss": 4.105399131774902 }, { "id": "identity_313639", "image": "A-OKVQA/images/17409.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How old is this producer?\nChoices:\n(A) 150 years\n(B) 60 years\n(C) 40 years\n(D) 200 years" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 6.8984375, "loss": 4.089859962463379 }, { "id": "identity_214650", "image": "VQA2.0/images/107353.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of establishment is advertised on the signboard on the right?" }, { "from": "gpt", "value": "The answer is Computer Sales and Networking" } ], "clip_score": 16.5, "loss": 4.473276138305664 }, { "id": "identity_265631", "image": "DVQA/images/bar_train_00011446.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store is represented by the slateblue color?" }, { "from": "gpt", "value": "The answer is task" } ], "clip_score": 26.390625, "loss": 5.2750935554504395 }, { "id": "identity_110803", "image": "VQA2.0/images/376590.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are watching the skateboarder?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 26.328125, "loss": 4.743660926818848 }, { "id": "identity_266203", "image": "DocVQA/images/xnkp0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the name of the passenger?" }, { "from": "gpt", "value": "The answer is dr. William darby" } ], "clip_score": 22.734375, "loss": 4.656695365905762 }, { "id": "identity_263047", "image": "FigureQA/images/36147.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Web Maroon or Deep Pink?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.03125, "loss": 4.573467254638672 }, { "id": "identity_12498", "image": "PMC-VQA/images/PMC4604447_fig3_433019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the mice body is being suspended when conducting the behavior observation of control mice? \nChoices:\n(A) tail \n(B) head \n(C) hindlimbs \n(D) forelimbs " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.515625, "loss": 4.407443046569824 }, { "id": "identity_28581", "image": "A-OKVQA/images/244231.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the thin objects the birds are sitting on?\nChoices:\n(A) branches\n(B) poles\n(C) power lines\n(D) traps" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.28125, "loss": 4.162551403045654 }, { "id": "identity_34547", "image": "PMC-VQA/images/PMC7803468_Fig5_341099.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is seen in the area of consolidation? \nChoices:\n(A) Blood vessels \n(B) Sonographic air bronchogram \n(C) Intestinal gas \n(D) Muscle tissue " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.484375, "loss": 4.327646732330322 }, { "id": "identity_276460", "image": "VizWiz/images/VizWiz_train_00014879.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the captcha?" }, { "from": "gpt", "value": "The answer is nad6" } ], "clip_score": 29.9375, "loss": 4.099214553833008 }, { "id": "identity_146072", "image": "DocVQA/images/mrpx0078_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full name of the participant?" }, { "from": "gpt", "value": "The answer is Lee Edward Smith" } ], "clip_score": 22.828125, "loss": 4.743326663970947 }, { "id": "identity_279883", "image": "TQA/images/abc_question_images/parts_telescope_18152.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the letter that corresponds to the lens in this telescope:\n(A) P\n(B) F\n(C) W\n(D) E" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.609375, "loss": 4.224174499511719 }, { "id": "identity_269258", "image": "ScienceQA/images/20454.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which question is most suitable for Rudy's experiment to answer?\nChoices:\n(A) Can pennies hold more drops of pure water or water mixed with hand soap?\n(B) Can pennies hold more drops of water mixed with dish soap or water mixed with hand soap?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.328125, "loss": 4.103672981262207 }, { "id": "identity_125201", "image": "VQA2.0/images/186923.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the water?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 12.671875, "loss": 4.663984775543213 }, { "id": "identity_321609", "image": "DVQA/images/bar_train_00148414.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the fifth group of bars?" }, { "from": "gpt", "value": "The answer is ride" } ], "clip_score": 20.734375, "loss": 5.1040544509887695 }, { "id": "identity_323655", "image": "ScienceQA/images/5204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic fits this item?\nChoices:\n(A) sweet\n(B) yellow" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.453125, "loss": 4.335238456726074 }, { "id": "identity_36970", "image": "PMC-VQA/images/PMC7251749_Fig1_178969.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the blue and red lines on the graph represent? \nChoices:\n(A) different cell types \n(B) different brain regions \n(C) different age groups \n(D) different genes " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.6875, "loss": 4.648977279663086 }, { "id": "identity_318854", "image": "A-OKVQA/images/142014.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of sign is depicted?\nChoices:\n(A) traffic\n(B) name\n(C) brand\n(D) price" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.46875, "loss": 4.256897926330566 }, { "id": "identity_190709", "image": "PMC-VQA/images/PMC5292996_fig4_122972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What abnormality can be observed in the right lung?" }, { "from": "gpt", "value": "The answer is Right lung consolidation or mass" } ], "clip_score": 25.96875, "loss": 4.015322685241699 }, { "id": "identity_292258", "image": "A-OKVQA/images/27005.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of labeling is present on the barrel? Is it a brand, regulatory, warning, or directional label?\nChoices:\n(A) brand\n(B) regulatory\n(C) warning\n(D) directional" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.125, "loss": 4.754448890686035 }, { "id": "identity_279741", "image": "Super-CLEVR/images/superCLEVR_new_001530.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other fighters constructed from the same material as the small purple fighter?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 19.703125, "loss": 5.084621429443359 }, { "id": "identity_9979", "image": "TQA/images/abc_question_images/parts_microscope_17202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The lens is denoted by which letter in the picture?\nChoices:\n(A) E\n(B) J\n(C) M\n(D) N" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.65625, "loss": 4.00637149810791 }, { "id": "identity_212793", "image": "A-OKVQA/images/533532.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any sporting equipment items visible?" }, { "from": "gpt", "value": "The answer is Yes, a basketball" } ], "clip_score": 10.96875, "loss": 4.531473159790039 }, { "id": "identity_49585", "image": "DVQA/images/bar_train_00158747.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store sold the least number of 'beach' items?" }, { "from": "gpt", "value": "The answer is fear" } ], "clip_score": 29.28125, "loss": 4.885906219482422 }, { "id": "identity_29305", "image": "DVQA/images/bar_train_00167788.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the fifth group of bars from the left?" }, { "from": "gpt", "value": "The answer is storm" } ], "clip_score": 13.65625, "loss": 4.002861976623535 }, { "id": "identity_31750", "image": "A-OKVQA/images/182801.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who has priority on the crossing when the light is red?\nChoices:\n(A) women\n(B) ceos\n(C) pedestrians\n(D) drivers" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.609375, "loss": 4.183030128479004 }, { "id": "identity_332036", "image": "A-OKVQA/images/70744.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: When does this scene take place?\nChoices:\n(A) summer\n(B) vietnam war\n(C) winter\n(D) korean war" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.4375, "loss": 4.713136196136475 }, { "id": "identity_336415", "image": "VizWiz/images/VizWiz_train_00013440.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 16.78125, "loss": 4.129044055938721 }, { "id": "identity_321864", "image": "PMC-VQA/images/PMC3614032_fig1_195871.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the skull was affected by this pathology?\nChoices:\n(A) Temporal bone\n(B) Ethmoid bone\n(C) Sphenoid bone\n(D) Parietal bone" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.34375, "loss": 4.471554756164551 }, { "id": "identity_278553", "image": "TQA/images/abc_question_images/blastocyst_19029.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the label that indicates the trophoblast of the blastocyst among the choices provided: N, T, P, or L?\nChoices:\n(A) N\n(B) T\n(C) P\n(D) L" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.15625, "loss": 4.390861511230469 }, { "id": "identity_196621", "image": "A-OKVQA/images/431675.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trees are visible on the left side of the street?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 29.140625, "loss": 4.118396759033203 }, { "id": "identity_29489", "image": "AI2D/images/1143.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the diagram?\nChoices:\n(A) Fish\n(B) Bacteria\n(C) Plant\n(D) Animal" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.6875, "loss": 4.333065986633301 }, { "id": "identity_271599", "image": "TQA/images/abc_question_images/human_system_excretory_16117.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is the name of the structure connecting J to M? \n(A) FALLOPIAN TUBE\n(B) BRAIN\n(C) URETHRA \n(D) OVARIES" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.171875, "loss": 4.097786903381348 }, { "id": "identity_18612", "image": "FigureQA/images/79301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gray the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.015625, "loss": 4.173890113830566 }, { "id": "identity_157849", "image": "AI2D/images/1845.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary food source for the Impala in the diagram?" }, { "from": "gpt", "value": "The answer is Woodland" } ], "clip_score": 27.59375, "loss": 4.277344226837158 }, { "id": "identity_290321", "image": "Super-CLEVR/images/superCLEVR_new_000189.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the purple object on the right side of the large thing and the small object behind the big purple regular bus constructed from the same material?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 28.84375, "loss": 5.071874141693115 }, { "id": "identity_158523", "image": "TQA/images/question_images/optics_ray_diagrams_9172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the height of the object in relation to the image height?" }, { "from": "gpt", "value": "The answer is h = h'" } ], "clip_score": 27.140625, "loss": 4.0300703048706055 }, { "id": "identity_17745", "image": "TQA/images/question_images/nuclear_energy_7100.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the condenser in the following diagram?\nChoices:\n(A) Provide water for the cooling towers\n(B) Cool the steam and turn it back into water\n(C) Transfer excess water from the turbine to the reservoir\n(D) Heat the water to create steam" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.5625, "loss": 4.503684997558594 }, { "id": "identity_12131", "image": "A-OKVQA/images/343769.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The dominant color on the shirt is the same color as what food item?\nChoices:\n(A) ketchup\n(B) mustard\n(C) salt\n(D) relish" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.53125, "loss": 4.163279056549072 }, { "id": "identity_291060", "image": "TabMWP/images/29810.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of the number of laps driven by the race car driver over the past 5 days?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 35.78125, "loss": 4.2414937019348145 }, { "id": "identity_141890", "image": "DocVQA/images/tkhg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who signed the document on behalf of The Nutrition Foundation, Inc.?" }, { "from": "gpt", "value": "The answer is H. Callender" } ], "clip_score": 25.5, "loss": 4.34196138381958 }, { "id": "identity_217611", "image": "VQA2.0/images/580906.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the car parked in front of the store with a red sign?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 21.4375, "loss": 4.258198261260986 }, { "id": "identity_325099", "image": "A-OKVQA/images/334234.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is illuminated?\nChoices:\n(A) desk\n(B) traffic lights\n(C) bar\n(D) tree" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.90625, "loss": 4.162984371185303 }, { "id": "identity_116024", "image": "A-OKVQA/images/511168.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the mountains in the background?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 13.9296875, "loss": 4.678691864013672 }, { "id": "identity_38640", "image": "PMC-VQA/images/PMC3017954_fig5_83583.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be seen in the image? \nChoices:\n(A) The whole plant \n(B) A flower bud \n(C) A stem node \n(D) A leaf surface " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.28125, "loss": 4.1556525230407715 }, { "id": "identity_36011", "image": "PMC-VQA/images/PMC9088844_F1_276123.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the patient in the figure showing a single or multiple bony abnormalities? \nChoices:\n(A) Single \n(B) Multiple \n(C) Absent \n(D) Unknown " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.96875, "loss": 4.040863513946533 }, { "id": "identity_297264", "image": "ScienceQA/images/19175.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic is shared by these three objects among the following options: transparent, fragile, or yellow?\nChoices:\n(A) transparent\n(B) fragile\n(C) yellow" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.9453125, "loss": 4.364482879638672 }, { "id": "identity_68095", "image": "ScienceQA/images/1480.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many buildings with prominent tower-like structures can be seen in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 19.578125, "loss": 4.726438999176025 }, { "id": "identity_266365", "image": "A-OKVQA/images/222251.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is located above the nose of the woman on the left?\nChoices:\n(A) jewelry\n(B) glasses\n(C) hat\n(D) dirt" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.890625, "loss": 4.82675313949585 }, { "id": "identity_331653", "image": "IconQA/images/94422.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there an even or odd number of hearts?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.59375, "loss": 4.164198875427246 }, { "id": "identity_301106", "image": "PMC-VQA/images/PMC7847261_DMM046995F3_356423.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the outliers shown as in the boxplots?\nChoices:\n(A) Dotted lines \n(B) Bold lines \n(C) Individual points \n(D) Shaded areas" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.7265625, "loss": 4.123048782348633 }, { "id": "identity_269895", "image": "VQA2.0/images/321511.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the photo contain a fence?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 21.796875, "loss": 4.207322120666504 }, { "id": "identity_319799", "image": "A-OKVQA/images/65668.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the person wearing a huge hat?\nChoices:\n(A) disguise\n(B) sun protection\n(C) hide money\n(D) selling hat" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.078125, "loss": 4.317988395690918 }, { "id": "identity_247668", "image": "VQA-AS/images/17034.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image are red, and what are they?" }, { "from": "gpt", "value": "The answer is One red moped" } ], "clip_score": 32.75, "loss": 4.051231861114502 }, { "id": "identity_252874", "image": "ScienceQA/images/7435.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the meanings of the prefixes and suffixes provided in the table, what is the most likely meaning of the word \"antebellum\" and how does it relate to the meanings of the words \"antecedent\" and \"casus belli\"?" }, { "from": "gpt", "value": "The answer is before the war" } ], "clip_score": 31.203125, "loss": 4.25217866897583 }, { "id": "identity_142041", "image": "DocVQA/images/jkxn0226_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who attended the meeting as the Legal Director?" }, { "from": "gpt", "value": "The answer is Ed Mackey" } ], "clip_score": 22.609375, "loss": 4.078124046325684 }, { "id": "identity_43325", "image": "MapQA/images/map_6589.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a value within the range of 19.8%-20.4%?" }, { "from": "gpt", "value": "The answer is an integer number" } ], "clip_score": 17.625, "loss": 4.100424289703369 }, { "id": "identity_115989", "image": "A-OKVQA/images/301977.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different beverage brands can be seen in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 19.609375, "loss": 4.3684186935424805 }, { "id": "identity_81934", "image": "GeoQA+/images/632.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that D, E, and F are midpoints of the sides of triangle ABC, and the area of triangle ABC is 8 cm², what is the length of segment EF?" }, { "from": "gpt", "value": "The answer is Length of AC]/2" } ], "clip_score": 35.5, "loss": 4.161518573760986 }, { "id": "identity_317963", "image": "Super-CLEVR/images/superCLEVR_new_000376.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the shiny motorbike to the right of the gray metallic object the same size as the metallic motorbike left of the large green bicycle?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 34.15625, "loss": 5.139807224273682 }, { "id": "identity_337097", "image": "A-OKVQA/images/319682.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What might one hear when these creatures move?\nChoices:\n(A) screaming\n(B) thunder\n(C) trumpets\n(D) bells" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.1953125, "loss": 4.163861274719238 }, { "id": "identity_209707", "image": "VQA-AS/images/2574.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pieces of furniture are in the room?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 27.765625, "loss": 4.607329368591309 }, { "id": "identity_264985", "image": "PMC-VQA/images/PMC8139191_f1_446949.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which gyrus is situated behind the hypointensity that is being observed?\nChoices:\n(A) Postcentral gyrus\n(B) Precentral gyrus\n(C) Superior parietal lobule\n(D) None of the above." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.375, "loss": 4.296486854553223 }, { "id": "identity_285432", "image": "PMC-VQA/images/PMC7291422_Fig2_191427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: To what does the white arrow in the image direct your attention?\nChoices:\n(A) The liver of the foetus \n(B) The heart of the foetus \n(C) The dilated VGAM \n(D) The straight sinus" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.40625, "loss": 4.183138370513916 }, { "id": "identity_273347", "image": "PMC-VQA/images/PMC6281586_Fig6_408445.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of scan is being utilized among the following options: Axial CT, Sagittal CT, Coronal CT, or MRI?\nChoices:\n(A) Axial CT \n(B) Sagittal CT \n(C) Coronal CT \n(D) MRI" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.140625, "loss": 4.061121940612793 }, { "id": "identity_15274", "image": "TabMWP/images/4510.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is the function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.125, "loss": 4.367845058441162 }, { "id": "identity_172357", "image": "TabMWP/images/21213.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the goal was to pick at least 3,000 apples each day, on which days was the goal not met?" }, { "from": "gpt", "value": "The answer is Two days" } ], "clip_score": 33.1875, "loss": 4.18117618560791 }, { "id": "identity_254558", "image": "PMC-VQA/images/PMC6662306_pbi13074-fig-0003_15391.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different elements are shown in the image, and which elements are they?" }, { "from": "gpt", "value": "The answer is One element, Fe" } ], "clip_score": 21.015625, "loss": 4.729501247406006 }, { "id": "identity_147689", "image": "DocVQA/images/qxmp0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the code mentioned at the bottom of the page?" }, { "from": "gpt", "value": "The answer is SBM:atm" } ], "clip_score": 16.21875, "loss": 4.541974067687988 }, { "id": "identity_282506", "image": "PMC-VQA/images/PMC5585196_F2_211657.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the perspective utilized to categorize the iNPH and HC groups? \nOptions:\n(A) Bottom perspective\n(B) Front perspective\n(C) Left perspective\n(D) Top perspective" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.28125, "loss": 5.139166355133057 }, { "id": "identity_330321", "image": "TextVQA/images/b2e74817b47adeae.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does this board say?" }, { "from": "gpt", "value": "The answer is sugar" } ], "clip_score": 14.453125, "loss": 5.46359395980835 }, { "id": "identity_306022", "image": "IconQA/images/88462.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of this is purple?\nChoices:\n(A) 2/8\n(B) 1/10\n(C) 1/8\n(D) 1/6" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.09375, "loss": 4.078131675720215 }, { "id": "identity_36844", "image": "Super-CLEVR/images/superCLEVR_new_001722.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The big red matte thing is what shape?" }, { "from": "gpt", "value": "The answer is minivan" } ], "clip_score": 26.140625, "loss": 4.149038791656494 }, { "id": "identity_142718", "image": "DocVQA/images/rtvw0217_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the manuscript number for the paper titled \"The Role of HRT in the Prevention of Postmenopausal Heart Disease\"?" }, { "from": "gpt", "value": "The answer is SERMs #4" } ], "clip_score": 24.890625, "loss": 4.8359832763671875 }, { "id": "identity_268877", "image": "VQA-AS/images/12623.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What game are the boys playing?" }, { "from": "gpt", "value": "The answer is catch" } ], "clip_score": 21.9375, "loss": 4.343417644500732 }, { "id": "identity_138660", "image": "DVQA/images/bar_train_00071692.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second group of bars from the top?" }, { "from": "gpt", "value": "The answer is member" } ], "clip_score": 22.546875, "loss": 4.286175727844238 }, { "id": "identity_2775", "image": "PMC-VQA/images/PMC7545865_Fig2_268918.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the yellow arrows in (d) indicate about the cerebral veins? \nChoices:\n(A) Blockage \n(B) Normal opacification \n(C) Aneurysm \n(D) Intracranial bleeding " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 36.3125, "loss": 4.815119743347168 }, { "id": "identity_278777", "image": "Super-CLEVR/images/superCLEVR_new_000203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any small scooters located behind the metallic motorbike that is in front of the blue rubber biplane?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 25.96875, "loss": 5.476252555847168 }, { "id": "identity_187029", "image": "VQA-AS/images/10183.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many paintings are hanging on the wall?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.828125, "loss": 4.380759239196777 }, { "id": "identity_331997", "image": "A-OKVQA/images/171785.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of thing is this?\nChoices:\n(A) deck\n(B) slide\n(C) flume\n(D) ride" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.234375, "loss": 4.426119804382324 }, { "id": "identity_328962", "image": "PMC-VQA/images/PMC4255819_fig02_341658.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What distinguishes the two images shown?\nChoices:\n(A) Different muscles were imaged \n(B) The images were taken at different time points \n(C) The images were taken at different angular velocities \n(D) The images were taken at different contraction types." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.796875, "loss": 4.6656341552734375 }, { "id": "identity_325157", "image": "A-OKVQA/images/495051.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why has the person covered their head?\nChoices:\n(A) style\n(B) keep cool\n(C) protection\n(D) uniform" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 9.4765625, "loss": 4.379782199859619 }, { "id": "identity_310943", "image": "PMC-VQA/images/PMC2627817_F1_32599.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this scan of the chest show?\nChoices:\n(A) Bone fracture\n(B) Cavitary lesion with air-fluid level\n(C) Nasal congestion\n(D) Swollen lymph nodes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.453125, "loss": 4.093073844909668 }, { "id": "identity_6878", "image": "FigureQA/images/62359.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Khaki the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.515625, "loss": 4.3103227615356445 }, { "id": "identity_331337", "image": "PMC-VQA/images/PMC4330230_Fig7_358755.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was observed on the MRI scans?\nChoices:\n(A) No uptake or reaction on a femoral shaft.\n(B) Severe uptake and bone reaction on a femoral shaft.\n(C) Mild uptake and minor bone reaction on a femoral shaft.\n(D) Mild uptake and major bone reaction on a femoral shaft." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.046875, "loss": 4.078505516052246 }, { "id": "identity_204064", "image": "TextVQA/images/6d4a18e451f37808.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What feature of the toy is highlighted with a special label on the packaging?" }, { "from": "gpt", "value": "The answer is Force Action" } ], "clip_score": 21.328125, "loss": 4.10252571105957 }, { "id": "identity_270225", "image": "Super-CLEVR/images/superCLEVR_new_000965.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the dimensions of the blue mountain bike next to the blue rubber car equal to the size of the car next to the small metallic bus?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 27.53125, "loss": 4.901566028594971 }, { "id": "identity_336434", "image": "A-OKVQA/images/497119.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the small dog wearing in the sidecar?\nChoices:\n(A) helmet\n(B) scarf\n(C) hat\n(D) tshirt" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.65625, "loss": 4.472812175750732 }, { "id": "identity_324042", "image": "PMC-VQA/images/PMC3192142_pone-0026113-g001_111577.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the orange dots on this illustrate?\nChoices:\n(A) Optimal stimulation site \n(B) The coil orientation \n(C) Anatomically defined hand-knob \n(D) The mapping of the area around the anatomically defined hand-knob." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.953125, "loss": 4.004714012145996 }, { "id": "identity_112917", "image": "TextVQA/images/f57febb0fc7224f2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What series is being advertised on the yellow flyer?" }, { "from": "gpt", "value": "The answer is Winter Speaker Series" } ], "clip_score": 14.921875, "loss": 4.06219482421875 }, { "id": "identity_109492", "image": "Super-CLEVR/images/superCLEVR_new_000756.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the tiny wagon closest to the camera made of?" }, { "from": "gpt", "value": "The answer is Metal" } ], "clip_score": 21.328125, "loss": 4.145806789398193 }, { "id": "identity_24363", "image": "A-OKVQA/images/48319.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of event is this?\nChoices:\n(A) zoo\n(B) dog show\n(C) theme park\n(D) sporting event" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.171875, "loss": 4.299449443817139 }, { "id": "identity_312502", "image": "FigureQA/images/53769.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Pale Green the lower median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.0, "loss": 4.057625770568848 }, { "id": "identity_219378", "image": "VQA-AS/images/4140.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trees are there in the background?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 15.0703125, "loss": 4.253100872039795 }, { "id": "identity_51626", "image": "DVQA/images/bar_train_00058987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset has the highest accuracy for the touch algorithm?" }, { "from": "gpt", "value": "The answer is humor" } ], "clip_score": 32.40625, "loss": 4.045883655548096 }, { "id": "identity_61529", "image": "FigureQA/images/49042.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the smallest section in the pie chart?" }, { "from": "gpt", "value": "The answer is Peru" } ], "clip_score": 34.53125, "loss": 4.23429536819458 }, { "id": "identity_67239", "image": "ScienceQA/images/2701.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many white stripes does the fish have?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.078125, "loss": 4.221020221710205 }, { "id": "identity_327827", "image": "IconQA/images/40745.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on?\nChoices:\n(A) yellow\n(B) white" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.984375, "loss": 4.423079967498779 }, { "id": "identity_118615", "image": "A-OKVQA/images/337277.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many books are on the bookshelf to the left?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 17.9375, "loss": 4.494149684906006 }, { "id": "identity_20078", "image": "PMC-VQA/images/PMC8718535_Fig2_153886.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the target lesion? \nChoices:\n(A) Segment 2/3 \n(B) Segment 4/5 \n(C) Segment 5/6 \n(D) Segment 8/9 " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.765625, "loss": 4.283053874969482 }, { "id": "identity_187500", "image": "VQA2.0/images/483956.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the height of the ladder compared to the bird sculpture?" }, { "from": "gpt", "value": "The answer is Shorter" } ], "clip_score": 23.484375, "loss": 4.492832183837891 }, { "id": "identity_115482", "image": "VQA2.0/images/500576.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many legs of the horse are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.734375, "loss": 4.563436031341553 }, { "id": "identity_300956", "image": "A-OKVQA/images/188587.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of aircraft is shown here?\nChoices:\n(A) jet\n(B) biplane\n(C) helicopter\n(D) seaplane" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.765625, "loss": 4.367484092712402 }, { "id": "identity_302695", "image": "TextVQA/images/f787cf818d8943c4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What name is mentioned in the bottom right?" }, { "from": "gpt", "value": "The answer is victor gutierrez" } ], "clip_score": 13.9765625, "loss": 4.222358226776123 }, { "id": "identity_22801", "image": "PMC-VQA/images/PMC7232200_microorganisms-08-00582-f009_172750.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the arrows in the image indicate?\nChoices:\n(A) destroyed organoids \n(B) osmiophilic material \n(C) electron-transparent vacuole \n(D) cell membrane " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.421875, "loss": 4.083154678344727 }, { "id": "identity_4469", "image": "VQA2.0/images/575590.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of busses are those?" }, { "from": "gpt", "value": "The answer is school" } ], "clip_score": 26.390625, "loss": 4.335500717163086 }, { "id": "identity_90437", "image": "Geometry3K/images/962.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of x if the quadrilateral is a parallelogram and y = 7?" }, { "from": "gpt", "value": "The answer is No solution" } ], "clip_score": 35.59375, "loss": 4.320539474487305 }, { "id": "identity_300364", "image": "IconQA/images/26662.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the shape here?\nChoices:\n(A) square\n(B) circle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.6875, "loss": 4.064613342285156 }, { "id": "identity_32487", "image": "PMC-VQA/images/PMC7441549_Fig1_238604.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the expansion seen in the image? \nChoices:\n(A) Through the mouth roof \n(B) Through the jaw bone \n(C) Through the floor of the mouth \n(D) Through the nose " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.046875, "loss": 4.027824401855469 }, { "id": "identity_267528", "image": "AI2D/images/3495.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the symbol P represent?\nChoices:\n(A) none of the above\n(B) nucleus\n(C) chloroplast\n(D) ribosome" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.890625, "loss": 4.248814105987549 }, { "id": "identity_332818", "image": "IconQA/images/12928.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, how likely is it that you will get a black one?\nChoices:\n(A) certain\n(B) unlikely\n(C) impossible\n(D) probable" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.6875, "loss": 4.209570407867432 }, { "id": "identity_328601", "image": "VQA-AS/images/4644.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the person's pants?" }, { "from": "gpt", "value": "The answer is gray" } ], "clip_score": 19.78125, "loss": 4.003924369812012 }, { "id": "identity_326663", "image": "A-OKVQA/images/178258.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What role does this person play?\nChoices:\n(A) actor\n(B) motorcycle racer\n(C) stuntman\n(D) terrorist" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.296875, "loss": 4.593344211578369 }, { "id": "identity_294938", "image": "PMC-VQA/images/PMC7341097_advs1797-fig-0005_206608.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in image B?\nChoices:\n(A) Ex vivo fluorescent imaging. \n(B) Image-guided resection of the tumor. \n(C) Immunofluorescent staining of the tumor. \n(D) Collection of organs for analysis." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.234375, "loss": 4.153542518615723 }, { "id": "identity_297907", "image": "PMC-VQA/images/PMC3439461_pone-0044086-g002_154302.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What structure is located in the center of the MRI section?\nChoices:\n(A) Medulla oblongata and pons\n(B) Olfactory bulbs and beginning of the telencephalon\n(C) Cerebral cortex\n(D) Thalamus." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.125, "loss": 4.403558254241943 }, { "id": "identity_316130", "image": "Super-CLEVR/images/superCLEVR_new_001038.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any other thing that is the same size as this object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 19.25, "loss": 4.8218560218811035 }, { "id": "identity_19565", "image": "VQA2.0/images/181992.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the nearest storage bin?" }, { "from": "gpt", "value": "The answer is orange" } ], "clip_score": 17.234375, "loss": 4.6691694259643555 }, { "id": "identity_270161", "image": "A-OKVQA/images/519950.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for his arm being raised so high?\nChoices:\n(A) is tired\n(B) hit ball\n(C) wants attention\n(D) is falling" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.2109375, "loss": 4.783605575561523 }, { "id": "identity_276733", "image": "PMC-VQA/images/PMC5818700_Fig1_278002.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of echocardiogram was utilized to capture the image out of the following options: (A) Transthoracic echocardiogram, (B) Cardiac MRI, (C) Transesophageal echocardiogram, or (D) Pulmonary angiogram?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.4375, "loss": 4.590145111083984 }, { "id": "identity_134926", "image": "DocVQA/images/hxpw0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the sender?" }, { "from": "gpt", "value": "The answer is Ken Podraza" } ], "clip_score": 20.078125, "loss": 5.304971694946289 }, { "id": "identity_138120", "image": "DocVQA/images/tjgn0020_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the Promotion?" }, { "from": "gpt", "value": "The answer is Camel RB21F (Naked)" } ], "clip_score": 19.609375, "loss": 4.600339412689209 }, { "id": "identity_281473", "image": "IconQA/images/82325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there a triangle as a face in this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.09375, "loss": 4.018919944763184 }, { "id": "identity_115307", "image": "VQA2.0/images/204424.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of flowers can be seen in the image?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 19.46875, "loss": 4.968760967254639 }, { "id": "identity_276302", "image": "FigureQA/images/10169.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater in shade, Indian Red or Salmon?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.296875, "loss": 4.605249404907227 }, { "id": "identity_319027", "image": "ScienceQA/images/1549.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this text describe the weather or the climate?\nChoices:\n(A) weather\n(B) climate" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 5.86328125, "loss": 4.183335781097412 }, { "id": "identity_184438", "image": "PMC-VQA/images/PMC6692028_pone.0220922.g001_22949.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image in relation to the patient's body?" }, { "from": "gpt", "value": "The answer is Horizontal cross-section" } ], "clip_score": 31.40625, "loss": 4.1066155433654785 }, { "id": "identity_316981", "image": "ScienceQA/images/20468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Montpelier\n(B) Burlington\n(C) Dover\n(D) Providence" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.421875, "loss": 4.338964462280273 }, { "id": "identity_208641", "image": "PMC-VQA/images/PMC5487236_f1_183519.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the structure indicated by the dashed arrow?" }, { "from": "gpt", "value": "The answer is Duct or vessel" } ], "clip_score": 18.59375, "loss": 4.054539680480957 }, { "id": "identity_110705", "image": "VQA-AS/images/868.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of structure is behind the woman?" }, { "from": "gpt", "value": "The answer is A sandbox" } ], "clip_score": 23.234375, "loss": 4.194550037384033 }, { "id": "identity_325602", "image": "FigureQA/images/25240.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Olive the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.296875, "loss": 4.69757604598999 }, { "id": "identity_306430", "image": "Super-CLEVR/images/superCLEVR_new_001715.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this small shiny thing have the same shape as the yellow rubber thing?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 15.265625, "loss": 4.919938087463379 }, { "id": "identity_317498", "image": "TQA/images/question_images/human_system_respiratory_6192.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part is directly above the trachea?\nChoices:\n(A) larynx\n(B) nasal cavity\n(C) pharynx\n(D) alveoli" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.0, "loss": 4.132069110870361 }, { "id": "identity_196153", "image": "A-OKVQA/images/203986.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tennis players are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.546875, "loss": 4.803308486938477 }, { "id": "identity_113337", "image": "VQA2.0/images/576809.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many railroad crossing arms can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 31.109375, "loss": 4.374734401702881 }, { "id": "identity_327520", "image": "PMC-VQA/images/PMC8085189_Fig1_431007.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the position during the CT scan?\nChoices:\n(A) Supine\n(B) Prone\n(C) Standing\n(D) Lateral" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.671875, "loss": 4.382153511047363 }, { "id": "identity_333974", "image": "IconQA/images/60746.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this shape?\nChoices:\n(A) slide\n(B) turn\n(C) flip" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.90625, "loss": 4.80239200592041 }, { "id": "identity_43504", "image": "DVQA/images/bar_train_00050005.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the bar with the second highest value?" }, { "from": "gpt", "value": "The answer is love" } ], "clip_score": 20.15625, "loss": 4.355029582977295 }, { "id": "identity_325183", "image": "PMC-VQA/images/PMC6996318_fba21096-fig-0003_109928.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the red rectangle in the upper panels signify? \nChoices:\n(A) Area scanned with a 5 µm magnification \n(B) Area scanned with a 1 µm magnification \n(C) The area with the largest number of axons \n(D) The area of the corpus callosum with the thickest myelin coating." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.9375, "loss": 4.4622039794921875 }, { "id": "identity_331345", "image": "A-OKVQA/images/124907.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of hat is the person on the right wearing?\nChoices:\n(A) bowlers cap\n(B) newsboy cap\n(C) bottle cap\n(D) baseball cap" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.6015625, "loss": 4.427187442779541 }, { "id": "identity_271050", "image": "FigureQA/images/55279.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Red considered the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.59375, "loss": 4.452356338500977 }, { "id": "identity_143201", "image": "PlotQA/images/54720.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color code for the year 1960?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 11.6171875, "loss": 4.264026165008545 }, { "id": "identity_319174", "image": "ScienceQA/images/4447.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the expected ratio of offspring with a certain body type to offspring with another body type? Choose the most likely ratio.\nChoices:\n(A) 3:1\n(B) 1:3\n(C) 0:4\n(D) 2:2\n(E) 4:0" }, { "from": "gpt", "value": "The answer is E" } ], "clip_score": 30.015625, "loss": 4.013119220733643 }, { "id": "identity_297426", "image": "A-OKVQA/images/579232.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is at risk? \n(A) pedestrian \n(B) bikes \n(C) signs \n(D) cars" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.96875, "loss": 4.02069616317749 }, { "id": "identity_273590", "image": "VQA2.0/images/228678.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which letter is the 8th one going down the sign on the right?" }, { "from": "gpt", "value": "The answer is t" } ], "clip_score": 18.0625, "loss": 4.232209205627441 }, { "id": "identity_117540", "image": "TextVQA/images/f135bcc3b6b9e1bf.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shirt the person is wearing?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 15.9296875, "loss": 4.9172234535217285 }, { "id": "identity_317822", "image": "TQA/images/question_images/human_system_muscular_6151.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the quadriceps connected to in the leg?\nChoices:\n(A) Biceps\n(B) Frontalis\n(C) Deltoid\n(D) Tibialis anterior" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.25, "loss": 4.118044376373291 }, { "id": "identity_330672", "image": "A-OKVQA/images/333684.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the person have on their feet?\nChoices:\n(A) sandals\n(B) cowboy boots\n(C) sneakers\n(D) dress shoes" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.453125, "loss": 4.423192024230957 }, { "id": "identity_306918", "image": "TextVQA/images/13ec950b0f7f07db.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is this watch?" }, { "from": "gpt", "value": "The answer is nomos " } ], "clip_score": 30.5, "loss": 4.937671661376953 }, { "id": "identity_336142", "image": "TabMWP/images/30333.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table displays a function. Is it linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.625, "loss": 4.26854133605957 }, { "id": "identity_19426", "image": "A-OKVQA/images/102111.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What famous bird is also this colour?\nChoices:\n(A) parrot\n(B) black bird\n(C) flamingo\n(D) eagle" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.3046875, "loss": 4.21612548828125 }, { "id": "identity_26430", "image": "AI2D/images/4190.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does b represent in the diagram?\nChoices:\n(A) melting\n(B) weathering\n(C) crystalisation\n(D) none of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.125, "loss": 4.033987998962402 }, { "id": "identity_333184", "image": "Super-CLEVR/images/superCLEVR_new_000838.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any small shiny things of the same shape as the red object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 15.8046875, "loss": 5.2527079582214355 }, { "id": "identity_239205", "image": "A-OKVQA/images/278528.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary natural element responsible for the movement of the flying object, and how does this element interact with the object to enable its flight?" }, { "from": "gpt", "value": "The answer is wind" } ], "clip_score": 19.609375, "loss": 4.7780280113220215 }, { "id": "identity_330552", "image": "IconQA/images/85395.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this shape?\nChoices:\n(A) flip\n(B) turn\n(C) slide" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.125, "loss": 4.171507358551025 }, { "id": "identity_315908", "image": "IconQA/images/33266.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, which color are you less likely to choose?\nChoices:\n(A) black\n(B) light blue\n(C) neither; black and light blue are equally likely" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.90625, "loss": 4.638464450836182 }, { "id": "identity_306869", "image": "ScienceQA/images/14576.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Helena\n(B) Nampa\n(C) Boise\n(D) Philadelphia" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.0, "loss": 4.546144008636475 }, { "id": "identity_256427", "image": "FigureQA/images/9610.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the lengths of the bars for Navy Blue and Black, and how does this compare to the length of the bar for Salmon?" }, { "from": "gpt", "value": "The answer is equal" } ], "clip_score": 28.25, "loss": 5.603146553039551 }, { "id": "identity_2483", "image": "A-OKVQA/images/207491.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what setting does this bus drive?\nChoices:\n(A) rural\n(B) sand desert\n(C) urban\n(D) industrial" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.5390625, "loss": 4.310641288757324 }, { "id": "identity_33091", "image": "VQA2.0/images/110105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the child keeping the man from falling?" }, { "from": "gpt", "value": "The answer is no; taking picture" } ], "clip_score": 16.109375, "loss": 5.010456085205078 }, { "id": "identity_309393", "image": "TextVQA/images/50e76ba6145534c6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do you think if there are no people crossing, vehicles will still wait?" }, { "from": "gpt", "value": "The answer is go.." } ], "clip_score": 21.171875, "loss": 5.304941654205322 }, { "id": "identity_20016", "image": "A-OKVQA/images/526414.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The white machine is used to manipulate what?\nChoices:\n(A) paper\n(B) fabric\n(C) metal\n(D) plastic" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 9.6875, "loss": 4.114803314208984 }, { "id": "identity_271957", "image": "TQA/images/abc_question_images/human_system_muscular_16165.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label that corresponds to the adductor muscles from the following options:\n(A) T\n(B) R\n(C) A\n(D) F" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.828125, "loss": 4.227023124694824 }, { "id": "identity_22596", "image": "PMC-VQA/images/PMC6647078_Fig2_12408.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique is used to visualize ADC mapping? \nChoices:\n(A) DCE-MRI \n(B) DWI \n(C) Mammography \n(D) PET-CT " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.703125, "loss": 4.487257480621338 }, { "id": "identity_300822", "image": "IconQA/images/18148.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this shape?\nChoices:\n(A) turn\n(B) flip\n(C) slide" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.484375, "loss": 4.168817043304443 }, { "id": "identity_211448", "image": "TextVQA/images/1a3ff09978212117.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the book with a lighthouse on the cover?" }, { "from": "gpt", "value": "The answer is Et Bofællesskab for Orphaned Children" } ], "clip_score": 26.78125, "loss": 4.862851142883301 }, { "id": "identity_29629", "image": "PMC-VQA/images/PMC7125094_Fig3_144707.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the scale bar represent in the images? \nChoices:\n(A) 20 magnification \n(B) 102 micrometers \n(C) both A and B \n(D) none of the above " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.828125, "loss": 4.061180114746094 }, { "id": "identity_283414", "image": "PMC-VQA/images/PMC7847261_DMM046995F3_356415.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the bold arrows in the image?\nChoices:\n(A) Destruction of crypts \n(B) Fibrosis \n(C) Influx of inflammatory cells \n(D) Edema." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.65625, "loss": 4.06327486038208 }, { "id": "identity_301960", "image": "A-OKVQA/images/270812.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name for the tallest structure?\nChoices:\n(A) clock tower\n(B) library\n(C) station\n(D) pub" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.671875, "loss": 4.357834339141846 }, { "id": "identity_13825", "image": "DVQA/images/bar_train_00103536.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the chart contain stacked bars?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 26.921875, "loss": 4.4349751472473145 }, { "id": "identity_265533", "image": "A-OKVQA/images/426026.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which season are these individuals having a good time at the park?\nChoices:\n(A) fall\n(B) summer\n(C) winter\n(D) spring" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.8125, "loss": 4.237701416015625 }, { "id": "identity_249439", "image": "A-OKVQA/images/413164.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the steps the persons seated here will follow after ordering their food, and what will be their final action before leaving the restaurant?" }, { "from": "gpt", "value": "The answer is leave the restaurant" } ], "clip_score": 21.15625, "loss": 5.134601593017578 }, { "id": "identity_248951", "image": "VQA2.0/images/64619.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image, and what is the color of the glasses worn by the person closest to the cake?" }, { "from": "gpt", "value": "The answer is Three people, brown" } ], "clip_score": 22.875, "loss": 4.79386043548584 }, { "id": "identity_293329", "image": "A-OKVQA/images/110251.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be found behind the fence?\nChoices:\n(A) cat\n(B) dog\n(C) antelope\n(D) playground" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.265625, "loss": 4.250156879425049 }, { "id": "identity_197746", "image": "VQA2.0/images/550421.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible light fixtures are there in the ceiling?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 14.3359375, "loss": 4.497633934020996 }, { "id": "identity_127502", "image": "TextVQA/images/e684c27b650e0154.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the background of the image?" }, { "from": "gpt", "value": "The answer is Yellow and red gradient" } ], "clip_score": 16.78125, "loss": 4.339643478393555 }, { "id": "identity_299184", "image": "TQA/images/question_images/human_system_digestive_1397.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which body part is represented by Letter B in the given choices? \n(A) Rectum\n(B) Lungs\n(C) Head\n(D) Stomach" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 34.625, "loss": 4.388431549072266 }, { "id": "identity_184789", "image": "VizWiz/images/VizWiz_train_00009127.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the brand associated with this game?" }, { "from": "gpt", "value": "The answer is ZeniMax Media" } ], "clip_score": 19.453125, "loss": 4.109751224517822 }, { "id": "identity_330115", "image": "Super-CLEVR/images/superCLEVR_new_001877.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the tiny object behind the gray metal truck the same shape as the rubber thing to the right of the tiny blue bus?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 32.34375, "loss": 5.1655378341674805 }, { "id": "identity_215259", "image": "A-OKVQA/images/380248.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the armchair to the left of the red couch?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 25.15625, "loss": 4.431243896484375 }, { "id": "identity_298423", "image": "PMC-VQA/images/PMC5788696_F3_268707.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the long arrow indicate in the Sagittal view?\nChoices:\n(A) Abnormal bone originating from the inner table of the left occipital bone\n(B) Abnormal bone originating from the outer table of the left occipital bone\n(C) Abnormal bone originating from the inner table of the right occipital bone\n(D) Abnormal bone originating from the outer table of the right occipital bone" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.84375, "loss": 4.053826808929443 }, { "id": "identity_31349", "image": "PMC-VQA/images/PMC8128441_fig4_443760.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown at the same magnification for direct comparison? \nChoices:\n(A) Normal and mutated viruses \n(B) Pru∆hxg and mutant parasites \n(C) Scale bars and Pru∆hxg \n(D) Mutant parasites and normal cells " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.0625, "loss": 4.4395647048950195 }, { "id": "identity_300852", "image": "VQA-RAD/images/synpic56061.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which part of the brain is affected?" }, { "from": "gpt", "value": "The answer is Anterior surface" } ], "clip_score": 28.96875, "loss": 4.515498161315918 }, { "id": "identity_30322", "image": "DVQA/images/bar_train_00073292.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 23.328125, "loss": 4.312132835388184 }, { "id": "identity_317282", "image": "TextVQA/images/064068705af4ecab.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is this watch?" }, { "from": "gpt", "value": "The answer is rolec" } ], "clip_score": 27.40625, "loss": 4.847066402435303 }, { "id": "identity_273034", "image": "ScienceQA/images/20611.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repulsive force between them?\nChoices:\n(A) attract\n(B) repel" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.140625, "loss": 4.038588523864746 }, { "id": "identity_310204", "image": "PMC-VQA/images/PMC4551529_Fig2_418030.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the image labeled \"iii ghost tangle\"?\nChoices:\n(A) Intense tau staining of PHFs\n(B) Absence of PHFs\n(C) Ghost tangles with lightly staining PHFs\n(D) Ghost tangles with robust staining of fibrous PHFs." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.90625, "loss": 4.643603324890137 }, { "id": "identity_141112", "image": "MapQA/images/map_2435.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states fall into the middle percentage range for hospitals?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 36.21875, "loss": 4.521791458129883 }, { "id": "identity_193746", "image": "TextVQA/images/1ce8e44536df5739.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the store where this shopping cart is located?" }, { "from": "gpt", "value": "The answer is Dia" } ], "clip_score": 26.890625, "loss": 4.000617027282715 }, { "id": "identity_332490", "image": "A-OKVQA/images/524044.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who uses this item located here?\nChoices:\n(A) child\n(B) adult\n(C) elder\n(D) no body" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.328125, "loss": 4.322495460510254 }, { "id": "identity_288586", "image": "A-OKVQA/images/29525.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options is located close to the car: a bison, a museum, bicycles, or apple pie?\nChoices:\n(A) bison\n(B) museum\n(C) bicycles\n(D) apple pie" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.6875, "loss": 4.143438816070557 }, { "id": "identity_130907", "image": "VQA2.0/images/489422.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 15.4375, "loss": 4.031661510467529 }, { "id": "identity_1752", "image": "ScienceQA/images/7782.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following organisms is the secondary consumer in this food web?\nChoices:\n(A) beaver\n(B) bobcat\n(C) persimmon tree\n(D) swallowtail caterpillar" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.734375, "loss": 4.470410346984863 }, { "id": "identity_323249", "image": "PMC-VQA/images/PMC8387894_fig1_40854.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a characteristic of this lesion?\nChoices:\n(A) Smooth edges \n(B) No contrast enhancement \n(C) Located on the frontal lobe \n(D) Causes ischemic stroke" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.765625, "loss": 4.409202575683594 }, { "id": "identity_205021", "image": "A-OKVQA/images/112022.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are holding a mobile device?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.90625, "loss": 5.029694557189941 }, { "id": "identity_329623", "image": "TQA/images/question_images/faults_1711.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which type is the hanging wall below something else?\nChoices:\n(A) Normal\n(B) Simple\n(C) Complicated\n(D) Reverse" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.234375, "loss": 4.25701379776001 }, { "id": "identity_282310", "image": "PMC-VQA/images/PMC9182059_molecules-27-03631-f005_306952.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the meaning of the double arrow symbol shown in the image?\nChoices:\n(A) Expansion \n(B) Immobilization \n(C) Control \n(D) Double magnification" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.921875, "loss": 4.428597450256348 }, { "id": "identity_280784", "image": "AI2D/images/3062.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the letter B represent in the given choices?\n(A) none of the above\n(B) vacuoles\n(C) nucleus\n(D) cell membrane" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.796875, "loss": 4.116141319274902 }, { "id": "identity_299138", "image": "PMC-VQA/images/PMC5542187_F3_199626.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of microscopy was utilized to capture the images: TEM, SEM, Confocal, or Brightfield?\nChoices:\n(A) TEM \n(B) SEM \n(C) Confocal \n(D) Brightfield" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.09375, "loss": 4.04694128036499 }, { "id": "identity_291290", "image": "PMC-VQA/images/PMC7820043_Fig2_347188.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What findings are visible in the CCD results?\nChoices:\n(A) Bone loss\n(B) Retained teeth\n(C) Cysts\n(D) Caries" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.609375, "loss": 4.965502738952637 }, { "id": "identity_184875", "image": "A-OKVQA/images/149849.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object on the left side of the image?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 15.3828125, "loss": 4.272263526916504 }, { "id": "identity_272615", "image": "VizWiz/images/VizWiz_train_00008371.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me what is written on this shirt?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 25.03125, "loss": 4.311366081237793 }, { "id": "identity_221128", "image": "A-OKVQA/images/400592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many books can be seen on the desk?" }, { "from": "gpt", "value": "The answer is Three books" } ], "clip_score": 24.484375, "loss": 4.984560966491699 }, { "id": "identity_43554", "image": "DocVQA/images/htmd0079_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of authorization is marked for this request?" }, { "from": "gpt", "value": "The answer is Capital Experimental" } ], "clip_score": 20.78125, "loss": 5.266482830047607 }, { "id": "identity_27956", "image": "PMC-VQA/images/PMC6694554_Fig1_23746.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the mass located in the CT scan?\nChoices:\n(A) Right supraclavicular fossa \n(B) Left supraclavicular fossa \n(C) Subclavian fossa \n(D) Inferior clavicle fossa " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.78125, "loss": 4.6607160568237305 }, { "id": "identity_285349", "image": "FigureQA/images/20889.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater in value, Light Gold or Medium Aqua?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.40625, "loss": 4.573987007141113 }, { "id": "identity_59618", "image": "MapQA/images/map_10462.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have more than 165 countries?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 31.15625, "loss": 4.862124919891357 }, { "id": "identity_298197", "image": "ScienceQA/images/19827.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic describes this object?\nChoices:\n(A) yellow\n(B) breakable" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.953125, "loss": 4.247265338897705 }, { "id": "identity_142761", "image": "DVQA/images/bar_train_00151721.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the longest blue bar?" }, { "from": "gpt", "value": "The answer is plot" } ], "clip_score": 29.0625, "loss": 4.330315589904785 }, { "id": "identity_287338", "image": "PMC-VQA/images/PMC7071674_Fig4_133768.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the term \"central cupping area\" refer to?\nChoices:\n(A) A heart cavity \n(B) A blood vessel \n(C) An opening in the skin \n(D) A visual representation of red blood cells" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.71875, "loss": 4.196515083312988 }, { "id": "identity_111541", "image": "VQA2.0/images/114697.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of hat is the person wearing?" }, { "from": "gpt", "value": "The answer is Bucket hat" } ], "clip_score": 15.703125, "loss": 4.026939868927002 }, { "id": "identity_309583", "image": "AI2D/images/3403.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is found at the very end?\nChoices:\n(A) Ventral tufts\n(B) Lateral hair\n(C) Caudal hair\n(D) Spiracular valves" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.90625, "loss": 4.499485492706299 }, { "id": "identity_316351", "image": "PMC-VQA/images/PMC4269278_fig1_345313.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the imaging show within these areas?\nChoices:\n(A) Innumerable lobulated enhancing masses \n(B) Normal breast tissue \n(C) Abscesses \n(D) Cysts" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.796875, "loss": 4.029886245727539 }, { "id": "identity_291069", "image": "A-OKVQA/images/98493.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which age group primarily uses this space? (A) toddlers, (B) teens, (C) senior citizens, or (D) adults?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.2578125, "loss": 4.896462917327881 }, { "id": "identity_208341", "image": "PMC-VQA/images/PMC6584983_Fig1_496312.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiological sign indicated by the arrowheads in the image?" }, { "from": "gpt", "value": "The answer is Target sign indicating intussusception" } ], "clip_score": 27.5, "loss": 4.282339096069336 }, { "id": "identity_294092", "image": "PMC-VQA/images/PMC3599802_F2_192545.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the item highlighted in blue in the image?\nChoices:\n(A) Planning target volume \n(B) Clinical target volume \n(C) Gross tumor volume \n(D) Target volume for treatment" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.1875, "loss": 4.14038610458374 }, { "id": "identity_18668", "image": "IconQA/images/94292.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a house. Which picture is third?\nChoices:\n(A) paw\n(B) house\n(C) dog" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.703125, "loss": 4.735235691070557 }, { "id": "identity_290980", "image": "PMC-VQA/images/PMC7711629_jpm-10-00218-f001_313425.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of lesion is most likely depicted in the image provided? \nOptions:\n(A) Hemorrhage\n(B) Cyst\n(C) Edema\n(D) Tumor" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.0625, "loss": 4.707500457763672 }, { "id": "identity_30975", "image": "A-OKVQA/images/513282.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what country is this river in?\nChoices:\n(A) italy\n(B) britain\n(C) france\n(D) spain" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.25, "loss": 4.111533164978027 }, { "id": "identity_265463", "image": "A-OKVQA/images/406075.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose is the white object on the man's waist typically used?\nChoices:\n(A) floating\n(B) slimming\n(C) dress code\n(D) protecting backbone" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.0625, "loss": 4.460020542144775 }, { "id": "identity_282774", "image": "PMC-VQA/images/PMC8610683_fig5_117707.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the study, which section of the intestine was visualized?\nChoices:\n(A) Stomach\n(B) Colon\n(C) Duodenum\n(D) Ileum" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.28125, "loss": 4.849284648895264 }, { "id": "identity_34369", "image": "PMC-VQA/images/PMC4436458_fig2_387646.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the heart is shown in this M-mode tracing? \nChoices:\n(A) Aorta \n(B) Superior vena cava \n(C) Interventricular septum \n(D) Left atrium " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.03125, "loss": 4.673356056213379 }, { "id": "identity_107463", "image": "TextVQA/images/009a1573c01ff327.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct flowers can be seen in the bottom left corner of the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 12.4296875, "loss": 4.483578681945801 }, { "id": "identity_329931", "image": "TextVQA/images/a1a8116013e1bb8b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is wearing number 27?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 14.921875, "loss": 5.246927738189697 }, { "id": "identity_21710", "image": "PMC-VQA/images/PMC5446551_Fig6_169581.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the microbleed lesions located? \nChoices:\n(A) Right basal ganglia \n(B) Left basal ganglia \n(C) Cerebellum \n(D) Frontal lobe " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.078125, "loss": 4.110967636108398 }, { "id": "identity_43353", "image": "DVQA/images/bar_train_00048517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the lowest 'critic' value?" }, { "from": "gpt", "value": "The answer is grave" } ], "clip_score": 23.546875, "loss": 4.09817361831665 }, { "id": "identity_231567", "image": "AI2D/images/485.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the grass and cacti disappeared from this food web, which organisms would be directly and indirectly affected, and how would this impact the overall food web structure?" }, { "from": "gpt", "value": "The answer is The hawk would face significant food scarcity" } ], "clip_score": 37.28125, "loss": 4.081847667694092 }, { "id": "identity_129815", "image": "VQA2.0/images/502197.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pigeons can be seen on the ground?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 23.140625, "loss": 4.38517427444458 }, { "id": "identity_300035", "image": "TQA/images/abc_question_images/earth_eclipses_11627.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter indicates the Zone of Totality?\nChoices:\n(A) H\n(B) N\n(C) M\n(D) U" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.296875, "loss": 4.343994140625 }, { "id": "identity_275954", "image": "FigureQA/images/87630.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly describes the smoothness of Dark Gold? \n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.828125, "loss": 4.151488304138184 }, { "id": "identity_335925", "image": "A-OKVQA/images/574332.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the person's coat yellow in color?\nChoices:\n(A) dress code\n(B) fashion\n(C) visibility\n(D) camouflage" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.71875, "loss": 4.003828048706055 }, { "id": "identity_300857", "image": "A-OKVQA/images/373395.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is this train here?\nChoices:\n(A) abandoned\n(B) is stuck\n(C) is broken\n(D) is station" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.828125, "loss": 4.136025905609131 }, { "id": "identity_23882", "image": "A-OKVQA/images/573973.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the oval object connected to this person's keys?\nChoices:\n(A) carabiner\n(B) ubolt\n(C) key chain\n(D) knife" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.359375, "loss": 4.492137432098389 }, { "id": "identity_36622", "image": "VQA-AS/images/10460.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the tree bare?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.90625, "loss": 4.163543701171875 }, { "id": "identity_28677", "image": "A-OKVQA/images/546378.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the cat near?\nChoices:\n(A) dog\n(B) boxes\n(C) egg carton\n(D) books" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.65625, "loss": 4.373889923095703 }, { "id": "identity_35512", "image": "A-OKVQA/images/513854.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How is electricity being transported?\nChoices:\n(A) power lines\n(B) trucks\n(C) frisbee\n(D) clouds" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.078125, "loss": 4.116091251373291 }, { "id": "identity_184445", "image": "A-OKVQA/images/401917.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cameras are mounted on tripods?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.015625, "loss": 5.206892967224121 }, { "id": "identity_277037", "image": "TQA/images/question_images/seasons_638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the planet depicted in the image? Is it:\n(A) Mars\n(B) Earth\n(C) Venus\n(D) Saturn" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.765625, "loss": 4.066893577575684 }, { "id": "identity_184105", "image": "TextVQA/images/197c5fe966c56c3e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the object on the left side of the coin?" }, { "from": "gpt", "value": "The answer is A Jug" } ], "clip_score": 26.828125, "loss": 6.231936931610107 }, { "id": "identity_56775", "image": "DVQA/images/bar_train_00021839.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the smallest absolute value in the 'cabin' row?" }, { "from": "gpt", "value": "The answer is manner" } ], "clip_score": 26.90625, "loss": 5.6944403648376465 }, { "id": "identity_296103", "image": "FigureQA/images/13325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater in value, Sky Blue or Salmon?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.84375, "loss": 4.625658988952637 }, { "id": "identity_338554", "image": "PMC-VQA/images/PMC3513975_fig1_170285.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the red arrows indicate in this image?\nChoices:\n(A) Blood flow \n(B) Normal brain tissue \n(C) Tumors \n(D) Inflammatory lesions" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.296875, "loss": 4.632552146911621 }, { "id": "identity_317612", "image": "TextVQA/images/22c32952b17e0cbe.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand name of these wines?" }, { "from": "gpt", "value": "The answer is cadus" } ], "clip_score": 29.90625, "loss": 4.92182731628418 }, { "id": "identity_307271", "image": "TQA/images/abc_question_images/aquifers_16522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates a well affected by sea water?\nChoices:\n(A) J\n(B) M\n(C) H\n(D) X" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.90625, "loss": 4.289193153381348 }, { "id": "identity_314647", "image": "FigureQA/images/6736.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the largest?\n\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.328125, "loss": 4.824668884277344 }, { "id": "identity_36556", "image": "TextVQA/images/e7fbafc374af79fc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what text can you see?" }, { "from": "gpt", "value": "The answer is novia" } ], "clip_score": 12.6484375, "loss": 4.2557220458984375 }, { "id": "identity_299700", "image": "A-OKVQA/images/142263.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: To which type of bird are those birds related? \n(A) raven\n(B) robin\n(C) peacock\n(D) dove" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 12.90625, "loss": 4.064941883087158 }, { "id": "identity_37903", "image": "DVQA/images/bar_train_00138423.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first bar from the left in each group?" }, { "from": "gpt", "value": "The answer is appeal" } ], "clip_score": 23.390625, "loss": 4.297511100769043 }, { "id": "identity_283828", "image": "PMC-VQA/images/PMC2791877_fig5_52768.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the white lines in the image represent?\nChoices:\n(A) Connectivity between yellow nuclei \n(B) Connectivity between red nuclei \n(C) The basal membrane of the epithelium \n(D) Blood vessels in the tissue." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.59375, "loss": 4.348514556884766 }, { "id": "identity_107181", "image": "VQA2.0/images/299326.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the countertop?" }, { "from": "gpt", "value": "The answer is Dark color" } ], "clip_score": 22.734375, "loss": 4.578232288360596 }, { "id": "identity_32283", "image": "A-OKVQA/images/180488.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: After traversing over the jump what natural force will cause the boarder to return to the ground?\nChoices:\n(A) gravity\n(B) inertia\n(C) fission\n(D) kinetic energy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.8671875, "loss": 4.503044605255127 }, { "id": "identity_294367", "image": "Super-CLEVR/images/superCLEVR_new_000290.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the metallic airplane that is the same size as the yellow jet?" }, { "from": "gpt", "value": "The answer is gray" } ], "clip_score": 21.296875, "loss": 4.068546772003174 }, { "id": "identity_18034", "image": "DVQA/images/bar_train_00089909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item sold the least units in any shop?" }, { "from": "gpt", "value": "The answer is shift" } ], "clip_score": 30.625, "loss": 4.267982482910156 }, { "id": "identity_61766", "image": "DocVQA/images/qfjg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the person who wrote the letter?" }, { "from": "gpt", "value": "The answer is Production Manager" } ], "clip_score": 20.765625, "loss": 4.135729789733887 }, { "id": "identity_268951", "image": "VizWiz/images/VizWiz_train_00013954.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is kitchen" } ], "clip_score": 21.84375, "loss": 4.476900100708008 }, { "id": "identity_23106", "image": "ScienceQA/images/20726.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Nevada?\nChoices:\n(A) Carson City\n(B) Trenton\n(C) Las Vegas\n(D) Salem" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.890625, "loss": 4.101160526275635 }, { "id": "identity_213291", "image": "VizWiz/images/VizWiz_train_00009734.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many large orange flowers are visible on the lamp?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 30.40625, "loss": 4.569745063781738 }, { "id": "identity_317240", "image": "A-OKVQA/images/328120.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the people who own these bikes while this photo was taken?\nChoices:\n(A) at work\n(B) overseas\n(C) skydiving\n(D) home" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.46875, "loss": 4.083010673522949 }, { "id": "identity_282017", "image": "A-OKVQA/images/522819.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of meal will be offered later? Will it be:\n(A) formal\n(B) buffet\n(C) casual\n(D) potluck" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.078125, "loss": 4.105358123779297 }, { "id": "identity_282794", "image": "A-OKVQA/images/519784.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be found on the ground beside the van?\nChoices:\n(A) arrow\n(B) footprints\n(C) coyote\n(D) snow" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 5.43359375, "loss": 4.2704339027404785 }, { "id": "identity_112157", "image": "PMC-VQA/images/PMC3708829_pgen-1003641-g010_217436.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label \"M\" signify in the image?" }, { "from": "gpt", "value": "The answer is The blue colored area on the right" } ], "clip_score": 16.734375, "loss": 4.067690849304199 }, { "id": "identity_26617", "image": "PMC-VQA/images/PMC7764608_diagnostics-10-01076-f009_328756.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the cells commonly seen in the image? \nChoices:\n(A) Eosinophils \n(B) Lymphocytes \n(C) Neutrophils \n(D) Monocytes " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.78125, "loss": 4.318350791931152 }, { "id": "identity_270734", "image": "TextVQA/images/00c9f3f8e860c48f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To which destination is the bus headed?" }, { "from": "gpt", "value": "The answer is carisa- hospital" } ], "clip_score": 25.96875, "loss": 5.17706823348999 }, { "id": "identity_228292", "image": "PMC-VQA/images/PMC6261832_rcr2392-fig-0002_402074.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the specific lung segment shown in the lower section images and describe the anatomical landmarks that help in identifying this segment" }, { "from": "gpt", "value": "The answer is Right lower lung segment" } ], "clip_score": 26.9375, "loss": 4.500375270843506 }, { "id": "identity_333208", "image": "ScienceQA/images/14999.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What best describes a population on an island in the Bahamas?\nChoices:\n(A) the rocky soil and the small trees\n(B) the brown anole lizards\n(C) the Bahamian boa constrictors and the curly-tailed lizards" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.125, "loss": 4.432693958282471 }, { "id": "identity_328443", "image": "PMC-VQA/images/PMC2478714_pone-0002853-g002_25662.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrow indicate in this image?\nChoices:\n(A) blood vessels\n(B) heart structures\n(C) tail structures\n(D) brain structures" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.71875, "loss": 4.731350421905518 }, { "id": "identity_110549", "image": "PMC-VQA/images/PMC5736520_Fig8_254782.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is located at the center of the image, indicated by the green color?" }, { "from": "gpt", "value": "The answer is The heart" } ], "clip_score": 26.71875, "loss": 4.060048580169678 }, { "id": "identity_308031", "image": "ScienceQA/images/1910.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Orlando\n(B) Olympia\n(C) Denver\n(D) Spokane" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.546875, "loss": 4.310233116149902 }, { "id": "identity_260339", "image": "ScienceQA/images/16294.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are two properties that match this object, considering its color and material?" }, { "from": "gpt", "value": "The answer is yellow and flexible" } ], "clip_score": 25.171875, "loss": 4.0840163230896 }, { "id": "identity_266732", "image": "ScienceQA/images/19193.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the organism that acts as the producer in this food web:\n(A) rotifer\n(B) green algae\n(C) shiner" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.625, "loss": 4.067896366119385 }, { "id": "identity_269619", "image": "FigureQA/images/89250.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Khaki the one with the largest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.296875, "loss": 4.574835300445557 }, { "id": "identity_283120", "image": "A-OKVQA/images/77602.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the activity being done by the individuals in the first row?\nChoices:\n(A) celebrating\n(B) sitting\n(C) eating\n(D) photographing" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 11.5546875, "loss": 4.678404331207275 }, { "id": "identity_290920", "image": "ScienceQA/images/20812.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What could Leon's test potentially demonstrate based on the given choices?\nChoices:\n(A) how well the weather station would work when it was windy\n(B) if the weather station would work when the temperature was 50°C" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.71875, "loss": 4.062346935272217 }, { "id": "identity_19509", "image": "FigureQA/images/34870.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orange the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.46875, "loss": 4.5606489181518555 }, { "id": "identity_305136", "image": "FigureQA/images/35426.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.34375, "loss": 4.220151901245117 }, { "id": "identity_261882", "image": "TQA/images/abc_question_images/human_system_muscular_16163.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location can the deltoid be found?\nChoices:\n(A) V\n(B) H\n(C) P\n(D) M" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.390625, "loss": 4.626237392425537 }, { "id": "identity_266351", "image": "Super-CLEVR/images/superCLEVR_new_000413.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other objects that share the same shape as the small red item?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 15.734375, "loss": 4.992802619934082 }, { "id": "identity_16151", "image": "ScienceQA/images/20086.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) soft\n(B) translucent" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.234375, "loss": 4.350192546844482 }, { "id": "identity_228723", "image": "A-OKVQA/images/61732.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify three specific features in the image that indicate the primary activity people are most likely engaging in, and explain how each feature supports this activity" }, { "from": "gpt", "value": "The answer is gambling" } ], "clip_score": 18.515625, "loss": 4.060003757476807 }, { "id": "identity_36784", "image": "PMC-VQA/images/PMC4619722_pone.0140537.g005_437036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the white arrow indicate in the image? \nChoices:\n(A) Position of the EEG electrodes. \n(B) The negative BOLD response. \n(C) The relevant positive BOLD response. \n(D) None of the above. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.828125, "loss": 4.9959564208984375 }, { "id": "identity_147563", "image": "DocVQA/images/rgvh0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the suggested action regarding the report and budget?" }, { "from": "gpt", "value": "The answer is Accept report and approve requested budget" } ], "clip_score": 21.546875, "loss": 4.137470722198486 }, { "id": "identity_225349", "image": "VQA-AS/images/429.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people and animals are in the room, and what is the color of the curtains?" }, { "from": "gpt", "value": "The answer is Three; white with floral patterns" } ], "clip_score": 27.671875, "loss": 4.671574592590332 }, { "id": "identity_78460", "image": "AI2D/images/1507.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does the feature in number 5 differ from the feature in number 3?" }, { "from": "gpt", "value": "The answer is Number 5 shows tilting and movement" } ], "clip_score": 24.3125, "loss": 4.288511276245117 }, { "id": "identity_194449", "image": "TextVQA/images/03e77cd31f9d6306.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What word is visible on the back of the green shirt in the foreground?" }, { "from": "gpt", "value": "The answer is HAWKEYE" } ], "clip_score": 18.265625, "loss": 4.371009349822998 }, { "id": "identity_297622", "image": "PMC-VQA/images/PMC9273173_FIG3_337353.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the arrow in the image?\nChoices:\n(A) cerebral artery thrombosis \n(B) cerebral bleeding \n(C) normal brain tissue \n(D) cerebral vein thrombosis" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.578125, "loss": 4.263177871704102 }, { "id": "identity_214490", "image": "VQA2.0/images/352082.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 13.625, "loss": 4.741118907928467 }, { "id": "identity_196244", "image": "A-OKVQA/images/395339.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flags are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.546875, "loss": 4.1685686111450195 }, { "id": "identity_7350", "image": "A-OKVQA/images/25997.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is he holding his hand out?\nChoices:\n(A) is pointing\n(B) to balance\n(C) is confused\n(D) to catch" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.5234375, "loss": 4.191720008850098 }, { "id": "identity_296284", "image": "TextVQA/images/02a657b3b00696a0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the characteristics used to describe the Tasmanian devil?" }, { "from": "gpt", "value": "The answer is bad company" } ], "clip_score": 11.2578125, "loss": 5.105988025665283 }, { "id": "identity_213466", "image": "A-OKVQA/images/524525.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the carriage on the bridge?" }, { "from": "gpt", "value": "The answer is Brown or orange" } ], "clip_score": 21.578125, "loss": 4.0064802169799805 }, { "id": "identity_288462", "image": "PMC-VQA/images/PMC4101959_fig13_306893.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image provided, where is the edema located?\nChoices:\n(A) Right thalamus\n(B) Left thalamus\n(C) Cerebellum\n(D) Brainstem" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.40625, "loss": 4.370069980621338 }, { "id": "identity_136189", "image": "DocVQA/images/yxyk0226_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the person listed under Snack Food Association?" }, { "from": "gpt", "value": "The answer is Pres" } ], "clip_score": 24.609375, "loss": 4.0536088943481445 }, { "id": "identity_13777", "image": "DVQA/images/bar_train_00097399.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the chart contain stacked bars?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 25.609375, "loss": 4.19959831237793 }, { "id": "identity_321126", "image": "TQA/images/abc_question_images/earth_day_night_10096.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents sunlight?\nChoices:\n(A) M\n(B) L\n(C) R\n(D) V" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.328125, "loss": 4.581148147583008 }, { "id": "identity_71733", "image": "AI2D/images/3896.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of the stem in this plant?" }, { "from": "gpt", "value": "The answer is Support and transport" } ], "clip_score": 29.625, "loss": 4.0842814445495605 }, { "id": "identity_77783", "image": "AI2D/images/4751.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different Maple tree leaf shapes are depicted in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 29.953125, "loss": 4.177631378173828 }, { "id": "identity_27554", "image": "IconQA/images/18005.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can you trace a square with this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.265625, "loss": 4.246384620666504 }, { "id": "identity_66224", "image": "ScienceQA/images/1072.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image are designed for children?" }, { "from": "gpt", "value": "The answer is Two objects" } ], "clip_score": 30.921875, "loss": 5.171692371368408 }, { "id": "identity_308602", "image": "PMC-VQA/images/PMC8600697_Fig4_115109.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the orientation of this image of the tibial plateau in MC group?\nChoices:\n(A) longitudinal plane\n(B) vertical plane\n(C) coronal plane\n(D) horizontal plane" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.875, "loss": 4.428324222564697 }, { "id": "identity_14182", "image": "TQA/images/question_images/human_system_respiratory_3585.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What connects the larynx to the bronchus?\nChoices:\n(A) pharynx\n(B) carina of strachea\n(C) trachea\n(D) bronchi" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.140625, "loss": 4.575037479400635 }, { "id": "identity_266002", "image": "PMC-VQA/images/PMC5311912_f6_128224.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of microscopy was utilized to capture the micrograph? \nOptions:\n(A) Transmission electron microscopy (TEM)\n(B) Scanning tunneling microscopy (STM)\n(C) Confocal laser-scanning microscopy\n(D) Atomic force microscopy (AFM)" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.15625, "loss": 4.411007404327393 }, { "id": "identity_289979", "image": "Super-CLEVR/images/superCLEVR_new_001968.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the red rubber object that is the same size as the red shiny object?" }, { "from": "gpt", "value": "The answer is utility bike" } ], "clip_score": 26.328125, "loss": 5.041258335113525 }, { "id": "identity_73607", "image": "AI2D/images/1184.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many centrioles are present in the animal cell diagram?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 28.15625, "loss": 4.003746509552002 }, { "id": "identity_17911", "image": "A-OKVQA/images/119513.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of trick is the boy in grey performing?\nChoices:\n(A) manual\n(B) flip\n(C) grind\n(D) grab" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 12.609375, "loss": 4.359175205230713 }, { "id": "identity_37540", "image": "PMC-VQA/images/PMC8037803_jcm-10-01364-f001_414806.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the black line in the image represent?\nChoices:\n(A) The skin incision point \n(B) The surgical incision point \n(C) The area of the herniated disc \n(D) The location where the bone graft will be placed" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.484375, "loss": 4.11700963973999 }, { "id": "identity_283871", "image": "PMC-VQA/images/PMC9229633_tomography-08-00115-f001_323519.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the right images of each panel?\nChoices:\n(A) Temporal horn width measurements\n(B) Fronto-horn distances and Evans' index measurements\n(C) Ventricular size and shape\n(D) Hippocampal atrophy measurements" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.828125, "loss": 4.497715473175049 }, { "id": "identity_295285", "image": "PMC-VQA/images/PMC6013276_f0010_331796.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of infarct was observed on the CT scan? \n(A) Hemorrhagic infarct \n(B) Embolic infarct \n(C) Lacunar infarct \n(D) Ischemic infarct" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.46875, "loss": 4.280569553375244 }, { "id": "identity_131419", "image": "TextVQA/images/7c03a446c0be1f0f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sale signs can be seen in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 22.40625, "loss": 4.597792148590088 }, { "id": "identity_124813", "image": "Super-CLEVR/images/superCLEVR_new_001743.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the seats visible through the windows of the bus?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 23.921875, "loss": 4.408607482910156 }, { "id": "identity_336666", "image": "IconQA/images/2948.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on?\nChoices:\n(A) white\n(B) pink" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.3125, "loss": 4.459926605224609 }, { "id": "identity_17016", "image": "ScienceQA/images/20468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Vermont?\nChoices:\n(A) Montpelier\n(B) Burlington\n(C) Dover\n(D) Providence" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.9375, "loss": 4.164137840270996 }, { "id": "identity_276603", "image": "AI2D/images/3938.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: To which body part does label a correspond?\nChoices:\n(A) Heart\n(B) Small intestines\n(C) Stomach\n(D) Liver" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.4375, "loss": 4.402596473693848 }, { "id": "identity_296125", "image": "A-OKVQA/images/450263.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of landmark are these individuals currently located at? Is it an amusement park, wharf, beach, or city park?\nChoices:\n(A) amusement park\n(B) wharf\n(C) beach\n(D) city park" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.140625, "loss": 4.309607028961182 }, { "id": "identity_267580", "image": "PMC-VQA/images/PMC4402572_fig10_379167.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What object is depicted in the image?\nChoices:\n(A) Face mask \n(B) Brain mask \n(C) Heart mask \n(D) Liver mask" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.234375, "loss": 4.4246625900268555 }, { "id": "identity_301491", "image": "TQA/images/question_images/cycle_rock_6733.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do these rocks transform into after erosion and weathering?\nChoices:\n(A) Metamorphic rocks\n(B) Sediments\n(C) Magma\n(D) Volcanic eruptions" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.953125, "loss": 4.168417453765869 }, { "id": "identity_293193", "image": "VQA2.0/images/296851.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What season is it currently?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 9.8359375, "loss": 4.123141288757324 }, { "id": "identity_22295", "image": "VQA2.0/images/336181.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of buildings are at the far end of the street?" }, { "from": "gpt", "value": "The answer is stores" } ], "clip_score": 22.078125, "loss": 4.79559850692749 }, { "id": "identity_187171", "image": "PMC-VQA/images/PMC5394153_Fig4_153497.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image in relation to the patient's body?" }, { "from": "gpt", "value": "The answer is Axial orientation" } ], "clip_score": 27.984375, "loss": 4.353473663330078 }, { "id": "identity_270388", "image": "Super-CLEVR/images/superCLEVR_new_001855.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a metal jet located behind the purple metal bus that is positioned to the left of the yellow object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 18.59375, "loss": 5.226105690002441 }, { "id": "identity_187532", "image": "VQA2.0/images/69733.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the elephants' skin?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 28.265625, "loss": 4.103058815002441 }, { "id": "identity_324596", "image": "PMC-VQA/images/PMC3807398_F6_239221.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is stained green here?\nChoices:\n(A) α-tubulin\n(B) nuclei\n(C) eGFP\n(D) none of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.890625, "loss": 4.182466506958008 }, { "id": "identity_308501", "image": "PMC-VQA/images/PMC8870907_diagnostics-12-00384-f006_203200.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which side shows anterior widening of this joint?\nChoices:\n(A) right side\n(B) left side\n(C) both sides\n(D) none of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.109375, "loss": 4.311882972717285 }, { "id": "identity_303488", "image": "PMC-VQA/images/PMC7898273_rcr2710-fig-0002_372020.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which structures are affected in these areas of the lungs?\nChoices:\n(A) Alveoli\n(B) Bronchial tubes\n(C) Air sacs\n(D) Nasal passages" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.140625, "loss": 5.40421199798584 }, { "id": "identity_20262", "image": "FigureQA/images/88824.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Olive have the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.9375, "loss": 4.570265293121338 }, { "id": "identity_319494", "image": "FigureQA/images/94031.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Aqua's value the highest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.4375, "loss": 4.001466274261475 }, { "id": "identity_302713", "image": "VQA2.0/images/239001.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the reason for the animal being here?" }, { "from": "gpt", "value": "The answer is hungry" } ], "clip_score": 14.78125, "loss": 4.062093257904053 }, { "id": "identity_260700", "image": "VQA2.0/images/376590.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of trick is the skateboarder performing, and what are the key elements that define this trick?" }, { "from": "gpt", "value": "The answer is An invert trick on a vert ramp" } ], "clip_score": 29.828125, "loss": 4.313148021697998 }, { "id": "identity_221525", "image": "VQA2.0/images/110105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the top of the sign next to the start line?" }, { "from": "gpt", "value": "The answer is Sm the Cupen" } ], "clip_score": 22.515625, "loss": 6.902931213378906 }, { "id": "identity_30558", "image": "IconQA/images/102098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner less likely to land?\nChoices:\n(A) white\n(B) blue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.625, "loss": 4.172943592071533 }, { "id": "identity_35226", "image": "PMC-VQA/images/PMC4703759_F2_459831.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the SEM images show? \nChoices:\n(A) Microbial mat on a barite chimney \n(B) SEM images of a microbial mat \n(C) Higher magnification of barite crystals \n(D) SEM images of a barite mine. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.203125, "loss": 4.530520915985107 }, { "id": "identity_38216", "image": "A-OKVQA/images/458079.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What sort of goods are sold in the shop depicted in the blue sign?\nChoices:\n(A) train tickets\n(B) snacks\n(C) souvenirs\n(D) coffee" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.59375, "loss": 4.132330417633057 }, { "id": "identity_302897", "image": "VizWiz/images/VizWiz_train_00002993.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this?" }, { "from": "gpt", "value": "The answer is veterinary health hospital" } ], "clip_score": 26.5625, "loss": 4.147954940795898 }, { "id": "identity_333787", "image": "IconQA/images/5674.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a star. What is the seventh image?\nChoices:\n(A) rocket\n(B) moon\n(C) star" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.71875, "loss": 4.083466529846191 }, { "id": "identity_298019", "image": "PMC-VQA/images/PMC8083891_f1_430536.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the red * in the image provided?\nChoices:\n(A) Esophageal hernia \n(B) Lung calcification \n(C) Pericardial effusion \n(D) Pulmonary nodule" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.4375, "loss": 4.139987468719482 }, { "id": "identity_281500", "image": "Super-CLEVR/images/superCLEVR_new_000832.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the rubber object located behind the large blue bicycle?" }, { "from": "gpt", "value": "The answer is double bus" } ], "clip_score": 30.546875, "loss": 5.044633865356445 }, { "id": "identity_11307", "image": "Super-CLEVR/images/superCLEVR_new_000012.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a tiny brown car made of the same material as the brown tandem bike?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 17.9375, "loss": 5.1820878982543945 }, { "id": "identity_330705", "image": "PMC-VQA/images/PMC3034685_F4_86438.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the red arrowheads indicate in these images?\nChoices:\n(A) Notochord staining\n(B) Anterior end\n(C) Dorsal side\n(D) Posterior end" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.171875, "loss": 4.693501949310303 }, { "id": "identity_324270", "image": "PMC-VQA/images/PMC3808353_pone-0078309-g003_239464.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the solid line in the image?\nChoices:\n(A) The ventral border of the dorsal horn \n(B) The dorsal border of the dorsal horn \n(C) The boundary between laminae II and III \n(D) The boundary between laminae III and IV." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.21875, "loss": 4.1139960289001465 }, { "id": "identity_334813", "image": "FigureQA/images/13325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Salmon?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.875, "loss": 4.067871570587158 }, { "id": "identity_28177", "image": "Super-CLEVR/images/superCLEVR_new_001874.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the green car?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 22.328125, "loss": 4.008697032928467 }, { "id": "identity_238561", "image": "FigureQA/images/73798.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the maximum area under the curve, and what is the difference in area between this color and Dark Orchid?" }, { "from": "gpt", "value": "The answer is Peru, 200 units²" } ], "clip_score": 25.40625, "loss": 4.715091705322266 }, { "id": "identity_286998", "image": "A-OKVQA/images/514293.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which item sold here is most effective in keeping people dry? \nChoices:\n(A) shirts\n(B) bottles\n(C) umbrellas\n(D) shoes" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.2421875, "loss": 4.1246867179870605 }, { "id": "identity_326565", "image": "PMC-VQA/images/PMC7562849_ccr33031-fig-0001_274914.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the lesion located in the images?\nChoices:\n(A) Right pulmonary apex\n(B) Left pulmonary apex\n(C) Left lower lobe\n(D) Right lower lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.890625, "loss": 4.593932151794434 }, { "id": "identity_290950", "image": "A-OKVQA/images/284902.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is predominant among the fruits?\nChoices:\n(A) red\n(B) yellow\n(C) green\n(D) orange" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.5703125, "loss": 4.254830837249756 }, { "id": "identity_1909", "image": "TabMWP/images/20377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A cable company analyst paid attention to how many new customers it had each day. On which day did the cable company have the fewest new customers?\nChoices:\n(A) Wednesday\n(B) Thursday\n(C) Friday\n(D) Saturday" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 37.0, "loss": 4.1685333251953125 }, { "id": "identity_330544", "image": "PMC-VQA/images/PMC8005316_FIG3_405174.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the cause of the signal hyperintensity shown here?\nChoices:\n(A) Nerve damage\n(B) Bone fracture\n(C) Tubular cysts within the tibial nerve\n(D) TSE technique artifact" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.359375, "loss": 4.810435771942139 }, { "id": "identity_312268", "image": "Super-CLEVR/images/superCLEVR_new_000109.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the shape of the big shiny object on the left side of the matte tandem bike the same as the tiny gray shiny object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 24.25, "loss": 5.299614906311035 }, { "id": "identity_207612", "image": "A-OKVQA/images/533889.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the roof of the structure on the right side of the image?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 14.578125, "loss": 4.029177188873291 }, { "id": "identity_309119", "image": "DocVQA/images/ysbw0217_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the author of the meeting on adolescent medicine?" }, { "from": "gpt", "value": "The answer is Davis" } ], "clip_score": 17.671875, "loss": 5.075689792633057 }, { "id": "identity_291339", "image": "VizWiz/images/VizWiz_train_00004420.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is table chair" } ], "clip_score": 26.734375, "loss": 4.219012260437012 }, { "id": "identity_329520", "image": "A-OKVQA/images/314297.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for him sitting on the skateboard?\nChoices:\n(A) is stuck\n(B) balancing\n(C) fell down\n(D) showing off" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.453125, "loss": 4.311042785644531 }, { "id": "identity_129154", "image": "TextVQA/images/9441b1942cfc32b4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the door of the outhouse in the background?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 20.75, "loss": 4.336138725280762 }, { "id": "identity_22207", "image": "PMC-VQA/images/PMC7518237_F1_261377.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the ADC map show in the corresponding areas? \nChoices:\n(A) Hypointense signal \n(B) Hyperintense signal \n(C) Mixed signal \n(D) No signal " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.125, "loss": 4.246893882751465 }, { "id": "identity_329091", "image": "PMC-VQA/images/PMC7042499_fig2_124845.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was used to measure metabolic uptake?\nChoices:\n(A) X-ray \n(B) Magnetic Resonance Imaging (MRI) \n(C) [ 18F ] fluorodeoxyglucose positron emission tomography ([ 18F]-FDG PET) \n(D) Computed Tomography (CT)" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.390625, "loss": 4.214394569396973 }, { "id": "identity_270424", "image": "FigureQA/images/59489.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Gold the highest possible option?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.953125, "loss": 4.31392765045166 }, { "id": "identity_310575", "image": "AI2D/images/4474.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a term for an organ or structure that has a curved or bow-like shape?\nChoices:\n(A) none of the above\n(B) arch\n(C) figure\n(D) structure" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.5, "loss": 4.308880805969238 }, { "id": "identity_319770", "image": "AI2D/images/3977.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is the center part of this diagram?\nChoices:\n(A) basal ring\n(B) radial spine\n(C) apical ring\n(D) apical window" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.78125, "loss": 4.211993217468262 }, { "id": "identity_197491", "image": "TextVQA/images/8994f0a839f29131.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What word is written on the side of the crate?" }, { "from": "gpt", "value": "The answer is JARDIENO" } ], "clip_score": 13.234375, "loss": 4.93107795715332 }, { "id": "identity_36892", "image": "IconQA/images/54855.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner less likely to land?\nChoices:\n(A) blue\n(B) white" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.515625, "loss": 4.1058173179626465 }, { "id": "identity_213577", "image": "TextVQA/images/a01dc00ee58c3c76.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many turnstiles are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 31.671875, "loss": 4.567936897277832 }, { "id": "identity_269817", "image": "FigureQA/images/68572.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly describes Dark Gold as the smoothest? \n(A) no \n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.640625, "loss": 4.4156575202941895 }, { "id": "identity_186666", "image": "PMC-VQA/images/PMC5832061_fig6_282327.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image?" }, { "from": "gpt", "value": "The answer is Axial" } ], "clip_score": 21.203125, "loss": 4.257645130157471 }, { "id": "identity_35909", "image": "TQA/images/question_images/erosion_8066.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the picture?\nChoices:\n(A) Photosynthesis\n(B) Life Cycle\n(C) Erosion and Deposition by Flowing Water\n(D) Solar Heat" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.6875, "loss": 4.114360809326172 }, { "id": "identity_20559", "image": "PMC-VQA/images/PMC6885221_fig1_76359.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the arrowhead in the image?\nChoices:\n(A) Thickened basement membrane. \n(B) Simple squamous epithelium. \n(C) Pseudostratified columnar epithelium. \n(D) Goblet cell." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.5, "loss": 4.5963239669799805 }, { "id": "identity_283225", "image": "A-OKVQA/images/376677.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of location is the area on the left with sign displays most likely to be?\nChoices:\n(A) shopping mall\n(B) strip mall\n(C) market\n(D) rest stop" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.265625, "loss": 4.454219818115234 }, { "id": "identity_22306", "image": "A-OKVQA/images/479658.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the red item with the wheels?\nChoices:\n(A) space shuttle\n(B) tank\n(C) crane\n(D) sedan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.15625, "loss": 4.678081035614014 }, { "id": "identity_122357", "image": "TextVQA/images/93e0e0fa79dd09b6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the stairs leading up to the building?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 18.15625, "loss": 4.394745826721191 }, { "id": "identity_264495", "image": "PMC-VQA/images/PMC4391185_Fig3_376517.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to capture these images? \nOptions:\n(A) X-ray\n(B) Magnetic Resonance Imaging (MRI)\n(C) Computed Tomography (CT)\n(D) Positron Emission Tomography (PET)" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.984375, "loss": 4.281189918518066 }, { "id": "identity_331415", "image": "ScienceQA/images/18408.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is shown?\nChoices:\n(A) North America\n(B) Africa\n(C) Antarctica\n(D) Asia" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.359375, "loss": 4.68278169631958 }, { "id": "identity_326088", "image": "PMC-VQA/images/PMC8399726_Fig4_46470.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of image is depicted in the figure?\nChoices:\n(A) Sagittal T1-weighted image\n(B) Axial T2-weighted image\n(C) Coronal T2-weighted image\n(D) Sagittal T2-weighted image" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.015625, "loss": 4.015412330627441 }, { "id": "identity_213118", "image": "A-OKVQA/images/3220.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the mascot that is second from the left?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 18.296875, "loss": 4.434362888336182 }, { "id": "identity_994", "image": "VizWiz/images/VizWiz_train_00015169.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this?" }, { "from": "gpt", "value": "The answer is dustin" } ], "clip_score": 16.78125, "loss": 4.715951919555664 }, { "id": "identity_32495", "image": "PMC-VQA/images/PMC7163002_Fig7_153241.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the green contour in the image? \nChoices:\n(A) Phantom Edge on US \n(B) Phantom edge on MRI \n(C) Pre-segmented prostate capsule from Pre-procedural MRI \n(D) OD " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.265625, "loss": 4.150620460510254 }, { "id": "identity_294698", "image": "IconQA/images/64765.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nOptions:\n(A) rotate\n(B) invert\n(C) shift" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.375, "loss": 5.089003562927246 }, { "id": "identity_270042", "image": "AI2D/images/3042.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label in the diagram represents Centrioles?\nChoices:\n(A) D\n(B) F\n(C) L\n(D) A" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.109375, "loss": 4.216497898101807 }, { "id": "identity_287139", "image": "IconQA/images/79993.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture is the third one if the first picture is a bear?\nChoices:\n(A) bear\n(B) train\n(C) plane" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.09375, "loss": 4.179570198059082 }, { "id": "identity_309743", "image": "A-OKVQA/images/135336.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the cement structures in the water for?\nChoices:\n(A) floaty\n(B) protecting shore\n(C) decoration\n(D) pier" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.3359375, "loss": 4.396141052246094 }, { "id": "identity_313133", "image": "A-OKVQA/images/471737.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who invented this?\nChoices:\n(A) richard trevithick\n(B) orville wright\n(C) jeff goldblum\n(D) bill nye" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.234375, "loss": 4.018850803375244 }, { "id": "identity_199846", "image": "A-OKVQA/images/476925.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cards are visible on the table?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 17.15625, "loss": 4.439985752105713 }, { "id": "identity_322056", "image": "A-OKVQA/images/65001.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What body part did most people use to stop their vehicles?\nChoices:\n(A) foot\n(B) hand\n(C) eye\n(D) hip" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.265625, "loss": 4.4982008934021 }, { "id": "identity_259462", "image": "ScienceQA/images/19025.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is the marked city located in, and what is the capital of that state?" }, { "from": "gpt", "value": "The answer is Olympia" } ], "clip_score": 25.046875, "loss": 4.106125354766846 }, { "id": "identity_143899", "image": "DocVQA/images/rnww0217_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the publication status of the topic under discussion?" }, { "from": "gpt", "value": "The answer is Under discussion" } ], "clip_score": 23.640625, "loss": 4.771090507507324 }, { "id": "identity_202651", "image": "PMC-VQA/images/PMC5432911_Fig8_165194.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the anthers at the same stage of development?" }, { "from": "gpt", "value": "The answer is No, 'h'' is more developed" } ], "clip_score": 30.03125, "loss": 4.344391345977783 }, { "id": "identity_59613", "image": "DocVQA/images/rxvw0217_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the subject of the email?" }, { "from": "gpt", "value": "The answer is Pub Planning Committee Members" } ], "clip_score": 24.15625, "loss": 4.371747016906738 }, { "id": "identity_19110", "image": "FigureQA/images/89086.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Indigo the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.59375, "loss": 4.265786647796631 }, { "id": "identity_273789", "image": "TQA/images/abc_question_images/parts_microscope_17189.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents the photo port in the diagram? Is it:\n(A) J\n(B) P\n(C) E\n(D) W" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.328125, "loss": 4.683289527893066 }, { "id": "identity_318180", "image": "PMC-VQA/images/PMC7366709_Fig3_215539.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrow indicate in the ultra-wide field fluorescein angiography?\nChoices:\n(A) Peripheral non-perfusion area\n(B) Pre-retinal neovascular lesions\n(C) Hemorrhages\n(D) Macular degeneration" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.59375, "loss": 4.298527717590332 }, { "id": "identity_217392", "image": "PMC-VQA/images/PMC6360734_Fig1_431568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label marked with 'a' indicating in the image?" }, { "from": "gpt", "value": "The answer is Apical view" } ], "clip_score": 21.0625, "loss": 4.475774765014648 }, { "id": "identity_38458", "image": "IconQA/images/23739.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner more likely to land?\nChoices:\n(A) white\n(B) blue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.84375, "loss": 4.016809940338135 }, { "id": "identity_169693", "image": "IconQA/images/27122.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more flowers in the first two rows or the last two rows?" }, { "from": "gpt", "value": "The answer is The same number" } ], "clip_score": 32.625, "loss": 4.218318462371826 }, { "id": "identity_268414", "image": "TabMWP/images/15688.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.21875, "loss": 4.6118574142456055 }, { "id": "identity_306280", "image": "AI2D/images/4381.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter corresponds to didactylie?\nChoices:\n(A) B\n(B) A\n(C) F\n(D) L" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.6875, "loss": 4.359249591827393 }, { "id": "identity_219488", "image": "VQA2.0/images/393664.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many light poles are visible in the background?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 13.3828125, "loss": 4.6387739181518555 }, { "id": "identity_12500", "image": "ScienceQA/images/11814.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is highlighted?\nChoices:\n(A) South America\n(B) Antarctica\n(C) North America\n(D) Europe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.421875, "loss": 4.629554271697998 }, { "id": "identity_116530", "image": "VQA2.0/images/188329.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What word is written on the surfboard that is second from the right inside the shop?" }, { "from": "gpt", "value": "The answer is Moluske" } ], "clip_score": 31.734375, "loss": 5.021265983581543 }, { "id": "identity_273799", "image": "DocVQA/images/xfmd0217_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is Leigh A. Beck currently located?" }, { "from": "gpt", "value": "The answer is senior director" } ], "clip_score": 24.25, "loss": 4.735998153686523 }, { "id": "identity_14512", "image": "FigureQA/images/42731.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orange the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.28125, "loss": 4.019850730895996 }, { "id": "identity_299921", "image": "PMC-VQA/images/PMC8874131_F2_204782.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of fluorescence images were captured among the following options: X-ray images, NIR fluorescence images, PET-CT images, or Ultrasound images?\nChoices:\n(A) X-ray images \n(B) NIR fluorescence images \n(C) PET-CT images \n(D) Ultrasound images" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.578125, "loss": 4.024897575378418 }, { "id": "identity_327847", "image": "PMC-VQA/images/PMC6398457_BIO036939F3_443767.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the structure indicated by arrows in this image?\nChoices:\n(A) Spermatid heads\n(B) Plasma membranes\n(C) Septa-like pattern\n(D) Cyst Cells" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.578125, "loss": 5.269347190856934 }, { "id": "identity_7002", "image": "PMC-VQA/images/PMC7564512_jcm-09-03040-f002_275604.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What structure is affected in the wrist? \nChoices:\n(A) Peripheral nerve \n(B) Tendon \n(C) Capsule \n(D) Fascia " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.25, "loss": 4.180513858795166 }, { "id": "identity_26608", "image": "VQA2.0/images/17712.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does that boat have a flag?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.21875, "loss": 4.088784217834473 }, { "id": "identity_270935", "image": "PMC-VQA/images/PMC4619311_pone.0140106.g004_436645.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of cavity is shown in the image provided?\nOptions:\n(A) Circular cavity\n(B) Transverse cavity\n(C) Longitudinal cavity\n(D) Spherical cavity" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.4375, "loss": 4.5767717361450195 }, { "id": "identity_287927", "image": "PMC-VQA/images/PMC5368167_F1_145882.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of MRI was utilized to capture the image: T1-weighted MRI, T2-weighted MRI, CT scan, or PET scan?\nChoices:\n(A) T1-weighted MRI \n(B) T2-weighted MRI \n(C) CT scan \n(D) PET scan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.3125, "loss": 4.246111869812012 }, { "id": "identity_33938", "image": "PMC-VQA/images/PMC7591820_F2_283632.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the soft tissue mass located? \nChoices:\n(A) Left nasal cavity \n(B) Maxillary sinus \n(C) Right nasal cavity \n(D) Ethmoid sinuses " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.609375, "loss": 4.5284295082092285 }, { "id": "identity_134001", "image": "TextVQA/images/7a963898de8b8941.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the genre of the book?" }, { "from": "gpt", "value": "The answer is Mystery fiction" } ], "clip_score": 24.125, "loss": 4.428564548492432 }, { "id": "identity_276783", "image": "FigureQA/images/74847.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Orchid the color with the largest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.109375, "loss": 4.032110214233398 }, { "id": "identity_27241", "image": "PMC-VQA/images/PMC9224862_jcm-11-03399-f001_321101.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique is used in this image? \nChoices:\n(A) MRI \n(B) Ultrasound \n(C) CT scan \n(D) X-ray. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.484375, "loss": 4.739621162414551 }, { "id": "identity_297299", "image": "PMC-VQA/images/PMC6372518_F1_434790.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of scan was utilized to capture the image: CT scan, MRI scan, X-ray scan, or PET scan?\nChoices:\n(A) CT scan \n(B) MRI scan \n(C) X-ray scan \n(D) PET scan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.0, "loss": 4.175789833068848 }, { "id": "identity_335564", "image": "A-OKVQA/images/319521.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this person's occupation?\nChoices:\n(A) doctor\n(B) salesman\n(C) lifeguard\n(D) officer" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.84375, "loss": 4.349379539489746 }, { "id": "identity_2888", "image": "DVQA/images/bar_train_00140328.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What store does the palegoldenrod color represent?" }, { "from": "gpt", "value": "The answer is stage" } ], "clip_score": 22.046875, "loss": 4.167341232299805 }, { "id": "identity_329771", "image": "PMC-VQA/images/PMC5034476_Fig4_76747.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of radiograph is depicted in this image?\nChoices:\n(A) Bitewing radiograph\n(B) Periapical radiograph\n(C) Occlusal radiograph\n(D) Panoramic radiograph" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.96875, "loss": 4.236838340759277 }, { "id": "identity_283867", "image": "ScienceQA/images/4555.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among Iowa, Oklahoma, Wyoming, and Nevada, which state is located farthest to the east?\nChoices:\n(A) Iowa\n(B) Oklahoma\n(C) Wyoming\n(D) Nevada" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.65625, "loss": 4.003304958343506 }, { "id": "identity_211483", "image": "TextVQA/images/8e67cebf6512806e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last feature mentioned on the right side of the image?" }, { "from": "gpt", "value": "The answer is Control weather & stock market" } ], "clip_score": 17.0625, "loss": 4.160940647125244 }, { "id": "identity_36846", "image": "ScienceQA/images/8085.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) South Carolina\n(B) Pennsylvania\n(C) Indiana\n(D) Rhode Island" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.3125, "loss": 4.125193119049072 }, { "id": "identity_301835", "image": "DVQA/images/bar_train_00045868.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first group of bars?" }, { "from": "gpt", "value": "The answer is corner" } ], "clip_score": 17.90625, "loss": 4.404085636138916 }, { "id": "identity_248006", "image": "VQA2.0/images/74421.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different colors of tinsel are visible in the image?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 22.640625, "loss": 4.375585079193115 }, { "id": "identity_34550", "image": "Super-CLEVR/images/superCLEVR_new_000632.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a gray bus that is on the right side of the thing left of the aeroplane?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 25.15625, "loss": 5.159379005432129 }, { "id": "identity_30419", "image": "A-OKVQA/images/166067.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are all the trucks in back of the boat?\nChoices:\n(A) stole them\n(B) hauling them\n(C) part boat\n(D) hiding them" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.578125, "loss": 4.198714256286621 }, { "id": "identity_32291", "image": "PMC-VQA/images/PMC5519540_Fig3_193343.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the dotted lines indicate in the images? \nChoices:\n(A) Blood vessels. \n(B) Outline of the lungs. \n(C) Outline of the heart. \n(D) Outline of the digestive system. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.390625, "loss": 4.188271999359131 }, { "id": "identity_31678", "image": "A-OKVQA/images/579862.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of vehicle are the helmeted people riding?\nChoices:\n(A) unicycle\n(B) moped\n(C) bike\n(D) scooter" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.15625, "loss": 4.287772178649902 }, { "id": "identity_292916", "image": "A-OKVQA/images/146881.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the message displayed at the top of the bus?\nChoices:\n(A) red\n(B) shuttle\n(C) open\n(D) closed" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.46875, "loss": 4.147589206695557 }, { "id": "identity_20202", "image": "VizWiz/images/VizWiz_train_00019764.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does this room look?" }, { "from": "gpt", "value": "The answer is clean" } ], "clip_score": 27.90625, "loss": 4.730592727661133 }, { "id": "identity_281043", "image": "Super-CLEVR/images/superCLEVR_new_000533.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To the left of the large blue shiny motorbike on the left side of the big green rubber car, is there a big brown shiny fighter to the left of the large motorbike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 30.46875, "loss": 5.2313971519470215 }, { "id": "identity_306913", "image": "FigureQA/images/70575.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Burlywood?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.8515625, "loss": 4.262414455413818 }, { "id": "identity_294878", "image": "IconQA/images/106578.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there a circle as a face in this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.0625, "loss": 4.155156135559082 }, { "id": "identity_290394", "image": "PMC-VQA/images/PMC5751172_ijms-18-02569-f010_259539.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the cage is depicted in the image? \nOptions:\n(A) External surface \n(B) Top surface \n(C) Internal surface \n(D) Left surface" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.890625, "loss": 4.193340301513672 }, { "id": "identity_131364", "image": "TextVQA/images/01e3e2e8f8e6808c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the street located at the bottom of the key?" }, { "from": "gpt", "value": "The answer is Barrel Street" } ], "clip_score": 8.1796875, "loss": 4.785361289978027 }, { "id": "identity_331801", "image": "PMC-VQA/images/PMC5797361_Fig1_271410.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the second column?\nChoices:\n(A) CT tomographic images of the preimplant plan\n(B) Locations of needles before implantation\n(C) Images of the postimplant plan\n(D) Dosimetric evaluations of GTV and adjacent OARs" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.234375, "loss": 4.421619892120361 }, { "id": "identity_288660", "image": "PMC-VQA/images/PMC5106057_F2_94700.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which chamber exhibited dilation among the following options: right atria, left atria, right ventricle, or left ventricle?\nChoices:\n(A) right atria \n(B) left atria \n(C) right ventricle \n(D) left ventricle" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.75, "loss": 4.0933685302734375 }, { "id": "identity_2890", "image": "PMC-VQA/images/PMC7682978_F2_303278.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the chest CT scan of the patient reveal? \nChoices:\n(A) Kidney stones \n(B) Brain tumor \n(C) Bilateral infiltrations \n(D) Lung nodules. " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.796875, "loss": 4.661762237548828 }, { "id": "identity_69103", "image": "AI2D/images/2067.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the secondary consumer in this food web" }, { "from": "gpt", "value": "The answer is Bull snake" } ], "clip_score": 28.0, "loss": 4.611067771911621 }, { "id": "identity_287072", "image": "DocVQA/images/zhmw0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide information about Neal Ball?" }, { "from": "gpt", "value": "The answer is vice president" } ], "clip_score": 17.046875, "loss": 4.639059543609619 }, { "id": "identity_31457", "image": "ScienceQA/images/15715.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) breakable\n(B) bouncy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.2265625, "loss": 4.415182113647461 }, { "id": "identity_318173", "image": "PMC-VQA/images/PMC6264615_Fig2_403227.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the medical procedure shown in the caption? \nChoices:\n(A) CT scan \n(B) Echocardiogram \n(C) Thoracoscopy \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.40625, "loss": 4.34260892868042 }, { "id": "identity_12173", "image": "AI2D/images/3633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: where the heart place?\nChoices:\n(A) hand\n(B) leg\n(C) head\n(D) chest" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.75, "loss": 4.250868797302246 }, { "id": "identity_331091", "image": "PMC-VQA/images/PMC5960698_F2_316506.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this depict?\nChoices:\n(A) Edema of something (black arrow).\n(B) Atrophy of something (black arrow).\n(C) Enlarged fourth ventricle without any changes in something.\n(D) Enlarged something (black arrow) within the small fourth ventricle." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.28125, "loss": 4.418511390686035 }, { "id": "identity_334158", "image": "TextVQA/images/06628eaed257fa24.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What word is on one of the hot air balloons?" }, { "from": "gpt", "value": "The answer is when" } ], "clip_score": 26.796875, "loss": 5.33228063583374 }, { "id": "identity_62880", "image": "ScienceQA/images/1270.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different sports are depicted in this advertisement?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.421875, "loss": 4.5143232345581055 }, { "id": "identity_237097", "image": "PlotQA/images/51121.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total trade (sum of imports and exports) for each country, and which country has the highest total trade?" }, { "from": "gpt", "value": "The answer is Georgia" } ], "clip_score": 29.390625, "loss": 5.684839725494385 }, { "id": "identity_327143", "image": "PMC-VQA/images/PMC5503588_F3_188338.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of radiograph is depicted in the image?\nChoices:\n(A) Lateral radiograph\n(B) Oblique radiograph\n(C) AP radiograph\n(D) PA radiograph" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.28125, "loss": 4.4720234870910645 }, { "id": "identity_138576", "image": "DVQA/images/bar_train_00150154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest value for \"beauty\"?" }, { "from": "gpt", "value": "The answer is panel" } ], "clip_score": 30.6875, "loss": 4.200537204742432 }, { "id": "identity_92554", "image": "TabMWP/images/17155.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode number of characters in the books Liam read?" }, { "from": "gpt", "value": "The answer is No mode" } ], "clip_score": 35.0625, "loss": 4.026268005371094 }, { "id": "identity_289876", "image": "ScienceQA/images/7806.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which question is most suitable for Emmet's experiment to answer:\n(A) Does the presence of holes in bags affect the development of brown spots on bananas compared to bags without holes?\n(B) Does the temperature at which bananas are stored affect the development of brown spots, comparing room temperature to a cold refrigerator?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.515625, "loss": 4.005127906799316 }, { "id": "identity_2321", "image": "TextVQA/images/84b3fc5b08f24dba.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is being sold at the stall on the left?" }, { "from": "gpt", "value": "The answer is bun mang vit" } ], "clip_score": 25.359375, "loss": 4.74965238571167 }, { "id": "identity_294202", "image": "A-OKVQA/images/293081.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of water body is most likely to be connected to this dock?\nChoices:\n(A) river\n(B) ocean\n(C) lake\n(D) sea" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.734375, "loss": 4.3639235496521 }, { "id": "identity_229273", "image": "ScienceQA/images/1345.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify all the organisms that are directly consumed by the omnivore in this food web, and determine the total number of such organisms" }, { "from": "gpt", "value": "The answer is two" } ], "clip_score": 30.859375, "loss": 5.075980186462402 }, { "id": "identity_337927", "image": "A-OKVQA/images/21926.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of place is depicted?\nChoices:\n(A) coastal\n(B) beach\n(C) urban\n(D) rural" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.046875, "loss": 4.596795558929443 }, { "id": "identity_31107", "image": "PMC-VQA/images/PMC4311646_F2_353925.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique used to identify the distribution pattern of dPG35S amine in the kidney? \nChoices:\n(A) CT scan \n(B) MRI \n(C) X-ray \n(D) Ultrasound " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.140625, "loss": 4.124970436096191 }, { "id": "identity_71059", "image": "ScienceQA/images/3684.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What characteristic feature can be observed in the rock shown in the image that indicates its metamorphic nature?" }, { "from": "gpt", "value": "The answer is Fused and interlocked grains" } ], "clip_score": 30.421875, "loss": 4.208937644958496 }, { "id": "identity_16580", "image": "TQA/images/question_images/seasons_1448.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the Autumnal Equinox happens in the northern hemisphere during September, what happens in the southern hemisphere at the same time?\nChoices:\n(A) Vernal Equinox\n(B) Winter Solstice\n(C) Summer Solstice\n(D) Autumnal Equinox" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.25, "loss": 4.116403102874756 }, { "id": "identity_293424", "image": "PMC-VQA/images/PMC3618245_F11_197084.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the arrows indicate in the VA scan?\nChoices:\n(A) Blood vessels\n(B) Calcifications\n(C) Nodules\n(D) Cysts" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.359375, "loss": 4.175095558166504 }, { "id": "identity_238613", "image": "TextVQA/images/8aebd2d37e832dff.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the slogan on the Coca-Cola billboard?" }, { "from": "gpt", "value": "The answer is Make someone happy\" and \"open happiness" } ], "clip_score": 27.859375, "loss": 4.129019260406494 }, { "id": "identity_245050", "image": "DocVQA/images/txpp0227_16.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: List the places of origin for all individuals in the image and identify which place is located outside of the United States. What is the name of the individual from that place?" }, { "from": "gpt", "value": "The answer is Barry E. Sealey" } ], "clip_score": 21.25, "loss": 4.052011013031006 }, { "id": "identity_334172", "image": "VQA-AS/images/8709.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the sun low?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 18.6875, "loss": 4.1738433837890625 }, { "id": "identity_124322", "image": "TextVQA/images/e64a84efadc73b8e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of the red bottle?" }, { "from": "gpt", "value": "The answer is PITU" } ], "clip_score": 17.421875, "loss": 4.312349796295166 }, { "id": "identity_206032", "image": "VQA-AS/images/4555.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the cushion on the left side of the sofa?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 22.28125, "loss": 4.859116554260254 }, { "id": "identity_3066", "image": "PMC-VQA/images/PMC7939567_F0002_386113.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was revealed from the MRI scan? \nChoices:\n(A) Right thalamus infarction \n(B) No abnormality detected in either thalamus \n(C) Left thalamus infarction \n(D) Spinal cord injury " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.796875, "loss": 4.055081844329834 }, { "id": "identity_280617", "image": "PMC-VQA/images/PMC3581533_pone-0057691-g002_187890.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be observed in the tumor region in gross pathology? \nChoices:\n(A) Calcifications \n(B) Necrosis \n(C) Hemorrhage \n(D) Fibrosis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.109375, "loss": 5.159112453460693 }, { "id": "identity_200151", "image": "A-OKVQA/images/308034.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many types of fruits can be seen on the boat?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 26.1875, "loss": 4.5541181564331055 }, { "id": "identity_283292", "image": "Geometry3K/images/452.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the measure of angle JMK?\nChoices:\n(A) 79\n(B) 101\n(C) 102\n(D) 103" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.484375, "loss": 4.513997554779053 }, { "id": "identity_227260", "image": "Super-CLEVR/images/superCLEVR_new_001820.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the thing that is in front of the small aeroplane that is behind the tiny purple fighter, and what is the material of the object that is to the left of the tiny purple fighter?" }, { "from": "gpt", "value": "The answer is Orange, metal" } ], "clip_score": 27.859375, "loss": 4.5324249267578125 }, { "id": "identity_36032", "image": "PMC-VQA/images/PMC3374820_pone-0038732-g005_141511.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which regions are marked with white squares in the image? \nChoices:\n(A) C and D \n(B) A and B \n(C) Only A \n(D) Only B " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.890625, "loss": 4.064207553863525 }, { "id": "identity_196831", "image": "PMC-VQA/images/PMC9414956_microorganisms-10-01640-f003_385876.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of bone healing or callus formation in this image?" }, { "from": "gpt", "value": "The answer is No visible healing" } ], "clip_score": 26.84375, "loss": 4.004856109619141 }, { "id": "identity_318825", "image": "ScienceQA/images/16740.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which place is highlighted?\nChoices:\n(A) Haiti\n(B) Trinidad and Tobago\n(C) Barbados\n(D) Grenada" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.5, "loss": 4.720778465270996 }, { "id": "identity_337713", "image": "VizWiz/images/VizWiz_train_00010672.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does this say?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 10.859375, "loss": 4.056233882904053 }, { "id": "identity_322765", "image": "ScienceQA/images/12060.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do these two items have in common?\nChoices:\n(A) slippery\n(B) salty" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.34375, "loss": 4.138689994812012 }, { "id": "identity_312473", "image": "ScienceQA/images/11814.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the highlighted area?\nChoices:\n(A) South America\n(B) Antarctica\n(C) North America\n(D) Europe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.28125, "loss": 4.735414981842041 }, { "id": "identity_284018", "image": "PMC-VQA/images/PMC5815729_F1_276744.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which method was utilized for image registration among the following options: (A) CT0, (B) CT50, (C) Rigid, or (D) Manual?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.53125, "loss": 4.099671840667725 }, { "id": "identity_8859", "image": "ScienceQA/images/11372.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the question that Abdul's experiment can best answer.\nChoices:\n(A) Do steel nails rust in fewer days when submerged in a large volume of liquid compared to a small volume?\n(B) Do steel nails take fewer days to rust in water compared to vinegar?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.359375, "loss": 4.172252655029297 }, { "id": "identity_288654", "image": "VizWiz/images/VizWiz_train_00005917.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Could you please inform me about the nature of this product?" }, { "from": "gpt", "value": "The answer is spynet" } ], "clip_score": 32.65625, "loss": 4.772515296936035 }, { "id": "identity_221965", "image": "TextVQA/images/3877144880f54acd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of cleaning tasks are mentioned in point number 6?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 26.90625, "loss": 4.096920490264893 }, { "id": "identity_29075", "image": "PMC-VQA/images/PMC4839123_Fig1_23080.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique was used for the early detection of bone metastases? \nChoices:\n(A) X-ray \n(B) CT scan \n(C) MRI \n(D) PET scan " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.21875, "loss": 4.346648216247559 }, { "id": "identity_285086", "image": "A-OKVQA/images/528527.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which source of energy is used to power the splash? \n(A) electricity \n(B) solar \n(C) wind \n(D) manual power" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.796875, "loss": 4.161911487579346 }, { "id": "identity_337408", "image": "IconQA/images/83618.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, what is the chance that it will be a black one?\nChoices:\n(A) probable\n(B) impossible\n(C) certain\n(D) unlikely" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.28125, "loss": 4.510043144226074 }, { "id": "identity_29", "image": "DVQA/images/bar_train_00018790.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bars are there?" }, { "from": "gpt", "value": "The answer is nine" } ], "clip_score": 21.28125, "loss": 4.143438816070557 }, { "id": "identity_313268", "image": "A-OKVQA/images/237463.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who has lighter hair than this individual?\nChoices:\n(A) margaret qualley\n(B) penelope cruz\n(C) taylor swift\n(D) natalie portman" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.2734375, "loss": 4.005552768707275 }, { "id": "identity_333473", "image": "ScienceQA/images/11997.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of this colony?\nChoices:\n(A) Ohio\n(B) Kentucky\n(C) West Virginia\n(D) Connecticut" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.90625, "loss": 4.026097774505615 }, { "id": "identity_194089", "image": "TextVQA/images/e6b1ae1ea0e0c099.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the pitcher's mound covering?" }, { "from": "gpt", "value": "The answer is Tan" } ], "clip_score": 22.65625, "loss": 4.043754577636719 }, { "id": "identity_180958", "image": "IconQA/images/60014.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which picture is second to the last?" }, { "from": "gpt", "value": "The answer is Bear" } ], "clip_score": 17.640625, "loss": 4.347601890563965 }, { "id": "identity_4885", "image": "A-OKVQA/images/16574.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time frame is this image depicting?\nChoices:\n(A) modern times\n(B) 80's\n(C) medieval times\n(D) b.c" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.953125, "loss": 4.160534858703613 }, { "id": "identity_139169", "image": "DocVQA/images/qjgn0226_192.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the royalty product mentioned in the request for payment?" }, { "from": "gpt", "value": "The answer is Vanguard - Monoblock" } ], "clip_score": 19.21875, "loss": 4.475417613983154 }, { "id": "identity_29393", "image": "A-OKVQA/images/312902.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can the clinic on the right help you with?\nChoices:\n(A) eyes\n(B) feet\n(C) teeth\n(D) back" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 6.53125, "loss": 4.155339241027832 }, { "id": "identity_266446", "image": "VizWiz/images/VizWiz_train_00023547.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the type of car this is?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 23.5, "loss": 4.340751647949219 }, { "id": "identity_298260", "image": "PMC-VQA/images/PMC4681317_fig04_453771.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the type of brain tumor depicted in the image from the following options:\n(A) Meningioma\n(B) Glioblastoma\n(C) Astrocytoma\n(D) Pituitary adenoma" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.640625, "loss": 4.228238105773926 }, { "id": "identity_495", "image": "FigureQA/images/54633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Peru greater than Green Yellow?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.59375, "loss": 4.179070472717285 }, { "id": "identity_264751", "image": "ScienceQA/images/10067.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being pointed out in the highlighted area? \nOptions:\n(A) Grenada\n(B) Cuba\n(C) Jamaica\n(D) Saint Vincent and the Grenadines" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.96875, "loss": 4.958414077758789 }, { "id": "identity_151296", "image": "ScienceQA/images/12259.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many petals can be observed in the flower shown in the image?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 22.359375, "loss": 4.191667079925537 }, { "id": "identity_233113", "image": "Super-CLEVR/images/superCLEVR_new_001554.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the shiny bus to the left of the blue rubber bicycle larger than the blue matte motorbike and the green matte car combined?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 31.734375, "loss": 5.181922435760498 }, { "id": "identity_107735", "image": "PMC-VQA/images/PMC3795743_pone-0076645-g003_237342.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the object in the bottom left corner?" }, { "from": "gpt", "value": "The answer is Irregular and roundish" } ], "clip_score": 18.09375, "loss": 4.278596878051758 }, { "id": "identity_308484", "image": "A-OKVQA/images/301977.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What style is spelled using the first five white letters on the sign?\nChoices:\n(A) bohemian\n(B) gangnam\n(C) grunge\n(D) tiger" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.7265625, "loss": 4.246445178985596 }, { "id": "identity_10439", "image": "TQA/images/abc_question_images/human_system_muscular_16164.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label marks the deltoid muscle?\nChoices:\n(A) E\n(B) Y\n(C) M\n(D) K" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.046875, "loss": 4.253526210784912 }, { "id": "identity_151695", "image": "ScienceQA/images/3058.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states border the highlighted state?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 31.859375, "loss": 4.327286720275879 }, { "id": "identity_303038", "image": "VQA-RAD/images/synpic26764.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there abnormal inflation of the lung?" }, { "from": "gpt", "value": "The answer is Hyperinflation" } ], "clip_score": 24.453125, "loss": 4.275654315948486 }, { "id": "identity_296561", "image": "A-OKVQA/images/236882.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the gas tank on the Harley bike located in the center of the pack? Is it blue, yellow, red, or gold?\nChoices:\n(A) blue\n(B) yellow\n(C) red\n(D) gold" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.625, "loss": 4.4542317390441895 }, { "id": "identity_201744", "image": "TextVQA/images/da5c7184d414515e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What phrase is displayed on the sign behind the person on the right?" }, { "from": "gpt", "value": "The answer is Plan to Graduate" } ], "clip_score": 16.984375, "loss": 4.214295387268066 }, { "id": "identity_334697", "image": "A-OKVQA/images/444619.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are individuals sitting on the sidewalk?\nChoices:\n(A) watching parade\n(B) watching traffic\n(C) as punishment\n(D) resting" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.203125, "loss": 4.291101932525635 }, { "id": "identity_268392", "image": "AI2D/images/2880.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term used to refer to the outer part of the ear?\nChoices:\n(A) Eardrum\n(B) Pinna\n(C) None of the above\n(D) Canal" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.34375, "loss": 4.201998710632324 }, { "id": "identity_193530", "image": "PMC-VQA/images/PMC4779085_Fig5_8741.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of valve regurgitation in this echocardiography image?" }, { "from": "gpt", "value": "The answer is Requires analysis of color flow for reverse signal" } ], "clip_score": 29.625, "loss": 4.926999568939209 }, { "id": "identity_70102", "image": "ScienceQA/images/4275.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries are partially visible in the top left corner of the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.9375, "loss": 5.08627986907959 }, { "id": "identity_317431", "image": "TQA/images/abc_question_images/human_system_excretory_16114.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label identifies the organ directly above this?\nChoices:\n(A) L\n(B) F\n(C) E\n(D) W" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.78125, "loss": 4.145744323730469 }, { "id": "identity_186281", "image": "A-OKVQA/images/211892.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dogs are visible in the image?" }, { "from": "gpt", "value": "The answer is Five dogs" } ], "clip_score": 19.5625, "loss": 4.2977681159973145 }, { "id": "identity_254209", "image": "A-OKVQA/images/423313.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the company with the red sign, and who owns it?" }, { "from": "gpt", "value": "The answer is Footstep, yao khaphu" } ], "clip_score": 20.171875, "loss": 6.420682907104492 }, { "id": "identity_19031", "image": "ScienceQA/images/1549.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this passage describe the weather or the climate?\nChoices:\n(A) weather\n(B) climate" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 7.1953125, "loss": 4.205493450164795 }, { "id": "identity_35926", "image": "A-OKVQA/images/464812.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What league would they want to play in?\nChoices:\n(A) nba\n(B) mlb\n(C) nfl\n(D) nhl" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.375, "loss": 4.193122863769531 }, { "id": "identity_317850", "image": "TQA/images/question_images/parts_microscope_7180.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of this lens?\nChoices:\n(A) To allow light to pass through the stage\n(B) For balance\n(C) The amount of zoom of the image\n(D) To hold the object" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.015625, "loss": 5.179261684417725 }, { "id": "identity_26172", "image": "A-OKVQA/images/136300.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do these animals have?\nChoices:\n(A) horns\n(B) quills\n(C) hooves\n(D) gills" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 8.6484375, "loss": 4.404448509216309 }, { "id": "identity_12460", "image": "FigureQA/images/85017.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Red have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.078125, "loss": 4.414124488830566 }, { "id": "identity_144819", "image": "DocVQA/images/ftmf0024_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the key tactic mentioned in the document?" }, { "from": "gpt", "value": "The answer is B-BQ with Social Clubs" } ], "clip_score": 22.203125, "loss": 4.171236515045166 }, { "id": "identity_196251", "image": "PMC-VQA/images/PMC5414324_Fig12_159754.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the structure in the center of the image?" }, { "from": "gpt", "value": "The answer is Irregular tree-like structure" } ], "clip_score": 22.1875, "loss": 4.140974998474121 }, { "id": "identity_30204", "image": "PMC-VQA/images/PMC7003532_Fig5_112043.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of imaging was used to obtain postoperative images?\nChoices:\n(A) MRI \n(B) CT \n(C) X-ray \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.25, "loss": 4.796891212463379 }, { "id": "identity_281834", "image": "ScienceQA/images/8817.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly identifies the direction of thermal energy transfer during this time: from each cake to the surroundings or from the surroundings to each cake?\nChoices:\n(A) each cake . . . the surroundings\n(B) the surroundings . . . each cake" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.15625, "loss": 4.469969749450684 }, { "id": "identity_264003", "image": "PMC-VQA/images/PMC4383027_f3-ijo-46-05-1913_374392.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of microscope was utilized to capture the images of the colon cancer spheres: Bright field, Fluorescence, Confocal, or Electron?\nChoices:\n(A) Bright field \n(B) Fluorescence \n(C) Confocal \n(D) Electron" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.140625, "loss": 4.3335442543029785 }, { "id": "identity_201915", "image": "VQA2.0/images/507195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels does the cart have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.265625, "loss": 4.277711868286133 }, { "id": "identity_8442", "image": "PMC-VQA/images/PMC6032467_F8_337932.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of images were used to examine the patient? \nChoices:\n(A) MRI \n(B) CT scan \n(C) X-ray \n(D) Ultrasound " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.28125, "loss": 4.1934332847595215 }, { "id": "identity_277621", "image": "AI2D/images/1745.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term used to describe the line formed by the intersection of the fault plate with the earth's surface among the following options? \n(A) Fault Line\n(B) Strike-slip Fault\n(C) Reverse Fault\n(D) Normal Fault" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.171875, "loss": 4.580991744995117 }, { "id": "identity_38229", "image": "PMC-VQA/images/PMC6019330_FIG3_333505.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the yellow arrow indicate in the image? \nChoices:\n(A) Radiolucent zone \n(B) Screw \n(C) Fracture \n(D) None of the above. " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.765625, "loss": 4.671722412109375 }, { "id": "identity_16598", "image": "A-OKVQA/images/266533.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the woman waiting for?\nChoices:\n(A) rain stopping\n(B) bus\n(C) cab\n(D) crossing street" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.859375, "loss": 4.8548431396484375 }, { "id": "identity_267899", "image": "TQA/images/question_images/waves_8226.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the symbol (D) represent in the given diagram?\nChoices:\n(A) trough\n(B) crest\n(C) wavelength\n(D) amplitude" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.953125, "loss": 4.5010247230529785 }, { "id": "identity_110656", "image": "Super-CLEVR/images/superCLEVR_new_000103.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is both right of the green bus and in front of the purple dirtbike?" }, { "from": "gpt", "value": "The answer is Gold" } ], "clip_score": 35.21875, "loss": 4.71713924407959 }, { "id": "identity_104010", "image": "VQA-AS/images/13004.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What item is hanging on the stand near the window?" }, { "from": "gpt", "value": "The answer is A coat" } ], "clip_score": 18.46875, "loss": 4.205876350402832 }, { "id": "identity_115386", "image": "TextVQA/images/10e0f8abcdbd76e2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many figures are accompanying the clock in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.625, "loss": 4.088962554931641 }, { "id": "identity_208076", "image": "PMC-VQA/images/PMC5135102_pone.0167517.g003_101955.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many black arrows are present in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 16.296875, "loss": 4.95456600189209 }, { "id": "identity_331012", "image": "ScienceQA/images/1021.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic fits this item?\nChoices:\n(A) scratchy\n(B) slippery" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.2421875, "loss": 4.410704612731934 }, { "id": "identity_304890", "image": "ChartQA/images/9652.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the darkest part of each bar located?" }, { "from": "gpt", "value": "The answer is Leftmost" } ], "clip_score": 16.296875, "loss": 4.391452312469482 }, { "id": "identity_32557", "image": "DVQA/images/bar_train_00117269.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 24.40625, "loss": 4.284186840057373 }, { "id": "identity_315850", "image": "VQA2.0/images/178484.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is holding the controller?" }, { "from": "gpt", "value": "The answer is one with beard" } ], "clip_score": 20.09375, "loss": 4.435311317443848 }, { "id": "identity_157898", "image": "AI2D/images/374.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trees are shown on the crust of the earth in the image?" }, { "from": "gpt", "value": "The answer is Three trees" } ], "clip_score": 24.484375, "loss": 4.581282138824463 }, { "id": "identity_300493", "image": "TextVQA/images/000ed967c71e566f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which place was this event held?" }, { "from": "gpt", "value": "The answer is florida" } ], "clip_score": 19.15625, "loss": 4.416788578033447 }, { "id": "identity_60384", "image": "DocVQA/images/mmbd0227_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first word of the 14th point?" }, { "from": "gpt", "value": "The answer is Should" } ], "clip_score": 15.96875, "loss": 4.089587211608887 }, { "id": "identity_186726", "image": "A-OKVQA/images/192955.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible wheels does the train have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.640625, "loss": 4.115933895111084 }, { "id": "identity_27372", "image": "AI2D/images/753.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the arrow in the diagram pointing at?\nChoices:\n(A) The direction to the nearest shopping mall\n(B) The direction to the Sun\n(C) The Moon\n(D) The Earth" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.390625, "loss": 4.10085391998291 }, { "id": "identity_128493", "image": "VQA2.0/images/388352.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many legs of the dog can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.71875, "loss": 4.549370765686035 }, { "id": "identity_22770", "image": "PMC-VQA/images/PMC4323131_pone.0117670.g002_357126.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of fluid accumulation in dog 9? \nChoices:\n(A) Cerebral cortex \n(B) Lateral cerebellum \n(C) Thalamic nuclei \n(D) Basal ganglia " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.828125, "loss": 4.370833396911621 }, { "id": "identity_6297", "image": "PMC-VQA/images/PMC5719812_Fig3_249998.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the technique used to obtain the staining pattern? \nChoices:\n(A) Confocal microscopy \n(B) Electron microscopy \n(C) Fluorescent microscopy \n(D) Immunofluorescence microscopy " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.65625, "loss": 4.139647960662842 }, { "id": "identity_218364", "image": "A-OKVQA/images/169026.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many traffic lights are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.40625, "loss": 4.280917167663574 }, { "id": "identity_31543", "image": "PMC-VQA/images/PMC4195344_fig2_327335.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the finding in the pleural cavity? \nChoices:\n(A) Blood clots \n(B) Scattered nodules \n(C) Lung condensation \n(D) Minimal pleural effusion " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.46875, "loss": 4.416224002838135 }, { "id": "identity_299786", "image": "DocVQA/images/hkkw0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which marketing person is responsible for the project?" }, { "from": "gpt", "value": "The answer is Paul Knouse" } ], "clip_score": 18.109375, "loss": 4.349509239196777 }, { "id": "identity_321775", "image": "DocVQA/images/thjy0046_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who conducted a comprehensive review of the origins and developments of low tar cigarettes?" }, { "from": "gpt", "value": "The answer is Richard Baker" } ], "clip_score": 21.234375, "loss": 4.041775226593018 }, { "id": "identity_6279", "image": "FigureQA/images/91233.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Green the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.625, "loss": 4.102398872375488 }, { "id": "identity_298161", "image": "A-OKVQA/images/246029.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the body is most protected on the front man?\nChoices:\n(A) knees\n(B) shins\n(C) chest\n(D) nose" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.0703125, "loss": 4.508249282836914 }, { "id": "identity_210916", "image": "PMC-VQA/images/PMC4328294_Fig1_358170.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color predominantly seen in the outer areas of the image?" }, { "from": "gpt", "value": "The answer is Dark blue" } ], "clip_score": 23.296875, "loss": 4.623749256134033 }, { "id": "identity_284886", "image": "IconQA/images/92309.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) flip\n(B) turn\n(C) slide" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.703125, "loss": 4.235062599182129 }, { "id": "identity_323234", "image": "ScienceQA/images/7763.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Hartford\n(B) Frankfort\n(C) Colorado Springs\n(D) Louisville" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.015625, "loss": 4.271430969238281 }, { "id": "identity_283157", "image": "IconQA/images/23823.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is it possible to draw a square using this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.40625, "loss": 4.00469446182251 }, { "id": "identity_215426", "image": "VQA2.0/images/374661.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many glass items are on the table?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 15.4765625, "loss": 4.482300758361816 }, { "id": "identity_261784", "image": "A-OKVQA/images/137377.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the subject of the photo she is capturing?\nChoices:\n(A) yarn\n(B) potato\n(C) doll\n(D) ball" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.7890625, "loss": 4.475268363952637 }, { "id": "identity_14809", "image": "TQA/images/question_images/waves_8227.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does point D to point H measure?\nChoices:\n(A) Wavelength\n(B) Crest\n(C) Amplitude\n(D) Trough" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.90625, "loss": 4.202808380126953 }, { "id": "identity_329388", "image": "PMC-VQA/images/PMC6462967_children-06-00043-f010_465086.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the abnormality shown in this ventricle?\nChoices:\n(A) dilatation\n(B) hypertrophy\n(C) hypotrophy\n(D) atrophy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.203125, "loss": 4.522143840789795 }, { "id": "identity_319321", "image": "A-OKVQA/images/399626.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which skier, based on the color of their outfit, is at a different height than the others?\nChoices:\n(A) red\n(B) yellow\n(C) green\n(D) brown" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.421875, "loss": 4.287125587463379 }, { "id": "identity_119677", "image": "VizWiz/images/VizWiz_train_00002513.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of the clock?" }, { "from": "gpt", "value": "The answer is Maples Clock" } ], "clip_score": 29.75, "loss": 5.290791034698486 }, { "id": "identity_272838", "image": "VQA-AS/images/17137.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the floor messy?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 18.4375, "loss": 4.076643466949463 }, { "id": "identity_323089", "image": "A-OKVQA/images/126096.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which Disney character is visible in the sky?\nChoices:\n(A) goofy\n(B) tinker bell\n(C) minnie mouse\n(D) lucy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.625, "loss": 4.040514945983887 }, { "id": "identity_242949", "image": "VQA2.0/images/176483.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the visible structural features of the cabin, and how do they contribute to its stability and insulation in a snowy environment?" }, { "from": "gpt", "value": "The answer is Log construction, interlocked corners, sloped roof, raised stone foundation" } ], "clip_score": 24.21875, "loss": 4.314747333526611 }, { "id": "identity_11434", "image": "TextVQA/images/469acfb4e587bdcd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: does this translate to \"police\" in english?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 28.34375, "loss": 4.877562999725342 }, { "id": "identity_22751", "image": "A-OKVQA/images/459786.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the decorations made of?\nChoices:\n(A) candy canes\n(B) plants\n(C) gnomes\n(D) paper airplanes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.6484375, "loss": 4.314243793487549 }, { "id": "identity_151966", "image": "ScienceQA/images/8407.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many limbs can be observed on the animal in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 19.53125, "loss": 4.263289928436279 }, { "id": "identity_128671", "image": "VQA2.0/images/50591.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs are there in the living area?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.875, "loss": 4.133222579956055 }, { "id": "identity_330717", "image": "ScienceQA/images/5822.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the place shown?\nChoices:\n(A) Rhode Island\n(B) Illinois\n(C) Massachusetts\n(D) Ohio" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.203125, "loss": 4.798842906951904 }, { "id": "identity_271143", "image": "FigureQA/images/61502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly describes the smoothness of Light Slate? \n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.75, "loss": 4.247711181640625 }, { "id": "identity_299009", "image": "AI2D/images/3619.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is located at the center of E?\nChoices:\n(A) lamellar body\n(B) air space\n(C) alveolar fluid\n(D) tubular myelin" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.828125, "loss": 4.3829216957092285 }, { "id": "identity_267746", "image": "Super-CLEVR/images/superCLEVR_new_000090.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the yellow object in front of the small metal airliner the same size as the motorbike located to the right of the cyan matte motorbike?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 26.671875, "loss": 5.204462051391602 }, { "id": "identity_2917", "image": "DVQA/images/bar_train_00117468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the fifth bar from the left?" }, { "from": "gpt", "value": "The answer is storm" } ], "clip_score": 14.5390625, "loss": 4.219995021820068 }, { "id": "identity_279655", "image": "TextVQA/images/5ecd7c8ef530ac52.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which type of books are being promoted in the advertisement?" }, { "from": "gpt", "value": "The answer is new" } ], "clip_score": 22.765625, "loss": 4.401773452758789 }, { "id": "identity_258682", "image": "PMC-VQA/images/PMC7509077_f0005_259228.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the image, identify the type of intensity artefact and describe the characteristics that differentiate it from other types of artefacts" }, { "from": "gpt", "value": "The answer is Rim+ type intensity artefact with a bright rim surrounding the lesion" } ], "clip_score": 21.96875, "loss": 4.07410192489624 }, { "id": "identity_19090", "image": "ScienceQA/images/15217.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these two objects have in common?\nChoices:\n(A) smooth\n(B) slippery" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.09375, "loss": 4.108353614807129 }, { "id": "identity_192769", "image": "A-OKVQA/images/126204.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many condiment bottles are visible on the table?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 12.34375, "loss": 4.41782283782959 }, { "id": "identity_315932", "image": "TextVQA/images/954293b4781134d8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this place?" }, { "from": "gpt", "value": "The answer is coach" } ], "clip_score": 12.3203125, "loss": 5.56442403793335 }, { "id": "identity_290751", "image": "PMC-VQA/images/PMC7837788_fig3_353592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the images on day 24 following the onset of symptoms?\nChoices:\n(A) resolution of pulmonary artery defects \n(B) no changes in pulmonary artery defects \n(C) worsening of pulmonary artery defects \n(D) unclear pulmonary artery defects" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.203125, "loss": 4.374606132507324 }, { "id": "identity_306096", "image": "A-OKVQA/images/445019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is near this?\nChoices:\n(A) toddler\n(B) baby\n(C) batter\n(D) dog" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 3.70703125, "loss": 4.339203834533691 }, { "id": "identity_312130", "image": "AI2D/images/614.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the final stage of a mosquito?\nChoices:\n(A) pupa\n(B) egg\n(C) larva\n(D) adult" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.09375, "loss": 4.313729286193848 }, { "id": "identity_218695", "image": "A-OKVQA/images/407522.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the majority of the audience's hats?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 16.640625, "loss": 4.1090617179870605 }, { "id": "identity_326903", "image": "A-OKVQA/images/136323.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the tarps shown on these vessels do for the inside of these?\nChoices:\n(A) hold water\n(B) nothing\n(C) keep dry\n(D) signal" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.765625, "loss": 4.874882698059082 }, { "id": "identity_331325", "image": "PMC-VQA/images/PMC6636264_f3_10016.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is this labeled as probable UIP?\nChoices:\n(A) Due to the presence of honeycombing\n(B) Due to the absence of honeycombing\n(C) Due to the presence of other pulmonary pattern\n(D) Due to the quality of the image" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.171875, "loss": 5.154681205749512 }, { "id": "identity_63135", "image": "ScienceQA/images/4876.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of magnets are shown in the image?" }, { "from": "gpt", "value": "The answer is Ring magnets" } ], "clip_score": 28.21875, "loss": 4.277902126312256 }, { "id": "identity_35610", "image": "DocVQA/images/rnjf0226_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the subheading under the title of \"specific findings\"?" }, { "from": "gpt", "value": "The answer is Reaction to IR Information" } ], "clip_score": 21.0625, "loss": 4.754702568054199 }, { "id": "identity_2479", "image": "VizWiz/images/VizWiz_train_00011027.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What can you see on the picture?" }, { "from": "gpt", "value": "The answer is cup" } ], "clip_score": 24.65625, "loss": 4.041357040405273 }, { "id": "identity_317559", "image": "IconQA/images/72840.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of shapes even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.1875, "loss": 4.348575592041016 }, { "id": "identity_202604", "image": "A-OKVQA/images/556176.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible electronic devices are there in the room?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.578125, "loss": 4.874598503112793 }, { "id": "identity_281280", "image": "A-OKVQA/images/365618.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of lights are currently illuminated on the car? \n(A) headlights \n(B) siren \n(C) brake \n(D) turn signal" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.0546875, "loss": 4.03474760055542 }, { "id": "identity_287222", "image": "VQA-AS/images/3122.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do their shirts have the same color?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 15.1953125, "loss": 4.112209796905518 }, { "id": "identity_110190", "image": "Super-CLEVR/images/superCLEVR_new_001871.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image have wings?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.859375, "loss": 4.246825218200684 }, { "id": "identity_36502", "image": "ScienceQA/images/10728.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Rhode Island?\nChoices:\n(A) Albany\n(B) Providence\n(C) Newport\n(D) Concord" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.5625, "loss": 4.160623073577881 }, { "id": "identity_269669", "image": "TQA/images/abc_question_images/human_system_respiratory_12882.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What letter is used to represent the Pleura?\nChoices:\n(A) A\n(B) H\n(C) N\n(D) L" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.265625, "loss": 4.476161003112793 }, { "id": "identity_188277", "image": "VizWiz/images/VizWiz_train_00013480.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many plastic bags are visible in the background?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 27.28125, "loss": 4.587589263916016 }, { "id": "identity_28874", "image": "A-OKVQA/images/404388.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the red and black tool on the right called?\nChoices:\n(A) hammer\n(B) vice grip\n(C) screwdriver\n(D) pipe wrench" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.15625, "loss": 4.483359336853027 }, { "id": "identity_311492", "image": "AI2D/images/4176.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does lithification turn sediment into?\nChoices:\n(A) Magma\n(B) Sedimentary rock\n(C) Igneous rock\n(D) Metamorphic rock" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.359375, "loss": 4.103348731994629 }, { "id": "identity_281567", "image": "A-OKVQA/images/60647.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Towards what is the person wearing the black helmet running?\nChoices:\n(A) home plate\n(B) motorcycle\n(C) circus cannon\n(D) brick wall" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.8125, "loss": 4.614814281463623 }, { "id": "identity_322530", "image": "A-OKVQA/images/105732.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is happening here?\nChoices:\n(A) lesson\n(B) music audition\n(C) olympic trials\n(D) initiation" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.9453125, "loss": 4.189150810241699 }, { "id": "identity_333401", "image": "A-OKVQA/images/341132.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the person with gray hair leaning towards the table?\nChoices:\n(A) hiding\n(B) resting\n(C) sitting down\n(D) blowing candles" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.390625, "loss": 4.202854156494141 }, { "id": "identity_337307", "image": "A-OKVQA/images/366789.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person on the bicycle wearing?\nChoices:\n(A) hat\n(B) crown\n(C) backpack\n(D) tiara" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.421875, "loss": 4.579322814941406 }, { "id": "identity_13990", "image": "A-OKVQA/images/248818.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the paint job help the vehicle do?\nChoices:\n(A) blend in\n(B) stay dry\n(C) avoid rust\n(D) drive fast" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.953125, "loss": 4.354784965515137 }, { "id": "identity_290270", "image": "PMC-VQA/images/PMC8065092_FIG1_423898.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Towards what do the arrows in the image indicate?\nChoices:\n(A) Hemorrhage \n(B) Brain tumor \n(C) Infarct \n(D) Normal brain tissue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.078125, "loss": 5.036751747131348 }, { "id": "identity_274160", "image": "ScienceQA/images/17864.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) New Jersey\n(B) Connecticut\n(C) New York\n(D) Pennsylvania" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.515625, "loss": 4.605734825134277 }, { "id": "identity_127825", "image": "TextVQA/images/317b3596da5dcca3.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many traffic cones are visible on the street?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 26.484375, "loss": 4.468217372894287 }, { "id": "identity_270404", "image": "DVQA/images/bar_train_00169832.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which bar label is located fourth from the left in each group?" }, { "from": "gpt", "value": "The answer is pause" } ], "clip_score": 20.078125, "loss": 4.295163154602051 }, { "id": "identity_320803", "image": "PMC-VQA/images/PMC5965134_f1-cpcem-02-26_318079.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the arrowhead in the image?\nChoices:\n(A) Left ventricular mass \n(B) Right atrial mass \n(C) Right ventricular mass \n(D) Left atrial mass" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.9375, "loss": 4.735273361206055 }, { "id": "identity_323637", "image": "PMC-VQA/images/PMC3970259_fig1_277548.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the enlarged lymph nodes?\nChoices:\n(A) In the head \n(B) In the chest \n(C) In the abdomen \n(D) In the legs" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.328125, "loss": 4.2185163497924805 }, { "id": "identity_225790", "image": "Super-CLEVR/images/superCLEVR_new_000389.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is to the right of the metal object and behind the purple scooter?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 29.265625, "loss": 4.218908309936523 }, { "id": "identity_241570", "image": "A-OKVQA/images/296782.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the potential hazards the skateboarder might face in this environment, and which one is the most likely based on the given conditions?" }, { "from": "gpt", "value": "The answer is pricked feet" } ], "clip_score": 23.859375, "loss": 7.266072750091553 }, { "id": "identity_33398", "image": "A-OKVQA/images/386272.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people in line concerned about?\nChoices:\n(A) buying tickets\n(B) reclaiming baggage\n(C) buying food\n(D) making reservations" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.3125, "loss": 4.54074764251709 }, { "id": "identity_264407", "image": "DVQA/images/bar_train_00013315.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which bar label is assigned to the second bar from the bottom in each group?" }, { "from": "gpt", "value": "The answer is vote" } ], "clip_score": 22.890625, "loss": 4.410981178283691 }, { "id": "identity_289153", "image": "VQA-AS/images/6976.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me the location of the plant?" }, { "from": "gpt", "value": "The answer is floor" } ], "clip_score": 16.890625, "loss": 4.682652950286865 }, { "id": "identity_334102", "image": "A-OKVQA/images/491981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would people use to exit if there was a fire?\nChoices:\n(A) emergency door\n(B) open entryway\n(C) trap door\n(D) open window" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 8.9921875, "loss": 4.393138408660889 }, { "id": "identity_326562", "image": "A-OKVQA/images/188009.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of timed skill test is this person participating in?\nChoices:\n(A) grind\n(B) ollie\n(C) flip\n(D) slalom" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.734375, "loss": 4.0326642990112305 }, { "id": "identity_309591", "image": "VQA2.0/images/528971.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any utensils on the table?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 14.4921875, "loss": 4.087814807891846 }, { "id": "identity_325008", "image": "DocVQA/images/qlhx0227_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where was the article \"Vertebrate Keratinization\" published?" }, { "from": "gpt", "value": "The answer is primate news" } ], "clip_score": 21.0625, "loss": 4.642529487609863 }, { "id": "identity_26826", "image": "PMC-VQA/images/PMC6262857_BIO038661F3_402705.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in green? \nChoices:\n(A) Gsh2 \n(B) Pax6 \n(C) DAPI \n(D) None of the above. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.484375, "loss": 4.852963924407959 }, { "id": "identity_115206", "image": "TextVQA/images/3fbdd8ec7e440208.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the address of the event venue as mentioned on the flyer?" }, { "from": "gpt", "value": "The answer is Graphicriver Street, 23" } ], "clip_score": 22.296875, "loss": 5.029470443725586 }, { "id": "identity_328648", "image": "PMC-VQA/images/PMC5312786_f0020_128569.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is highlighted with white circles on the left part of this image?\nChoices:\n(A) ECoG contacts\n(B) BOLD increases\n(C) COIα, COIβ, and COIγ\n(D) Spectral differences" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.53125, "loss": 4.352935791015625 }, { "id": "identity_133970", "image": "VQA2.0/images/473072.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pickle slices are visible on the plate?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 20.078125, "loss": 4.479241371154785 }, { "id": "identity_104117", "image": "Super-CLEVR/images/superCLEVR_new_001532.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly behind the green car?" }, { "from": "gpt", "value": "The answer is A red bus" } ], "clip_score": 21.625, "loss": 4.386950969696045 }, { "id": "identity_252117", "image": "Super-CLEVR/images/superCLEVR_new_000846.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the bicycle that is directly in front of the green motorcycle and to the left of the purple airplane?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 34.8125, "loss": 4.538300514221191 }, { "id": "identity_229333", "image": "ChartQA/images/multi_col_316.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average infection rate for each virus, and which virus has the lowest average infection rate?" }, { "from": "gpt", "value": "The answer is MERS" } ], "clip_score": 27.359375, "loss": 4.052888870239258 }, { "id": "identity_311197", "image": "ScienceQA/images/1480.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this passage describe the weather or something else?\nChoices:\n(A) weather\n(B) climate" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.7734375, "loss": 4.139609336853027 }, { "id": "identity_34933", "image": "PMC-VQA/images/PMC7914843_molecules-26-00847-f002_377457.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of tissue is represented by the blue triangle? \nChoices:\n(A) Muscle tissue \n(B) Nervous tissue \n(C) Bone tissue \n(D) Epithelial tissue " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.578125, "loss": 4.889407634735107 }, { "id": "identity_12064", "image": "VQA2.0/images/67407.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are there?" }, { "from": "gpt", "value": "The answer is lot" } ], "clip_score": 14.4609375, "loss": 4.313835144042969 }, { "id": "identity_31680", "image": "PMC-VQA/images/PMC6503986_DEV169342F3_476197.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is labeled by the green color in the images? \nChoices:\n(A) Dextran \n(B) Cell nuclei \n(C) Cyst cell membranes \n(D) Mitochondria " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.34375, "loss": 4.656586647033691 }, { "id": "identity_272597", "image": "Super-CLEVR/images/superCLEVR_new_001267.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the brown object?" }, { "from": "gpt", "value": "The answer is fighter" } ], "clip_score": 14.109375, "loss": 4.5957489013671875 }, { "id": "identity_324382", "image": "ScienceQA/images/13094.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the direction of this action?\nChoices:\n(A) away from the bow string\n(B) toward the bow string" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.234375, "loss": 4.156832695007324 }, { "id": "identity_287633", "image": "A-OKVQA/images/416981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of stove is depicted in the image provided? \nOptions:\n(A) electric\n(B) natural gas\n(C) wood\n(D) conductive" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.9140625, "loss": 4.687585353851318 }, { "id": "identity_330096", "image": "PMC-VQA/images/PMC3681594_F2_211112.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the appearance of this in the portal phase?\nChoices:\n(A) Solid \n(B) Cystic \n(C) Necrotic \n(D) Enhanced" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.484375, "loss": 4.8302083015441895 }, { "id": "identity_260035", "image": "IconQA/images/37332.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cakes are there in each row and each column, and what is the total number of cakes?" }, { "from": "gpt", "value": "The answer is 80" } ], "clip_score": 30.09375, "loss": 4.152855396270752 }, { "id": "identity_119359", "image": "Super-CLEVR/images/superCLEVR_new_001508.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have wheels?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.921875, "loss": 4.706233501434326 }, { "id": "identity_267619", "image": "PMC-VQA/images/PMC6059037_F2_345718.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, what is the representation of the green arrow? Is it:\n(A) Column A\n(B) Column B\n(C) Column C\n(D) Column D" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.8515625, "loss": 4.3211798667907715 }, { "id": "identity_281341", "image": "PMC-VQA/images/PMC7234030_FIG1_173195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the observation depicted in the image provided?\nChoices:\n(A) Cavitation \n(B) Consolidation \n(C) Nodular opacities \n(D) Pleural effusion" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.65625, "loss": 4.2897114753723145 }, { "id": "identity_262510", "image": "Super-CLEVR/images/superCLEVR_new_000601.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other items in the same color as the small airplane?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 24.828125, "loss": 5.1666951179504395 }, { "id": "identity_292298", "image": "ScienceQA/images/1021.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property best describes this object: scratchy or slippery?\nChoices:\n(A) scratchy\n(B) slippery" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.375, "loss": 4.332283973693848 }, { "id": "identity_192620", "image": "PMC-VQA/images/PMC4058584_fig1_298397.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is being assessed in this ultrasound image?" }, { "from": "gpt", "value": "The answer is A blood vessel" } ], "clip_score": 32.1875, "loss": 4.33721399307251 }, { "id": "identity_279300", "image": "AI2D/images/771.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter corresponds to the first quarter moon in the given choices: (A) c, (B) e, (C) d, or (D) b?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.65625, "loss": 4.100961208343506 }, { "id": "identity_217832", "image": "TextVQA/images/7e12d937e6e90065.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stove burners are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 30.0625, "loss": 4.620851039886475 }, { "id": "identity_315416", "image": "Super-CLEVR/images/superCLEVR_new_000377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any small cyan tandem bikes on the right side of this?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 22.5, "loss": 5.198755264282227 }, { "id": "identity_302583", "image": "FigureQA/images/26685.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Crimson?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.375, "loss": 4.152233123779297 }, { "id": "identity_31859", "image": "VizWiz/images/VizWiz_train_00016973.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the item?" }, { "from": "gpt", "value": "The answer is dsq" } ], "clip_score": 18.890625, "loss": 6.269840717315674 }, { "id": "identity_197386", "image": "A-OKVQA/images/524470.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any traffic visible in the image?" }, { "from": "gpt", "value": "The answer is No traffic" } ], "clip_score": 22.40625, "loss": 4.234732627868652 }, { "id": "identity_333526", "image": "Super-CLEVR/images/superCLEVR_new_000750.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer small jets than small purple metallic cruisers?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 22.515625, "loss": 5.177392482757568 }, { "id": "identity_244745", "image": "ScienceQA/images/4713.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How do the climatic conditions of Cape Breton Highlands National Park influence its flora and fauna diversity?" }, { "from": "gpt", "value": "The answer is Limited growing season supports cold-resistant flora and adapted fauna" } ], "clip_score": 29.640625, "loss": 4.132808685302734 }, { "id": "identity_46114", "image": "DVQA/images/bar_train_00125154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset has the highest accuracy for the 'tree' algorithm?" }, { "from": "gpt", "value": "The answer is wheel" } ], "clip_score": 33.625, "loss": 4.189099311828613 }, { "id": "identity_280093", "image": "A-OKVQA/images/567342.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is abundantly visible inside the large silver round cans mentioned here?\nChoices:\n(A) meats\n(B) condiments\n(C) main dish\n(D) side dish" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.625, "loss": 4.390576362609863 }, { "id": "identity_66519", "image": "ScienceQA/images/19245.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many poles are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.109375, "loss": 4.739335536956787 }, { "id": "identity_31488", "image": "A-OKVQA/images/259099.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What vehicle is boarded in this building?\nChoices:\n(A) cars\n(B) bus\n(C) train\n(D) gondola" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 7.55078125, "loss": 4.220857620239258 }, { "id": "identity_210828", "image": "VQA-AS/images/17547.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of tree is depicted to the right in the image?" }, { "from": "gpt", "value": "The answer is Broadleaf" } ], "clip_score": 17.515625, "loss": 4.344659805297852 }, { "id": "identity_254917", "image": "VQA2.0/images/99952.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of event is likely taking place in this room, considering the presence of photography equipment and the arrangement of chairs?" }, { "from": "gpt", "value": "The answer is A photo session or waiting area" } ], "clip_score": 33.125, "loss": 4.208926677703857 }, { "id": "identity_37183", "image": "VQA2.0/images/63938.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the counters?" }, { "from": "gpt", "value": "The answer is green" } ], "clip_score": 17.796875, "loss": 4.341708183288574 }, { "id": "identity_295794", "image": "A-OKVQA/images/197167.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which direction is the narrowest visible point of the board pointing? \n(A) west\n(B) south\n(C) north\n(D) east" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.3125, "loss": 4.189786911010742 }, { "id": "identity_37845", "image": "A-OKVQA/images/464237.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is under the kites and to the left?\nChoices:\n(A) apple\n(B) cardboard boxes\n(C) bicycle\n(D) scarecrow" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.984375, "loss": 4.1092753410339355 }, { "id": "identity_23727", "image": "A-OKVQA/images/98636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of surface is the bird standing on?\nChoices:\n(A) metal\n(B) glass\n(C) brick\n(D) concrete" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.9765625, "loss": 4.7610182762146 }, { "id": "identity_287654", "image": "AI2D/images/972.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term used to describe a flower with a bell shape?\nChoices:\n(A) Funnelform\n(B) Tubular\n(C) Campanulate\n(D) Cruciform" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.453125, "loss": 4.114007472991943 }, { "id": "identity_277637", "image": "FigureQA/images/7634.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Turquoise considered the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.546875, "loss": 4.07218074798584 }, { "id": "identity_331301", "image": "VizWiz/images/VizWiz_train_00017646.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label say on this? Thank you." }, { "from": "gpt", "value": "The answer is peroxyl colgate" } ], "clip_score": 34.25, "loss": 4.122326374053955 }, { "id": "identity_199879", "image": "PMC-VQA/images/PMC8133422_pone.0251788.g001_445198.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different views of the knee are presented in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 29.03125, "loss": 4.073732376098633 }, { "id": "identity_238398", "image": "Super-CLEVR/images/superCLEVR_new_001132.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the small thing that is right of the big thing that is behind the big rubber car?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 21.6875, "loss": 4.041835308074951 }, { "id": "identity_295737", "image": "A-OKVQA/images/111930.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which company's logo is visible on the white snow ramp among the following options: Prada, Gucci, DC, or Vans?\nChoices:\n(A) prada\n(B) gucci\n(C) dc\n(D) vans" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.75, "loss": 4.011480808258057 }, { "id": "identity_6838", "image": "TabMWP/images/12018.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is the function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.84375, "loss": 4.417769432067871 }, { "id": "identity_309123", "image": "TQA/images/question_images/atomic_structure_9016.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which particle carries a positive charge?\nChoices:\n(A) Nucleus\n(B) Proton\n(C) Electron\n(D) Neutron" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.734375, "loss": 4.099608421325684 }, { "id": "identity_302230", "image": "A-OKVQA/images/209930.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of bread might uniquely be available near this place?\nChoices:\n(A) hotcross buns\n(B) sourdough\n(C) rye\n(D) cheddar bay" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.421875, "loss": 4.328736305236816 }, { "id": "identity_327623", "image": "PMC-VQA/images/PMC5093274_fig6_90987.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in red in the photomicrographs?\nChoices:\n(A) Turnbull's Blue staining\n(B) hBMSC injection\n(C) MAB1281\n(D) Fluorescence" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.984375, "loss": 4.854941368103027 }, { "id": "identity_334203", "image": "PMC-VQA/images/PMC6636501_fig1_10077.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of images are shown on the left side?\nChoices:\n(A) CT scans\n(B) PET scans\n(C) MRI sequences\n(D) X-rays" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.921875, "loss": 4.326540946960449 }, { "id": "identity_32522", "image": "VQA2.0/images/402987.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are both women wearing jeans?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 18.34375, "loss": 4.128293037414551 }, { "id": "identity_297901", "image": "FigureQA/images/89098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the roughest? \nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.2734375, "loss": 4.025171279907227 }, { "id": "identity_189233", "image": "Super-CLEVR/images/superCLEVR_new_001357.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the largest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Cyan" } ], "clip_score": 22.328125, "loss": 4.0667290687561035 }, { "id": "identity_296784", "image": "A-OKVQA/images/71908.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location can this ramp be found?\nChoices:\n(A) skate park\n(B) boardwalk\n(C) grocery parking\n(D) parking lot" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.046875, "loss": 4.102847576141357 }, { "id": "identity_331981", "image": "PMC-VQA/images/PMC3166290_pone-0024187-g001_107299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the brain is depicted in the image?\nChoices:\n(A) Cerebellum \n(B) Thalamus \n(C) Hypothalamus \n(D) Medulla" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.1875, "loss": 4.1664252281188965 }, { "id": "identity_317130", "image": "A-OKVQA/images/376921.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this person about to do?\nChoices:\n(A) move residences\n(B) sales call\n(C) work errand\n(D) cook lunch" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.2578125, "loss": 4.539904594421387 }, { "id": "identity_221819", "image": "PMC-VQA/images/PMC6739015_F1_36540.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image shown?" }, { "from": "gpt", "value": "The answer is Coronal plane" } ], "clip_score": 23.375, "loss": 4.088376522064209 }, { "id": "identity_314091", "image": "A-OKVQA/images/130816.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the Cardinals playing?\nChoices:\n(A) home field\n(B) wrigley\n(C) dodger stadium\n(D) shea" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.75, "loss": 4.174201011657715 }, { "id": "identity_261417", "image": "PMC-VQA/images/PMC8312902_F1_20344.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the red and blue box insets in the image represent?\nChoices:\n(A) An electron microscope \n(B) Viral replication complex \n(C) RNA \n(D) A protein" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.96875, "loss": 4.81880521774292 }, { "id": "identity_284601", "image": "A-OKVQA/images/65643.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is the name of the three-wheeled vehicle shown in the middle of the picture: rickshaw, scooter, tuk tuk, or cushman?\nChoices:\n(A) rickshaw\n(B) scooter\n(C) tuk tuk\n(D) cushman" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.359375, "loss": 4.0652265548706055 }, { "id": "identity_105858", "image": "VQA2.0/images/346004.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boxes are attached to the bicycle?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.359375, "loss": 4.5824151039123535 }, { "id": "identity_271407", "image": "PMC-VQA/images/PMC7457485_Fig1_242646.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What anomaly is detected in the aorta?\nChoices:\n(A) Intimal flap \n(B) Atheroma \n(C) Fusiform aneurysm \n(D) Saccular aneurysm" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.140625, "loss": 4.3722405433654785 }, { "id": "identity_267117", "image": "FigureQA/images/48399.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Pale Green the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.09375, "loss": 4.306972026824951 }, { "id": "identity_277008", "image": "ScienceQA/images/15477.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) Georgia\n(B) South Carolina\n(C) Massachusetts\n(D) Connecticut" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.8125, "loss": 4.647395133972168 }, { "id": "identity_151629", "image": "TQA/images/abc_question_images/earth_moon_phases_10138.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the phase before the Full Moon?" }, { "from": "gpt", "value": "The answer is V" } ], "clip_score": 27.34375, "loss": 4.343396186828613 }, { "id": "identity_224861", "image": "AI2D/images/2146.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the diagram, what is the sequence of stages from eggs to an adult shark, and which letter represents the stage immediately before the adult shark?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.796875, "loss": 4.545123100280762 }, { "id": "identity_149524", "image": "DocVQA/images/qpkg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the letter addressed to?" }, { "from": "gpt", "value": "The answer is Doctor" } ], "clip_score": 19.796875, "loss": 5.217698097229004 }, { "id": "identity_108360", "image": "VQA2.0/images/428288.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible patches of grass are there on the rock?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 16.703125, "loss": 4.226273536682129 }, { "id": "identity_253956", "image": "TextVQA/images/95138708aa79803c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different comic titles are displayed on the table, and what are their names?" }, { "from": "gpt", "value": "The answer is Three titles: Space Boy, Deadtective, Danny Fanny" } ], "clip_score": 27.296875, "loss": 4.300265789031982 }, { "id": "identity_66034", "image": "ScienceQA/images/7802.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What would be the result if the ball of wet clay is left to dry?" }, { "from": "gpt", "value": "The answer is It becomes hard" } ], "clip_score": 38.0, "loss": 4.242955684661865 }, { "id": "identity_290282", "image": "A-OKVQA/images/273653.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of team is being celebrated in this scenario? \n(A) NFL football\n(B) MLB baseball\n(C) NHL hockey\n(D) NBA basketball" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.8515625, "loss": 4.097951412200928 }, { "id": "identity_299817", "image": "A-OKVQA/images/428595.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options do the women here find most interesting: walkway, goose, child, or duck?\nChoices:\n(A) walkway\n(B) goose\n(C) child\n(D) duck" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.25, "loss": 4.187978744506836 }, { "id": "identity_118761", "image": "TextVQA/images/7ed1317c76615c11.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of the monitors displayed?" }, { "from": "gpt", "value": "The answer is Marshall" } ], "clip_score": 26.765625, "loss": 4.649214267730713 }, { "id": "identity_76681", "image": "TQA/images/abc_question_images/nuclear_energy_18107.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of particles are shown in the diagram?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 31.171875, "loss": 4.121655464172363 }, { "id": "identity_278418", "image": "ScienceQA/images/5657.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is a type of fish? \nChoices:\n(A) clownfish\n(B) gray wolf" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.328125, "loss": 4.114953517913818 }, { "id": "identity_331840", "image": "A-OKVQA/images/449029.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What flag is visible here?\nChoices:\n(A) france\n(B) china\n(C) united states\n(D) germany" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.4140625, "loss": 4.014230728149414 }, { "id": "identity_337270", "image": "PMC-VQA/images/PMC7418900_F3_232584.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of image is shown in the caption?\nChoices:\n(A) T2-weighted image \n(B) PET scan image \n(C) CT scan image \n(D) X-ray image" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.28125, "loss": 4.110194206237793 }, { "id": "identity_296262", "image": "Super-CLEVR/images/superCLEVR_new_000966.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the small rubber object to the right of the brown matte bus and the small matte object in front of the large cyan matte double bus the same shape?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 35.78125, "loss": 5.443814277648926 }, { "id": "identity_29747", "image": "VQA2.0/images/320875.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there two adults?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 11.8046875, "loss": 4.195217609405518 }, { "id": "identity_280988", "image": "Super-CLEVR/images/superCLEVR_new_000819.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the motorbike located in front of the red object behind the large metal object on the left side of the cruiser?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 26.25, "loss": 4.935168266296387 }, { "id": "identity_121100", "image": "PMC-VQA/images/PMC7921074_F3_380336.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many legs can be observed in this image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 19.875, "loss": 4.5155768394470215 }, { "id": "identity_265067", "image": "AI2D/images/2178.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the life cycle phase that comes after the planula larva phase? \nChoices:\n(A) Ephyra\n(B) Polyp phase\n(C) Medusa\n(D) Polyp with buds" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.234375, "loss": 4.180277347564697 }, { "id": "identity_122787", "image": "TextVQA/images/6719e91a5b7f30e6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrians are visible on the crosswalk?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.453125, "loss": 4.598383903503418 }, { "id": "identity_215160", "image": "TextVQA/images/d91417b34c914c9b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of district is located at the bottom left corner of the floor plan?" }, { "from": "gpt", "value": "The answer is Audio District" } ], "clip_score": 24.09375, "loss": 4.604791641235352 }, { "id": "identity_297300", "image": "TQA/images/question_images/parts_fish_2909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which fin is utilized for forward movement among the following options: (A) dorsal, (B) caudal, (C) pelvic, or (D) pectoral?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.203125, "loss": 4.097641944885254 }, { "id": "identity_296441", "image": "ScienceQA/images/21193.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city serves as the capital of North Carolina?\n(A) Raleigh\n(B) Charlotte\n(C) Jackson\n(D) Atlanta" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.9375, "loss": 4.209677696228027 }, { "id": "identity_196651", "image": "A-OKVQA/images/122839.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many public call boxes are visible in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 20.53125, "loss": 4.332747459411621 }, { "id": "identity_132804", "image": "TextVQA/images/26f3623b930a4fb3.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many McDonald's delivery motorcycles are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.046875, "loss": 4.398594856262207 }, { "id": "identity_260275", "image": "A-OKVQA/images/247190.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the possible reasons for the elephant being chained, and how can you determine which one is correct based on the context provided in the image?" }, { "from": "gpt", "value": "The answer is It's property" } ], "clip_score": 27.3125, "loss": 6.017425060272217 }, { "id": "identity_45872", "image": "DVQA/images/bar_train_00137903.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the object category with the smallest total preference?" }, { "from": "gpt", "value": "The answer is manner" } ], "clip_score": 31.28125, "loss": 5.982461452484131 }, { "id": "identity_338358", "image": "TextVQA/images/3492f2b6c34e80cc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the character drawn to the left of this?" }, { "from": "gpt", "value": "The answer is roz" } ], "clip_score": 24.84375, "loss": 5.4969611167907715 }, { "id": "identity_299929", "image": "A-OKVQA/images/247190.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind having a chain on the elephant?\nChoices:\n(A) it's dangerous\n(B) it's injured\n(C) it's property\n(D) it's stylish" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.171875, "loss": 4.240692138671875 }, { "id": "identity_326303", "image": "PMC-VQA/images/PMC6607307_gch2201800040-fig-0003_3410.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the material shown in the SEM images?\nChoices:\n(A) Metal\n(B) Plastic\n(C) Glass\n(D) Wood" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.15625, "loss": 4.1870903968811035 }, { "id": "identity_283499", "image": "A-OKVQA/images/440534.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activities are the two individuals directly above and below the snowboarder engaged in?\nChoices:\n(A) clapping\n(B) cheering\n(C) filming\n(D) spectating" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.625, "loss": 4.227322101593018 }, { "id": "identity_32106", "image": "VQA2.0/images/409681.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of hat is the closest person wearing?" }, { "from": "gpt", "value": "The answer is sun" } ], "clip_score": 14.828125, "loss": 4.99066686630249 }, { "id": "identity_77321", "image": "ScienceQA/images/12305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tertiary consumers are depicted in this food web?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 30.171875, "loss": 4.799164295196533 }, { "id": "identity_106249", "image": "PMC-VQA/images/PMC4989014_Fig5_63839.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image?" }, { "from": "gpt", "value": "The answer is Axial" } ], "clip_score": 24.078125, "loss": 4.09636926651001 }, { "id": "identity_16735", "image": "PMC-VQA/images/PMC8063040_F1_423084.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the view shown in the image? \nChoices:\n(A) Axial view \n(B) Coronal view \n(C) Sagittal view \n(D) Oblique view " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.953125, "loss": 4.346184730529785 }, { "id": "identity_245172", "image": "PMC-VQA/images/PMC5815729_F1_276744.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the steps involved in performing rigid image registration, and how does it differ from other methods listed (CT0, CT50, Manual)?" }, { "from": "gpt", "value": "The answer is Rigid registration involves mathematical transformations for reproducible image alignment" } ], "clip_score": 23.796875, "loss": 4.056735515594482 }, { "id": "identity_3986", "image": "IconQA/images/38604.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there enough spoons for every plate?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.34375, "loss": 4.150125026702881 }, { "id": "identity_105491", "image": "VQA2.0/images/8431.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bears are in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.5, "loss": 4.375461101531982 }, { "id": "identity_26245", "image": "PMC-VQA/images/PMC7564854_F0001_275742.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the clavicle is involved in the image? \nChoices:\n(A) Medial end \n(B) Lateral end \n(C) Middle of the clavicle \n(D) None of the above " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.015625, "loss": 4.063319206237793 }, { "id": "identity_290325", "image": "PMC-VQA/images/PMC7977565_Fig2_395418.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which area of the brain was impacted by the lesion?\nOptions:\n(A) Left capsular-thalamic region\n(B) Right capsular-thalamic region\n(C) Midline\n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.234375, "loss": 4.403540134429932 }, { "id": "identity_920", "image": "AI2D/images/4177.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the above diagram what does the letter B refers to?\nChoices:\n(A) magma\n(B) rocks\n(C) sediment\n(D) metamorphic" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.046875, "loss": 4.485393047332764 }, { "id": "identity_13411", "image": "TQA/images/question_images/velocity_time_graphs_8214.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which graph shows a velocity value in a straight line parallel to time?\nChoices:\n(A) negative acceleration\n(B) positive quaternion\n(C) positive acceleration\n(D) positive velocity" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 34.5625, "loss": 4.367519855499268 }, { "id": "identity_36935", "image": "PMC-VQA/images/PMC5872966_materials-11-00387-f001_294549.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the inset of the image? \nChoices:\n(A) The top view of thin BCN sheets \n(B) The structure of BCN sheets at the atomic level \n(C) The crystallographic orientation of thin BCN sheets \n(D) The size distribution of thin BCN sheets " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.875, "loss": 4.135257720947266 }, { "id": "identity_282892", "image": "VQA2.0/images/153749.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which logo is displayed on the man's hat?" }, { "from": "gpt", "value": "The answer is scamp" } ], "clip_score": 15.4296875, "loss": 4.149961948394775 }, { "id": "identity_224441", "image": "Super-CLEVR/images/superCLEVR_new_000584.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is the same material as the big purple object and is located to the left of the gold car?" }, { "from": "gpt", "value": "The answer is cyan" } ], "clip_score": 30.34375, "loss": 4.438266277313232 }, { "id": "identity_296564", "image": "A-OKVQA/images/490306.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How is the girl imitating a model by positioning her body?\nChoices:\n(A) walking\n(B) posing\n(C) crying\n(D) twirling" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.828125, "loss": 4.400867462158203 }, { "id": "identity_118713", "image": "TextVQA/images/0dade82f8d16aeeb.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct words can be read on the 'STOP' sign?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 12.875, "loss": 4.930184364318848 }, { "id": "identity_28552", "image": "AI2D/images/2868.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the epidermis cover?\nChoices:\n(A) hair bulb\n(B) fat\n(C) dermis\n(D) vein" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.28125, "loss": 4.169425964355469 }, { "id": "identity_48441", "image": "ChartQA/images/9845.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the trend of unfavorable views of the U.S. from 2002 to 2015?" }, { "from": "gpt", "value": "The answer is It generally decreased" } ], "clip_score": 31.828125, "loss": 4.349571228027344 }, { "id": "identity_1538", "image": "FigureQA/images/38220.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Blue the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.828125, "loss": 4.075125217437744 }, { "id": "identity_286986", "image": "PMC-VQA/images/PMC8138938_F3_446747.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the turquoise color represent in the image? \nOptions:\n(A) Presence of cell walls,\n(B) Location of nucleus-nucleus contact areas,\n(C) Location of cell membranes,\n(D) Location of NPs crossing a cell wall." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.609375, "loss": 4.6266374588012695 }, { "id": "identity_280333", "image": "ScienceQA/images/19654.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which question is most suitable for Irma's experiment to address:\n(A) Does the temperature inside white boxes increase more than in black boxes when exposed to sunlight?\n(B) Do the sizes of the boxes affect the internal temperatures?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.3125, "loss": 4.031578540802002 }, { "id": "identity_266969", "image": "FigureQA/images/83338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Blue Violet the curve with the minimum area?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.953125, "loss": 4.138676166534424 }, { "id": "identity_127744", "image": "A-OKVQA/images/196192.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the largest suitcase in the image?" }, { "from": "gpt", "value": "The answer is Dark Blue" } ], "clip_score": 27.53125, "loss": 4.1966552734375 }, { "id": "identity_291462", "image": "IconQA/images/51737.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the first picture is a fish, which picture corresponds to the seventh position in the sequence? \nChoices:\n(A) sub\n(B) fish\n(C) turtle" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.765625, "loss": 4.425073146820068 }, { "id": "identity_108311", "image": "VQA-AS/images/7930.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the frame of the picture hanging on the wall?" }, { "from": "gpt", "value": "The answer is gold" } ], "clip_score": 25.4375, "loss": 4.229426383972168 }, { "id": "identity_319766", "image": "A-OKVQA/images/455588.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term for the boat's situation?\nChoices:\n(A) mooring\n(B) lassoing\n(C) docked\n(D) anchored" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.359375, "loss": 4.47686767578125 }, { "id": "identity_333894", "image": "PMC-VQA/images/PMC7221059_F4_169144.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the cause of the issue in the angiogram?\nChoices:\n(A) Aneurysm\n(B) Hemorrhage\n(C) Thrombosis\n(D) Vasospasm" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.09375, "loss": 4.268214225769043 }, { "id": "identity_130979", "image": "A-OKVQA/images/346266.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the yellow boat?" }, { "from": "gpt", "value": "The answer is KAIJIN" } ], "clip_score": 26.84375, "loss": 4.598837852478027 }, { "id": "identity_328747", "image": "A-OKVQA/images/101860.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which MLB team does the person at bat play for?\nChoices:\n(A) orioles\n(B) mets\n(C) mariners\n(D) braves" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.0, "loss": 4.181103706359863 } ]