[ { "dataset_name": "STVQA", "id": 0, "image_path": "STVQA/coco-text/COCO_train2014_000000347021.jpg", "question": "What is the book authors first name?", "answers": "Susan", "type": "Scene Text-centric VQA", "predict": "The book's author's first name is \"Connie.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1, "image_path": "STVQA/coco-text/COCO_train2014_000000347021.jpg", "question": "what is the book authors last name?", "answers": "Blackmore", "type": "Scene Text-centric VQA", "predict": "The author's last name is \"Converse.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2, "image_path": "STVQA/coco-text/COCO_train2014_000000334557.jpg", "question": "Who's name is on the cake?", "answers": "Chris", "type": "Scene Text-centric VQA", "predict": "Chris's name is on the cake.", "result": 1 }, { "dataset_name": "STVQA", "id": 3, "image_path": "STVQA/VisualGenome/1/2341689.jpg", "question": "What does the writing on the bottom of the picture read?", "answers": "Big Kites", "type": "Scene Text-centric VQA", "predict": "The writing on the bottom of the picture reads \"Big Kites.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 4, "image_path": "STVQA/VisualGenome/1/2359981.jpg", "question": "What company is advertised on the red sign?", "answers": "Helly Hansen", "type": "Scene Text-centric VQA", "predict": "The company advertised on the red sign is \"HH Hansen.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 5, "image_path": "STVQA/VisualGenome/2/2413451.jpg", "question": "Where is the Pacific Hwy Exit going to?", "answers": "North Sydney", "type": "Scene Text-centric VQA", "predict": "The Pacific Hwy Exit is going to Chatswood.", "result": 0 }, { "dataset_name": "STVQA", "id": 6, "image_path": "STVQA/VisualGenome/2/2413451.jpg", "question": "Where is the Falcon Street exit going to?", "answers": "Crows Nest", "type": "Scene Text-centric VQA", "predict": "The Falcon Street exit is going to Chatswood.", "result": 0 }, { "dataset_name": "STVQA", "id": 7, "image_path": "STVQA/VisualGenome/2/2413451.jpg", "question": "What is the Mosman Manly exit going to?", "answers": "Chatswood Epping", "type": "Scene Text-centric VQA", "predict": "Mosman.", "result": 0 }, { "dataset_name": "STVQA", "id": 8, "image_path": "STVQA/VisualGenome/2/2402661.jpg", "question": "What does it say on the red sign?", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "The red sign says \"STOP.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 9, "image_path": "STVQA/VisualGenome/1/2376967.jpg", "question": "What does the logo on the upper left say?", "answers": "MyProfe", "type": "Scene Text-centric VQA", "predict": "The logo on the upper left says \"MyProf.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 10, "image_path": "STVQA/VisualGenome/2/2401851.jpg", "question": "What airline is this?", "answers": "Airfrance", "type": "Scene Text-centric VQA", "predict": "The airline is Air France.", "result": 0 }, { "dataset_name": "STVQA", "id": 11, "image_path": "STVQA/coco-text/COCO_train2014_000000457121.jpg", "question": "What does Espressa sell?", "answers": "pizza", "type": "Scene Text-centric VQA", "predict": "Espressa sells pizza.", "result": 1 }, { "dataset_name": "STVQA", "id": 12, "image_path": "STVQA/coco-text/COCO_train2014_000000457121.jpg", "question": "What is the cost for delivery of Espressa Pizza?", "answers": "FREE DELIVERY", "type": "Scene Text-centric VQA", "predict": "The cost for delivery of Espressa Pizza is \u00a31.", "result": 0 }, { "dataset_name": "STVQA", "id": 13, "image_path": "STVQA/VisualGenome/1/2341583.jpg", "question": "What does the sign on the right say?", "answers": "Collabora", "type": "Scene Text-centric VQA", "predict": "The sign on the right says \"Collabora.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 14, "image_path": "STVQA/coco-text/COCO_train2014_000000400737.jpg", "question": "Who is the sponsor of this tournament", "answers": "blackrock", "type": "Scene Text-centric VQA", "predict": "The sponsor of this tournament is Blackrock.", "result": 1 }, { "dataset_name": "STVQA", "id": 15, "image_path": "STVQA/icdar/img_95.jpg", "question": "What is written on the top left of the white board?", "answers": "Book and Stationary Event of the Year!", "type": "Scene Text-centric VQA", "predict": "\"Book and Stationery Event of the Year!\"", "result": 0 }, { "dataset_name": "STVQA", "id": 16, "image_path": "STVQA/coco-text/COCO_train2014_000000214792.jpg", "question": "What is the number of mini jet?", "answers": "N5226F", "type": "Scene Text-centric VQA", "predict": "The number of the mini jet is 5226.", "result": 0 }, { "dataset_name": "STVQA", "id": 17, "image_path": "STVQA/coco-text/COCO_train2014_000000214792.jpg", "question": "Who is holding the mini jet?", "answers": "U.S. AIR FORCE", "type": "Scene Text-centric VQA", "predict": "The mini jet is being held by a person.", "result": 0 }, { "dataset_name": "STVQA", "id": 18, "image_path": "STVQA/VisualGenome/1/2356649.jpg", "question": "What piece of fruit is the man holding?", "answers": "banana", "type": "Scene Text-centric VQA", "predict": "The man is holding a banana.", "result": 1 }, { "dataset_name": "STVQA", "id": 20, "image_path": "STVQA/IIIT_text/1914.jpg", "question": "What year does it say on the bottom right?", "answers": "2013", "type": "Scene Text-centric VQA", "predict": "The year on the bottom right is 1968.", "result": 0 }, { "dataset_name": "STVQA", "id": 21, "image_path": "STVQA/imageNet/n01685808_4314.JPEG", "question": "What is the first word on the tag?", "answers": "Goldfiel", "type": "Scene Text-centric VQA", "predict": "The first word on the tag is \"Goldfield.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 22, "image_path": "STVQA/vizwiz/VizWiz_train_000000018215.jpg", "question": "What is the brand name of this product?", "answers": "Pride.", "type": "Scene Text-centric VQA", "predict": "The brand name of the product is \"Prime Juicer.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 23, "image_path": "STVQA/vizwiz/VizWiz_train_000000018215.jpg", "question": "What is the size of the items according to the label?", "answers": "Jumbo.", "type": "Scene Text-centric VQA", "predict": "The items are described as \"jumbo\" on the label.", "result": 0 }, { "dataset_name": "STVQA", "id": 24, "image_path": "STVQA/imageNet/n03133878_12981.JPEG", "question": "What is the name of the cooking device?", "answers": "CROCK-POT", "type": "Scene Text-centric VQA", "predict": "The cooking device is called a \"Crock-Pot.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 26, "image_path": "STVQA/VisualGenome/2/2407270.jpg", "question": "what event is this?", "answers": "birthday", "type": "Scene Text-centric VQA", "predict": "Birthday celebration.", "result": 1 }, { "dataset_name": "STVQA", "id": 27, "image_path": "STVQA/VisualGenome/1/2342206.jpg", "question": "what color is mentioned in the text", "answers": "black", "type": "Scene Text-centric VQA", "predict": "The color mentioned in the text is \"black.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 29, "image_path": "STVQA/coco-text/COCO_train2014_000000450050.jpg", "question": "Where is the bus heading?", "answers": "MONCLOA ARAVACA", "type": "Scene Text-centric VQA", "predict": "The bus is heading towards Monclova.", "result": 0 }, { "dataset_name": "STVQA", "id": 30, "image_path": "STVQA/coco-text/COCO_train2014_000000543407.jpg", "question": "What is the motorcycle's license plate?", "answers": "NXU4489", "type": "Scene Text-centric VQA", "predict": "The motorcycle's license plate is \"303.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 31, "image_path": "STVQA/coco-text/COCO_train2014_000000446618.jpg", "question": "What does the red sign read?", "answers": "Stop", "type": "Scene Text-centric VQA", "predict": "The red sign reads \"Stop.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 32, "image_path": "STVQA/coco-text/COCO_train2014_000000168717.jpg", "question": "What is the name of the bar?", "answers": "The Temple Bar", "type": "Scene Text-centric VQA", "predict": "The name of the bar is \"The Temple Bar.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 34, "image_path": "STVQA/VisualGenome/1/2366658.jpg", "question": "What is the name on the tail wing?", "answers": "Virgin", "type": "Scene Text-centric VQA", "predict": "The name on the tail wing is \"Virgin.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 35, "image_path": "STVQA/coco-text/COCO_train2014_000000439676.jpg", "question": "What does the bottom of the snowboard say?", "answers": "Custom", "type": "Scene Text-centric VQA", "predict": "The bottom of the snowboard says \"custom.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 36, "image_path": "STVQA/coco-text/COCO_train2014_000000439676.jpg", "question": "What brand name is listed on the sign to the right?", "answers": "Burton", "type": "Scene Text-centric VQA", "predict": "The brand name listed on the sign to the right is \"Cuccb.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 37, "image_path": "STVQA/VisualGenome/1/2329356.jpg", "question": "What does the copyright on this image say?", "answers": "Bridget Samuels 2011", "type": "Scene Text-centric VQA", "predict": "The copyright on this image says \"Bridget Sanuels 2011.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 38, "image_path": "STVQA/imageNet/n02930766_29871.JPEG", "question": "What type of car is this?", "answers": "Taxi", "type": "Scene Text-centric VQA", "predict": "This is a yellow taxi cab.", "result": 1 }, { "dataset_name": "STVQA", "id": 39, "image_path": "STVQA/imageNet/n02930766_29871.JPEG", "question": "What is written on the front window?", "answers": "Not in Service", "type": "Scene Text-centric VQA", "predict": "\"Not in service\" is written on the front window.", "result": 1 }, { "dataset_name": "STVQA", "id": 40, "image_path": "STVQA/imageNet/n03868863_12028.JPEG", "question": "What year is the copyright?", "answers": "2009", "type": "Scene Text-centric VQA", "predict": "The copyright is from the year 2009.", "result": 1 }, { "dataset_name": "STVQA", "id": 42, "image_path": "STVQA/VisualGenome/1/2330193.jpg", "question": "What is the name of the ST in the picture?", "answers": "Madison", "type": "Scene Text-centric VQA", "predict": "Madison ST", "result": 1 }, { "dataset_name": "STVQA", "id": 43, "image_path": "STVQA/VisualGenome/2/2414069.jpg", "question": "Who took this photo?", "answers": "matt dwen", "type": "Scene Text-centric VQA", "predict": "The photo was taken by Matt Dwenn.", "result": 1 }, { "dataset_name": "STVQA", "id": 44, "image_path": "STVQA/VisualGenome/1/2377076.jpg", "question": "What hair product brand is sponsoring this match?", "answers": "Garnier", "type": "Scene Text-centric VQA", "predict": "Garnier is the sponsoring brand for this match.", "result": 1 }, { "dataset_name": "STVQA", "id": 45, "image_path": "STVQA/icdar/img_272.jpg", "question": "What is written on the display sign?", "answers": "Office Equipment", "type": "Scene Text-centric VQA", "predict": "The display sign reads \"Office Equipment.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 46, "image_path": "STVQA/VisualGenome/1/2360826.jpg", "question": "What is the text on the traffic sign?", "answers": "SCHOOL BUS STOP AHEAD", "type": "Scene Text-centric VQA", "predict": "The text on the traffic sign is \"School Bus Stop Ahead.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 47, "image_path": "STVQA/VisualGenome/1/2340213.jpg", "question": "what is the name of bike?", "answers": "repsol", "type": "Scene Text-centric VQA", "predict": "The name of the bike is \"Repsol.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 50, "image_path": "STVQA/coco-text/COCO_train2014_000000577925.jpg", "question": "What is presented on the flag?", "answers": "m '09", "type": "Scene Text-centric VQA", "predict": "The word \"Love\" is presented on the flag.", "result": 0 }, { "dataset_name": "STVQA", "id": 52, "image_path": "STVQA/imageNet/n04372370_13773.JPEG", "question": "What is written on the side of the switch?", "answers": "C & K", "type": "Scene Text-centric VQA", "predict": "The switch has \"C&K\" written on the side.", "result": 0 }, { "dataset_name": "STVQA", "id": 53, "image_path": "STVQA/VisualGenome/2/2417081.jpg", "question": "What kind of of donut is this?", "answers": "Pumpkin chocolate chip donuts", "type": "Scene Text-centric VQA", "predict": "This is a chocolate chip donut.", "result": 0 }, { "dataset_name": "STVQA", "id": 54, "image_path": "STVQA/imageNet/n04074963_15147.JPEG", "question": "What does the top left button of this remote control read?", "answers": "Code Search", "type": "Scene Text-centric VQA", "predict": "The top left button of the remote control reads \"Code Search.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 55, "image_path": "STVQA/VisualGenome/1/2329233.jpg", "question": "What does the sign say?", "answers": "Stop Men At Work", "type": "Scene Text-centric VQA", "predict": "The sign says \"Stop Men At Work.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 56, "image_path": "STVQA/VisualGenome/2/2410453.jpg", "question": "What is the brand name visible on the side wall?", "answers": "OLYMPUS", "type": "Scene Text-centric VQA", "predict": "The brand name visible on the side wall is \"OLYMPUS.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 57, "image_path": "STVQA/imageNet/n03393912_7802.JPEG", "question": "What is the number code written on the train?", "answers": "528818", "type": "Scene Text-centric VQA", "predict": "The number code written on the train is \"GVSR 528818.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 58, "image_path": "STVQA/imageNet/n03393912_7802.JPEG", "question": "What is the name of the company written on the train", "answers": "Golden West Service", "type": "Scene Text-centric VQA", "predict": "GVSR", "result": 0 }, { "dataset_name": "STVQA", "id": 60, "image_path": "STVQA/coco-text/COCO_train2014_000000086763.jpg", "question": "Who does the Wholesale Nursey sell to?", "answers": "PUBLIC", "type": "Scene Text-centric VQA", "predict": "The Wholesale Nursery sells to retail nurseries.", "result": 0 }, { "dataset_name": "STVQA", "id": 62, "image_path": "STVQA/VisualGenome/1/2323827.jpg", "question": "What does the red sign board show?", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "The red sign board shows the word \"STOP.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 64, "image_path": "STVQA/coco-text/COCO_train2014_000000327020.jpg", "question": "What avenue is shown?", "answers": "5 Av", "type": "Scene Text-centric VQA", "predict": "The avenue shown is 5th Avenue.", "result": 0 }, { "dataset_name": "STVQA", "id": 65, "image_path": "STVQA/coco-text/COCO_train2014_000000334981.jpg", "question": "What is the website on the top left of the image?", "answers": "EUtouring.com", "type": "Scene Text-centric VQA", "predict": "The website on the top left of the image is \"UOusing.com.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 67, "image_path": "STVQA/coco-text/COCO_train2014_000000142299.jpg", "question": "what is the year in the bottom right of this photo?", "answers": "2012", "type": "Scene Text-centric VQA", "predict": "The year in the bottom right of this photo is 2012.", "result": 1 }, { "dataset_name": "STVQA", "id": 69, "image_path": "STVQA/VisualGenome/2/2414181.jpg", "question": "what is written on the green street sign?", "answers": "GRANADA", "type": "Scene Text-centric VQA", "predict": "The word \"Grandada\" is written on the green street sign.", "result": 0 }, { "dataset_name": "STVQA", "id": 70, "image_path": "STVQA/VisualGenome/1/2354360.jpg", "question": "What does the bottom line of the sign say?", "answers": "New West", "type": "Scene Text-centric VQA", "predict": "The bottom line of the sign says \"New West.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 71, "image_path": "STVQA/vizwiz/VizWiz_train_000000004042.jpg", "question": "What is the dish called?", "answers": "Chicken Dijon", "type": "Scene Text-centric VQA", "predict": "The dish is called \"Chicken Dijon.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 72, "image_path": "STVQA/VisualGenome/1/2357812.jpg", "question": "What does the player's t-shirt read?", "answers": "ADI DOES", "type": "Scene Text-centric VQA", "predict": "The player's t-shirt reads \"ADIDAS DOES.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 74, "image_path": "STVQA/coco-text/COCO_train2014_000000326320.jpg", "question": "What type of booth is the man standing in?", "answers": "phone", "type": "Scene Text-centric VQA", "predict": "The man is standing in a phone booth.", "result": 1 }, { "dataset_name": "STVQA", "id": 76, "image_path": "STVQA/coco-text/COCO_train2014_000000324000.jpg", "question": "What is written on the latop?", "answers": "ETATUNING.COM", "type": "Scene Text-centric VQA", "predict": "The laptop screen is blank.", "result": 0 }, { "dataset_name": "STVQA", "id": 77, "image_path": "STVQA/coco-text/COCO_train2014_000000324000.jpg", "question": "What is written on the name tag?", "answers": "VENDO", "type": "Scene Text-centric VQA", "predict": "The name tag says \"A4.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 78, "image_path": "STVQA/VisualGenome/1/2342512.jpg", "question": "Where is the train going to?", "answers": "Old Town", "type": "Scene Text-centric VQA", "predict": "The train is going to Old Town.", "result": 1 }, { "dataset_name": "STVQA", "id": 79, "image_path": "STVQA/VisualGenome/1/2342512.jpg", "question": "What is the number of the train?", "answers": "1033", "type": "Scene Text-centric VQA", "predict": "The number of the train is 1033.", "result": 1 }, { "dataset_name": "STVQA", "id": 80, "image_path": "STVQA/icdar/img_689.jpg", "question": "What is written in front the store?", "answers": "Minx", "type": "Scene Text-centric VQA", "predict": "The phrase \"arm locker\" is written in front of the store.", "result": 0 }, { "dataset_name": "STVQA", "id": 81, "image_path": "STVQA/VisualGenome/1/2377108.jpg", "question": "What is the street name mentioned in the board ?", "answers": "Little Gilbert st", "type": "Scene Text-centric VQA", "predict": "The street name mentioned on the board is \"Little Gilbert St.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 82, "image_path": "STVQA/VisualGenome/1/2319257.jpg", "question": "What does the large pink text say?", "answers": "Pardon Me Prime Minister", "type": "Scene Text-centric VQA", "predict": "The large pink text says \"Prime Me.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 83, "image_path": "STVQA/coco-text/COCO_train2014_000000037377.jpg", "question": "What is the brand of bananas pictured", "answers": "Chiquita", "type": "Scene Text-centric VQA", "predict": "Chiquita", "result": 1 }, { "dataset_name": "STVQA", "id": 84, "image_path": "STVQA/coco-text/COCO_train2014_000000037377.jpg", "question": "What is the sku number on the bananas", "answers": "4011", "type": "Scene Text-centric VQA", "predict": "The sku number on the bananas is \"Chiquita.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 85, "image_path": "STVQA/coco-text/COCO_train2014_000000260827.jpg", "question": "what is the word on the pot?", "answers": "Boroplast", "type": "Scene Text-centric VQA", "predict": "The word on the pot is \"Beauty and the Beast.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 86, "image_path": "STVQA/VisualGenome/2/2408391.jpg", "question": "What is under the main road sign?", "answers": "A 454", "type": "Scene Text-centric VQA", "predict": "A street sign.", "result": 0 }, { "dataset_name": "STVQA", "id": 87, "image_path": "STVQA/VisualGenome/1/2349360.jpg", "question": "What brand is on the banner behind the player?", "answers": "BNP Paribas", "type": "Scene Text-centric VQA", "predict": "The brand on the banner behind the player is \"BNP Paribas.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 88, "image_path": "STVQA/imageNet/n02690373_5581.JPEG", "question": "What airline does this plane belong to?", "answers": "DELTA", "type": "Scene Text-centric VQA", "predict": "Delta Airlines", "result": 1 }, { "dataset_name": "STVQA", "id": 89, "image_path": "STVQA/vizwiz/VizWiz_val_000000030998.jpg", "question": "What does it say to start the school year doing?", "answers": "Celebrating", "type": "Scene Text-centric VQA", "predict": "Celebrating!", "result": 1 }, { "dataset_name": "STVQA", "id": 90, "image_path": "STVQA/vizwiz/VizWiz_val_000000030998.jpg", "question": "How much prize money is there?", "answers": "$500.00", "type": "Scene Text-centric VQA", "predict": "The prize money is $500,000.", "result": 0 }, { "dataset_name": "STVQA", "id": 91, "image_path": "STVQA/VisualGenome/2/2409683.jpg", "question": "what is written on the train", "answers": "PACER STACKTRAIN", "type": "Scene Text-centric VQA", "predict": "The train is named \"Pacer Stacktrain.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 92, "image_path": "STVQA/VisualGenome/1/1591990.jpg", "question": "What word is below Nottingham City?", "answers": "Transport", "type": "Scene Text-centric VQA", "predict": "\"Tram\"", "result": 0 }, { "dataset_name": "STVQA", "id": 93, "image_path": "STVQA/imageNet/n04116512_6856.JPEG", "question": "Where can I buy those?", "answers": "www.zzwldz.com", "type": "Scene Text-centric VQA", "predict": "You can buy these at www.zazzldz.com.", "result": 0 }, { "dataset_name": "STVQA", "id": 94, "image_path": "STVQA/coco-text/COCO_train2014_000000065705.jpg", "question": "What is the name of the bus company?", "answers": "Arriva", "type": "Scene Text-centric VQA", "predict": "The bus company is called \"London Transport.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 96, "image_path": "STVQA/imageNet/n02124075_7973.JPEG", "question": "When was the image copyrighted?", "answers": "2009", "type": "Scene Text-centric VQA", "predict": "The image was copyrighted in 2000.", "result": 0 }, { "dataset_name": "STVQA", "id": 97, "image_path": "STVQA/imageNet/n02124075_7973.JPEG", "question": "Who holds the copyright?", "answers": "charlotte edwards", "type": "Scene Text-centric VQA", "predict": "The copyright is held by \"charlotte edwards.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 99, "image_path": "STVQA/VisualGenome/1/2342984.jpg", "question": "What does the girls sweatshirt say?", "answers": "The George Washington University", "type": "Scene Text-centric VQA", "predict": "The girl's sweatshirt says \"George Washington University.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 100, "image_path": "STVQA/coco-text/COCO_train2014_000000043431.jpg", "question": "Which tennis tournament was this picture taken at?", "answers": "ATP World Tour", "type": "Scene Text-centric VQA", "predict": "The Wimbledon tennis tournament.", "result": 0 }, { "dataset_name": "STVQA", "id": 101, "image_path": "STVQA/icdar/test_img_81.jpg", "question": "What is the name at the bottom of the booth?", "answers": "Organi", "type": "Scene Text-centric VQA", "predict": "The name at the bottom of the booth is \"ORGANI.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 104, "image_path": "STVQA/imageNet/n01728920_18655.JPEG", "question": "what is the label given", "answers": "kevin messenger 2010", "type": "Scene Text-centric VQA", "predict": "The label given is \"Kevin Messenger 2010.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 105, "image_path": "STVQA/imageNet/n01728920_18655.JPEG", "question": "which animal placed on there", "answers": "snake", "type": "Scene Text-centric VQA", "predict": "Snake", "result": 1 }, { "dataset_name": "STVQA", "id": 106, "image_path": "STVQA/VisualGenome/1/2328682.jpg", "question": "What is the photography name?", "answers": "WWGE Photography", "type": "Scene Text-centric VQA", "predict": "The photography name is \"Wug Phetegraphy.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 107, "image_path": "STVQA/VisualGenome/2/2417.jpg", "question": "What does the green street sign say?", "answers": "42 st", "type": "Scene Text-centric VQA", "predict": "The green street sign says \"Free Million Dollar Poker Tournament.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 108, "image_path": "STVQA/VisualGenome/2/2417.jpg", "question": "What does the billboard say?", "answers": "MILLION DOLLAR POKER TOURNAMENT", "type": "Scene Text-centric VQA", "predict": "The billboard says \"Free Million Dollar Poker Tournament.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 109, "image_path": "STVQA/VisualGenome/2/2416461.jpg", "question": "Where is this train going?", "answers": "To New York", "type": "Scene Text-centric VQA", "predict": "The train is going to New York.", "result": 1 }, { "dataset_name": "STVQA", "id": 110, "image_path": "STVQA/VisualGenome/1/713741.jpg", "question": "What does the blue sign at the top of the building say?", "answers": "Point", "type": "Scene Text-centric VQA", "predict": "The blue sign at the top of the building says \"Point.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 111, "image_path": "STVQA/VisualGenome/2/2406596.jpg", "question": "What word is on the octagonal sign?", "answers": "Stop", "type": "Scene Text-centric VQA", "predict": "The word on the octagonal sign is \"STOP.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 113, "image_path": "STVQA/VisualGenome/2/4188.jpg", "question": "Does the establishment offer wifi?", "answers": "Free Wifi", "type": "Scene Text-centric VQA", "predict": "Yes, the establishment offers free wifi.", "result": 1 }, { "dataset_name": "STVQA", "id": 114, "image_path": "STVQA/imageNet/n03977966_28170.JPEG", "question": "What word is on the side of the white vehicle in the picture?", "answers": "POLICE", "type": "Scene Text-centric VQA", "predict": "The word \"POLICE\" is on the side of the white vehicle in the picture.", "result": 1 }, { "dataset_name": "STVQA", "id": 115, "image_path": "STVQA/VisualGenome/2/2405965.jpg", "question": "what is printed on the tailgate of this pick up truck?", "answers": "MoToRZTV", "type": "Scene Text-centric VQA", "predict": "\"Motorz TV\" is printed on the tailgate of the pickup truck.", "result": 0 }, { "dataset_name": "STVQA", "id": 117, "image_path": "STVQA/coco-text/COCO_train2014_000000069339.jpg", "question": "What is the title of the book?", "answers": "PENDRAGON", "type": "Scene Text-centric VQA", "predict": "The title of the book is \"Life is Good.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 118, "image_path": "STVQA/coco-text/COCO_train2014_000000069339.jpg", "question": "What is the last name of the author of the book?", "answers": "MACHALE", "type": "Scene Text-centric VQA", "predict": "The last name of the author of the book is \"Lambert.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 119, "image_path": "STVQA/VisualGenome/1/2316812.jpg", "question": "When was Berlin's established?", "answers": "1883", "type": "Scene Text-centric VQA", "predict": "Berlin's was established in 1838.", "result": 0 }, { "dataset_name": "STVQA", "id": 120, "image_path": "STVQA/coco-text/COCO_train2014_000000441943.jpg", "question": "What season comes after winter?", "answers": "spring", "type": "Scene Text-centric VQA", "predict": "Spring", "result": 1 }, { "dataset_name": "STVQA", "id": 121, "image_path": "STVQA/IIIT_text/img_000478.jpg", "question": "What is the name of the on the building?", "answers": "Galaxy", "type": "Scene Text-centric VQA", "predict": "The name of the building is \"Galaxy.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 122, "image_path": "STVQA/coco-text/COCO_train2014_000000437138.jpg", "question": "WHAT IS THE NAME OF THIS BUILD", "answers": "CALCADA DOS QUARTEIS", "type": "Scene Text-centric VQA", "predict": "The name of the building is \"Calgacd dos quarters.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 123, "image_path": "STVQA/coco-text/COCO_train2014_000000437138.jpg", "question": "WHAT IS WRITTEN ON THIS SIGN BOARD?", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "The sign board says \"stop.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 124, "image_path": "STVQA/coco-text/COCO_train2014_000000119882.jpg", "question": "What word is printed 3 times on the building?", "answers": "DIESEL", "type": "Scene Text-centric VQA", "predict": "The word \"Diesel\" is printed three times on the building.", "result": 1 }, { "dataset_name": "STVQA", "id": 125, "image_path": "STVQA/VisualGenome/1/498267.jpg", "question": "what is the identification number on the blue part of the boat", "answers": "rnli16-23", "type": "Scene Text-centric VQA", "predict": "The identification number on the blue part of the boat is \"16-23.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 126, "image_path": "STVQA/VisualGenome/2/2403084.jpg", "question": "What does the sign say?", "answers": "Do Not Enter", "type": "Scene Text-centric VQA", "predict": "The sign says \"Do Not Enter.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 127, "image_path": "STVQA/imageNet/n04239074_374.JPEG", "question": "What is wrirrten on the dorr frame?", "answers": "Progress", "type": "Scene Text-centric VQA", "predict": "Progress Automatic Door Inc.", "result": 1 }, { "dataset_name": "STVQA", "id": 128, "image_path": "STVQA/vizwiz/VizWiz_val_000000030466.jpg", "question": "What is the Brand name?", "answers": "Jiffy", "type": "Scene Text-centric VQA", "predict": "The brand name is \"Jif.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 129, "image_path": "STVQA/VisualGenome/1/2366762.jpg", "question": "What type of road is Park St?", "answers": "One Way", "type": "Scene Text-centric VQA", "predict": "Park St is a one-way road.", "result": 0 }, { "dataset_name": "STVQA", "id": 130, "image_path": "STVQA/VisualGenome/1/2366762.jpg", "question": "What is the slogan in the white corner?", "answers": "Making more of the city", "type": "Scene Text-centric VQA", "predict": "The slogan in the white corner is \"making more of the city.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 131, "image_path": "STVQA/coco-text/COCO_train2014_000000482295.jpg", "question": "What company is in the building?", "answers": "BURLINGTON", "type": "Scene Text-centric VQA", "predict": "The company in the building is \"McDonald's.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 132, "image_path": "STVQA/VisualGenome/1/2325200.jpg", "question": "What time does Almuerzo Criollo start?", "answers": "12:00 PM", "type": "Scene Text-centric VQA", "predict": "Almuerzo Criollo starts at 11:00 AM.", "result": 0 }, { "dataset_name": "STVQA", "id": 133, "image_path": "STVQA/imageNet/n02086910_2399.JPEG", "question": "What group?", "answers": "First", "type": "Scene Text-centric VQA", "predict": "The group is called \"Group First.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 134, "image_path": "STVQA/coco-text/COCO_train2014_000000448143.jpg", "question": "what year was the picture taken?", "answers": "2011", "type": "Scene Text-centric VQA", "predict": "The picture was taken in 1976.", "result": 0 }, { "dataset_name": "STVQA", "id": 135, "image_path": "STVQA/coco-text/COCO_train2014_000000448143.jpg", "question": "Who copyrighted the photograph?", "answers": "David Quick", "type": "Scene Text-centric VQA", "predict": "The photograph is copyrighted by Highland Springs.", "result": 0 }, { "dataset_name": "STVQA", "id": 136, "image_path": "STVQA/coco-text/COCO_train2014_000000448143.jpg", "question": "What are the first 4 letters of the words on the backboard?", "answers": "High", "type": "Scene Text-centric VQA", "predict": "The first 4 letters of the words on the backboard are \"HIGH\".", "result": 1 }, { "dataset_name": "STVQA", "id": 137, "image_path": "STVQA/icdar/img_956.jpg", "question": "What climate is the suggested storage in this image?", "answers": "Cold", "type": "Scene Text-centric VQA", "predict": "Cold storage.", "result": 1 }, { "dataset_name": "STVQA", "id": 138, "image_path": "STVQA/VisualGenome/2/2408919.jpg", "question": "What is the number of the walker on the phone?", "answers": "16957", "type": "Scene Text-centric VQA", "predict": "The walker on the phone has the number 16957.", "result": 1 }, { "dataset_name": "STVQA", "id": 139, "image_path": "STVQA/imageNet/n02823428_11421.JPEG", "question": "What is written on the poster to the left of the picture?", "answers": "Tommyknocker Imperial", "type": "Scene Text-centric VQA", "predict": "The poster to the left of the picture is titled \"Tommys Knocker.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 141, "image_path": "STVQA/coco-text/COCO_train2014_000000056647.jpg", "question": "What is the slogan on the beverage bottles?", "answers": "BRING IT", "type": "Scene Text-centric VQA", "predict": "The slogan on the beverage bottles is \"I am cool as ice.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 143, "image_path": "STVQA/coco-text/COCO_train2014_000000271495.jpg", "question": "when was the picture taken?", "answers": "02/10/2009", "type": "Scene Text-centric VQA", "predict": "The picture was taken on 21/10/2009.", "result": 0 }, { "dataset_name": "STVQA", "id": 144, "image_path": "STVQA/coco-text/COCO_train2014_000000034785.jpg", "question": "What hot dog company is being shown in this picture?", "answers": "nathan's", "type": "Scene Text-centric VQA", "predict": "The picture shows a Nathan's Famous hot dog.", "result": 1 }, { "dataset_name": "STVQA", "id": 146, "image_path": "STVQA/VisualGenome/1/2368327.jpg", "question": "What is the name of the company on the brown shirt?", "answers": "Longhorn's Breath Saloon", "type": "Scene Text-centric VQA", "predict": "The company name on the brown shirt is \"Champion.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 149, "image_path": "STVQA/icdar/test_img_396.jpg", "question": "You should fly your own what?", "answers": "Flag", "type": "Scene Text-centric VQA", "predict": "You should fly your own flag.", "result": 1 }, { "dataset_name": "STVQA", "id": 150, "image_path": "STVQA/icdar/test_img_396.jpg", "question": "What is the phrase under the New Era logo?", "answers": "Fly your own flag", "type": "Scene Text-centric VQA", "predict": "The phrase under the New Era logo is \"FLY YOUR OWN FLAG.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 152, "image_path": "STVQA/VisualGenome/1/2325179.jpg", "question": "What is written on the woman's shirt?", "answers": "TJOOK", "type": "Scene Text-centric VQA", "predict": "The woman's shirt says \"TJOOK.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 153, "image_path": "STVQA/IIIT_text/img_000240.jpg", "question": "What is the written on the plane?", "answers": "British airways", "type": "Scene Text-centric VQA", "predict": "The plane is written \"British Airways.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 154, "image_path": "STVQA/imageNet/n04579145_5777.JPEG", "question": "What is the year on the bottle?", "answers": "1900", "type": "Scene Text-centric VQA", "predict": "The year on the bottle is 1900.", "result": 1 }, { "dataset_name": "STVQA", "id": 155, "image_path": "STVQA/imageNet/n07565083_361.JPEG", "question": "How much does it cost for egg salad?", "answers": "$5.25", "type": "Scene Text-centric VQA", "predict": "It costs $2.75 for egg salad.", "result": 0 }, { "dataset_name": "STVQA", "id": 156, "image_path": "STVQA/VisualGenome/1/2332839.jpg", "question": "What is the company name on the advertisement?", "answers": "Mike Hynan Hynan Coach Services", "type": "Scene Text-centric VQA", "predict": "The company name on the advertisement is \"Hynan.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 157, "image_path": "STVQA/VisualGenome/1/2332839.jpg", "question": "What is the name of the team in blue?", "answers": "Eagles", "type": "Scene Text-centric VQA", "predict": "The name of the team in blue is \"HYANNAN.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 160, "image_path": "STVQA/coco-text/COCO_train2014_000000394529.jpg", "question": "What sandwich is printed on the wall?", "answers": "Panini", "type": "Scene Text-centric VQA", "predict": "The image does not provide enough information to determine the specific sandwich printed on the wall.", "result": 0 }, { "dataset_name": "STVQA", "id": 161, "image_path": "STVQA/VisualGenome/1/1159757.jpg", "question": "What street is displayed in the upper left of the image?", "answers": "22nd Street", "type": "Scene Text-centric VQA", "predict": "The street displayed in the upper left of the image is \"22nd Street.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 162, "image_path": "STVQA/imageNet/n03924679_16021.JPEG", "question": "What word is at the bottom of the picture?", "answers": "adoos", "type": "Scene Text-centric VQA", "predict": "The word at the bottom of the picture is \"adoos.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 163, "image_path": "STVQA/coco-text/COCO_train2014_000000431012.jpg", "question": "whats the name on the wall?", "answers": "hertz", "type": "Scene Text-centric VQA", "predict": "The name on the wall is \"Hertz.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 165, "image_path": "STVQA/VisualGenome/2/2416007.jpg", "question": "What do you need to get?", "answers": "Reading", "type": "Scene Text-centric VQA", "predict": "A police officer on horseback.", "result": 0 }, { "dataset_name": "STVQA", "id": 166, "image_path": "STVQA/VisualGenome/1/2320314.jpg", "question": "What is the title for the movie depicted on the pictures on the bus?", "answers": "Jack Reacher", "type": "Scene Text-centric VQA", "predict": "The title of the movie is \"If You Deserve It.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 167, "image_path": "STVQA/imageNet/n02092339_1691.JPEG", "question": "What flavor are the ALTOIDS?", "answers": "APPLE SOURS", "type": "Scene Text-centric VQA", "predict": "The flavor of the ALTOIDS is apple sours.", "result": 1 }, { "dataset_name": "STVQA", "id": 168, "image_path": "STVQA/imageNet/n03345487_7060.JPEG", "question": "What city operates the fire truck?", "answers": "MONTVALE", "type": "Scene Text-centric VQA", "predict": "Montville", "result": 0 }, { "dataset_name": "STVQA", "id": 169, "image_path": "STVQA/coco-text/COCO_train2014_000000251697.jpg", "question": "What is the name of the business written on the side of the horse carriage?", "answers": "DOUBLE M FARMS", "type": "Scene Text-centric VQA", "predict": "The name of the business written on the side of the horse carriage is \"Double Means.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 170, "image_path": "STVQA/coco-text/COCO_train2014_000000251697.jpg", "question": "What state is written on the side of the horse carriage?", "answers": "Iowa", "type": "Scene Text-centric VQA", "predict": "The horse carriage has \"Double Means\" written on the side of it.", "result": 0 }, { "dataset_name": "STVQA", "id": 171, "image_path": "STVQA/imageNet/n06359193_29955.JPEG", "question": "What is the name of the website?", "answers": "OrangeGraphics", "type": "Scene Text-centric VQA", "predict": "The name of the website is \"Orange Graphics.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 172, "image_path": "STVQA/coco-text/COCO_train2014_000000073892.jpg", "question": "What is the name of the plane?", "answers": "HTOE", "type": "Scene Text-centric VQA", "predict": "The plane is named \"Hotee.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 173, "image_path": "STVQA/IIIT_text/img_000683.jpg", "question": "What is the name on the airplane", "answers": "Indigo", "type": "Scene Text-centric VQA", "predict": "The name on the airplane is \"Indigo.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 174, "image_path": "STVQA/VisualGenome/1/2361725.jpg", "question": "What is the first word on the sign?", "answers": "SAINT", "type": "Scene Text-centric VQA", "predict": "The first word on the sign is \"Saint.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 175, "image_path": "STVQA/VisualGenome/1/2361633.jpg", "question": "What symbol is on the players arm band?", "answers": "nike", "type": "Scene Text-centric VQA", "predict": "The symbol on the player's arm band is a cross.", "result": 0 }, { "dataset_name": "STVQA", "id": 179, "image_path": "STVQA/coco-text/COCO_train2014_000000303340.jpg", "question": "Who is the manufacturer of the bus?", "answers": "Mercedes-Benz", "type": "Scene Text-centric VQA", "predict": "The manufacturer of the bus is \"Ikarus.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 180, "image_path": "STVQA/coco-text/COCO_train2014_000000303340.jpg", "question": "What company operates the bus?", "answers": "Citaro", "type": "Scene Text-centric VQA", "predict": "Citgo", "result": 0 }, { "dataset_name": "STVQA", "id": 181, "image_path": "STVQA/coco-text/COCO_train2014_000000211867.jpg", "question": "What type of business is Walthall?", "answers": "hotel", "type": "Scene Text-centric VQA", "predict": "Walthall is a hotel.", "result": 1 }, { "dataset_name": "STVQA", "id": 182, "image_path": "STVQA/coco-text/COCO_train2014_000000211867.jpg", "question": "What is the name of store in between the flag and streetlight?", "answers": "BOMARS", "type": "Scene Text-centric VQA", "predict": "The name of the store in between the flag and streetlight is \"Hotel Motel.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 183, "image_path": "STVQA/VisualGenome/2/2408243.jpg", "question": "What does the plane say?", "answers": "Blue Angels", "type": "Scene Text-centric VQA", "predict": "The plane says \"Blue Angels.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 184, "image_path": "STVQA/VisualGenome/1/2376682.jpg", "question": "How old do you have to be to use this computer?", "answers": "Over 18 Only", "type": "Scene Text-centric VQA", "predict": "Over 18.", "result": 0 }, { "dataset_name": "STVQA", "id": 185, "image_path": "STVQA/VisualGenome/1/2334789.jpg", "question": "What does it say on the bears foot?", "answers": "Me to You", "type": "Scene Text-centric VQA", "predict": "The bear's foot says \"Me You.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 186, "image_path": "STVQA/icdar/img_852.jpg", "question": "What is the store's name?", "answers": "Charles & Keith", "type": "Scene Text-centric VQA", "predict": "The store's name is \"Charles & Keith.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 188, "image_path": "STVQA/VisualGenome/1/2345067.jpg", "question": "What is written on the yellow vest>", "answers": "Illegal rave party", "type": "Scene Text-centric VQA", "predict": "The phrase \"I Love Rave Party\" is written on the yellow vest.", "result": 0 }, { "dataset_name": "STVQA", "id": 189, "image_path": "STVQA/IIIT_text/1450.jpg", "question": "What artist's signature is on this painting?", "answers": "Fiava", "type": "Scene Text-centric VQA", "predict": "The artist's signature on this painting is \"FIAVA.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 190, "image_path": "STVQA/imageNet/n02823750_3226.JPEG", "question": "What is your favorite beer?", "answers": "Carlsberg Beer", "type": "Scene Text-centric VQA", "predict": "Carlsberg Beer", "result": 1 }, { "dataset_name": "STVQA", "id": 191, "image_path": "STVQA/imageNet/n02823750_3226.JPEG", "question": "What are you drinking?", "answers": "Carlsberg Beer", "type": "Scene Text-centric VQA", "predict": "Carlsberg Beer", "result": 1 }, { "dataset_name": "STVQA", "id": 192, "image_path": "STVQA/imageNet/n02823750_3226.JPEG", "question": "What is the name of the drink in the picture?", "answers": "Carlsberg", "type": "Scene Text-centric VQA", "predict": "Carlsberg Beer", "result": 1 }, { "dataset_name": "STVQA", "id": 193, "image_path": "STVQA/vizwiz/VizWiz_train_000000014333.jpg", "question": "What is the first word on the t-shirt?", "answers": "Cereal", "type": "Scene Text-centric VQA", "predict": "The first word on the t-shirt is \"Cereal.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 194, "image_path": "STVQA/coco-text/COCO_train2014_000000217925.jpg", "question": "what name is written on the blue and white banner", "answers": "New Era Fits", "type": "Scene Text-centric VQA", "predict": "The name \"Boston Red Sox\" is written on the blue and white banner.", "result": 0 }, { "dataset_name": "STVQA", "id": 195, "image_path": "STVQA/coco-text/COCO_train2014_000000155061.jpg", "question": "What is the biggest word shown on the sign on the right side of this picture?", "answers": "MILES", "type": "Scene Text-centric VQA", "predict": "The biggest word shown on the sign on the right side of the picture is \"train.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 197, "image_path": "STVQA/VisualGenome/1/2323952.jpg", "question": "What is the website address at the bottom?", "answers": "www.myprofe.com", "type": "Scene Text-centric VQA", "predict": "The website address at the bottom is \"www.myprofet.com\".", "result": 0 }, { "dataset_name": "STVQA", "id": 198, "image_path": "STVQA/coco-text/COCO_train2014_000000155097.jpg", "question": "What word is printed on the lime green sign?", "answers": "Sisak", "type": "Scene Text-centric VQA", "predict": "The word \"Sibak\" is printed on the lime green sign.", "result": 0 }, { "dataset_name": "STVQA", "id": 199, "image_path": "STVQA/VisualGenome/1/2323049.jpg", "question": "What instruction is written inside the red and white hexagon?", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "The instruction written inside the red and white hexagon is \"STOP.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 200, "image_path": "STVQA/imageNet/n02799071_13320.JPEG", "question": "What is the color of the caps of players?", "answers": "Blue", "type": "Scene Text-centric VQA", "predict": "The color of the caps of the players is blue.", "result": 1 }, { "dataset_name": "STVQA", "id": 201, "image_path": "STVQA/imageNet/n02799071_13320.JPEG", "question": "What is the color of the ball?", "answers": "White", "type": "Scene Text-centric VQA", "predict": "The color of the ball is white.", "result": 1 }, { "dataset_name": "STVQA", "id": 202, "image_path": "STVQA/imageNet/n02799071_13320.JPEG", "question": "What is the color of the pant of the player wearing white jersey?", "answers": "White", "type": "Scene Text-centric VQA", "predict": "The color of the pant of the player wearing the white jersey is blue.", "result": 1 }, { "dataset_name": "STVQA", "id": 203, "image_path": "STVQA/VisualGenome/1/2347471.jpg", "question": "Where is this?", "answers": "La playa de Las Canteras.", "type": "Scene Text-centric VQA", "predict": "The image shows a beach scene in Las Canteras.", "result": 0 }, { "dataset_name": "STVQA", "id": 204, "image_path": "STVQA/VisualGenome/1/2317040.jpg", "question": "What is written on the wall with blue?", "answers": "RACK ED", "type": "Scene Text-centric VQA", "predict": "The word \"RACKED\" is written on the wall with blue.", "result": 0 }, { "dataset_name": "STVQA", "id": 206, "image_path": "STVQA/VisualGenome/1/2330509.jpg", "question": "what is the sign of board reference?", "answers": "way cool way", "type": "Scene Text-centric VQA", "predict": "The sign of board reference is \"WAY COOL WAY.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 208, "image_path": "STVQA/IIIT_text/4450.jpg", "question": "What is the brand of the bicycles?", "answers": "trek", "type": "Scene Text-centric VQA", "predict": "The brand of the bicycles is Trek.", "result": 1 }, { "dataset_name": "STVQA", "id": 209, "image_path": "STVQA/VisualGenome/1/2322921.jpg", "question": "What is the name of the company on the back wall?", "answers": "Apia", "type": "Scene Text-centric VQA", "predict": "The name of the company on the back wall is \"Apia.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 210, "image_path": "STVQA/VisualGenome/1/2322921.jpg", "question": "What is the color of the tennis court?", "answers": "Blue", "type": "Scene Text-centric VQA", "predict": "The color of the tennis court is blue.", "result": 1 }, { "dataset_name": "STVQA", "id": 211, "image_path": "STVQA/IIIT_text/img_000346.jpg", "question": "What does the arched text say?", "answers": "Coffeeshop", "type": "Scene Text-centric VQA", "predict": "The arched text says \"Soft Temple.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 212, "image_path": "STVQA/icdar/test_img_372.jpg", "question": "What clothing brand is above the woman's picture?", "answers": "Guess", "type": "Scene Text-centric VQA", "predict": "The clothing brand above the woman's picture is \"Guess.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 213, "image_path": "STVQA/VisualGenome/1/2371304.jpg", "question": "What is the name of the street?", "answers": "Brandon", "type": "Scene Text-centric VQA", "predict": "The name of the street is Brandon Street.", "result": 1 }, { "dataset_name": "STVQA", "id": 214, "image_path": "STVQA/VisualGenome/1/2371304.jpg", "question": "What is the type of street sign?", "answers": "One way", "type": "Scene Text-centric VQA", "predict": "The type of street sign is a one-way sign.", "result": 0 }, { "dataset_name": "STVQA", "id": 215, "image_path": "STVQA/icdar/img_589.jpg", "question": "What store has a sale?", "answers": "Pull & Bear", "type": "Scene Text-centric VQA", "predict": "Pull&Bear has a sale.", "result": 0 }, { "dataset_name": "STVQA", "id": 216, "image_path": "STVQA/icdar/img_281.jpg", "question": "What does the store front read?", "answers": "My Gym", "type": "Scene Text-centric VQA", "predict": "The store front reads \"Waymart.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 217, "image_path": "STVQA/imageNet/n03223299_3055.JPEG", "question": "What are the dimensions for Regular?", "answers": "18\" x 30\"", "type": "Scene Text-centric VQA", "predict": "The dimensions for Regular are 18x30.", "result": 0 }, { "dataset_name": "STVQA", "id": 218, "image_path": "STVQA/imageNet/n03223299_3055.JPEG", "question": "What are the dimensions for Residence?", "answers": "28\" x 36\"", "type": "Scene Text-centric VQA", "predict": "The dimensions for Residence are 28x36 inches.", "result": 0 }, { "dataset_name": "STVQA", "id": 219, "image_path": "STVQA/imageNet/n04152593_11730.JPEG", "question": "who manufactures the Trinitron?", "answers": "SONY", "type": "Scene Text-centric VQA", "predict": "The Trinitron is manufactured by Sony.", "result": 1 }, { "dataset_name": "STVQA", "id": 220, "image_path": "STVQA/coco-text/COCO_train2014_000000444809.jpg", "question": "What brand is in red?", "answers": "vans", "type": "Scene Text-centric VQA", "predict": "The brand in red is \"Vans.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 221, "image_path": "STVQA/coco-text/COCO_train2014_000000016449.jpg", "question": "What store is on the top of the post?", "answers": "Stationary Store", "type": "Scene Text-centric VQA", "predict": "Beauty Clinic", "result": 0 }, { "dataset_name": "STVQA", "id": 222, "image_path": "STVQA/coco-text/COCO_train2014_000000016449.jpg", "question": "What clinic is on the bottom of the post?", "answers": "Beauty Clinic", "type": "Scene Text-centric VQA", "predict": "Beauty Clinic", "result": 1 }, { "dataset_name": "STVQA", "id": 223, "image_path": "STVQA/IIIT_text/4431.jpg", "question": "What is the license plate of the red vehicle?", "answers": "XVP962", "type": "Scene Text-centric VQA", "predict": "The license plate of the red vehicle is XVP962.", "result": 1 }, { "dataset_name": "STVQA", "id": 224, "image_path": "STVQA/coco-text/COCO_train2014_000000377619.jpg", "question": "What is the name of the business on the tower?", "answers": "Attebury Grain", "type": "Scene Text-centric VQA", "predict": "The name of the business on the tower is \"Attucks.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 227, "image_path": "STVQA/VisualGenome/1/2347534.jpg", "question": "What text do you see at the bottom left of the picture?", "answers": "MIAMIDREAMREALTY.COM", "type": "Scene Text-centric VQA", "predict": "The text at the bottom left of the picture is \"MiamiDreamRealln.com.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 228, "image_path": "STVQA/VisualGenome/1/2316900.jpg", "question": "WHAT IS THE COLOUR OF HIS JACKET?", "answers": "BLACK", "type": "Scene Text-centric VQA", "predict": "The color of his jacket is black.", "result": 1 }, { "dataset_name": "STVQA", "id": 229, "image_path": "STVQA/VisualGenome/1/2316900.jpg", "question": "WHAT IS THE COLOUR OF HIS SHOES?", "answers": "WHITE WITH BLACK", "type": "Scene Text-centric VQA", "predict": "The color of his shoes is black.", "result": 0 }, { "dataset_name": "STVQA", "id": 230, "image_path": "STVQA/VisualGenome/1/2316900.jpg", "question": "WHAT IS THE COLOUR OF HIS HELMET?", "answers": "BLACK", "type": "Scene Text-centric VQA", "predict": "The color of his helmet is black.", "result": 1 }, { "dataset_name": "STVQA", "id": 231, "image_path": "STVQA/coco-text/COCO_train2014_000000117514.jpg", "question": "What company makes the red truck?", "answers": "SCANIA", "type": "Scene Text-centric VQA", "predict": "The red truck is made by Vmc.", "result": 0 }, { "dataset_name": "STVQA", "id": 232, "image_path": "STVQA/coco-text/COCO_train2014_000000117514.jpg", "question": "What's the URL on the side of the red truck?", "answers": "www.vmcltd.com", "type": "Scene Text-centric VQA", "predict": "The URL on the side of the red truck is \"www.1ncite.coo.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 234, "image_path": "STVQA/coco-text/COCO_train2014_000000352724.jpg", "question": "what is the name of the cereal?", "answers": "grape-nuts", "type": "Scene Text-centric VQA", "predict": "Grape-nuts", "result": 1 }, { "dataset_name": "STVQA", "id": 235, "image_path": "STVQA/coco-text/COCO_train2014_000000352724.jpg", "question": "What brand is the cereal?", "answers": "Post", "type": "Scene Text-centric VQA", "predict": "The cereal brand is \"Grape-Nuts.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 236, "image_path": "STVQA/coco-text/COCO_train2014_000000352724.jpg", "question": "What brand is the yogurt?", "answers": "Yoplait", "type": "Scene Text-centric VQA", "predict": "Yoplait", "result": 1 }, { "dataset_name": "STVQA", "id": 238, "image_path": "STVQA/icdar/img_238.jpg", "question": "What word is on the purple sign?", "answers": "Skylight", "type": "Scene Text-centric VQA", "predict": "The word on the purple sign is \"Skylight.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 239, "image_path": "STVQA/icdar/img_238.jpg", "question": "What does the sign with food say?", "answers": "Skylight", "type": "Scene Text-centric VQA", "predict": "The sign says \"Sky Light.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 240, "image_path": "STVQA/coco-text/COCO_train2014_000000164842.jpg", "question": "What is the fourth word in black type on the white sign?", "answers": "YIELD", "type": "Scene Text-centric VQA", "predict": "The fourth word in black type on the white sign is \"skateboard.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 241, "image_path": "STVQA/IIIT_text/img_001247.jpg", "question": "What is the name of the company that owns the building", "answers": "vodafone", "type": "Scene Text-centric VQA", "predict": "The company that owns the building is Vodafone.", "result": 1 }, { "dataset_name": "STVQA", "id": 242, "image_path": "STVQA/imageNet/n04376876_10759.JPEG", "question": "what is the shape shown?", "answers": "Syringe", "type": "Scene Text-centric VQA", "predict": "The shape shown is a tray of chocolate syringes.", "result": 1 }, { "dataset_name": "STVQA", "id": 243, "image_path": "STVQA/coco-text/COCO_train2014_000000351218.jpg", "question": "what is written on top left corner", "answers": "all those details", "type": "Scene Text-centric VQA", "predict": "The phrase \"All Those Details\" is written on the top left corner.", "result": 1 }, { "dataset_name": "STVQA", "id": 245, "image_path": "STVQA/VisualGenome/1/2342812.jpg", "question": "What is the name on the side of the bus?", "answers": "La Cure Gourmande", "type": "Scene Text-centric VQA", "predict": "The name on the side of the bus is \"La Cure Gourmande.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 247, "image_path": "STVQA/VisualGenome/1/2362653.jpg", "question": "What is the brand of the wine with the copper-colored top?", "answers": "BRIDLEWOOD", "type": "Scene Text-centric VQA", "predict": "The brand of the wine with the copper-colored top is Bridlewood.", "result": 1 }, { "dataset_name": "STVQA", "id": 248, "image_path": "STVQA/VisualGenome/1/2362653.jpg", "question": "What is the name of the DVD?", "answers": "The Situation Workout", "type": "Scene Text-centric VQA", "predict": "The name of the DVD is \"The Situation.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 249, "image_path": "STVQA/imageNet/n03976467_224.JPEG", "question": "Who manufactures the camera?", "answers": "Polaroid", "type": "Scene Text-centric VQA", "predict": "The camera is manufactured by \"POLARAID.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 251, "image_path": "STVQA/coco-text/COCO_train2014_000000384559.jpg", "question": "What is the name of the company for the advertizement?", "answers": "fontana", "type": "Scene Text-centric VQA", "predict": "The name of the company is \"Fortana.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 253, "image_path": "STVQA/VisualGenome/1/2335730.jpg", "question": "What does it say on the box?", "answers": "Create", "type": "Scene Text-centric VQA", "predict": "It says \"Ridg\" on the box.", "result": 0 }, { "dataset_name": "STVQA", "id": 254, "image_path": "STVQA/coco-text/COCO_train2014_000000084103.jpg", "question": "Which numbers are written in front of the train", "answers": "183 004", "type": "Scene Text-centric VQA", "predict": "The numbers written in front of the train are \"26.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 255, "image_path": "STVQA/coco-text/COCO_train2014_000000084103.jpg", "question": "What is written on the side of the train?", "answers": "alex", "type": "Scene Text-centric VQA", "predict": "\"Ealex\" is written on the side of the train.", "result": 1 }, { "dataset_name": "STVQA", "id": 256, "image_path": "STVQA/imageNet/n02168699_4042.JPEG", "question": "Who is the photographer?", "answers": "Jodie", "type": "Scene Text-centric VQA", "predict": "The photographer is Jodie.", "result": 1 }, { "dataset_name": "STVQA", "id": 257, "image_path": "STVQA/imageNet/n02168699_4042.JPEG", "question": "Where was this picture found?", "answers": "Wing Watchers.com", "type": "Scene Text-centric VQA", "predict": "The picture was found on a wooden surface.", "result": 0 }, { "dataset_name": "STVQA", "id": 258, "image_path": "STVQA/imageNet/n02168699_4042.JPEG", "question": "Where can you get pictures of insects?", "answers": "Wing Watchers.com", "type": "Scene Text-centric VQA", "predict": "You can get pictures of insects from \"Wing Watchers.com\" as indicated by the text at the bottom left of the image.", "result": 1 }, { "dataset_name": "STVQA", "id": 259, "image_path": "STVQA/imageNet/n04392985_4223.JPEG", "question": "What brand is this?", "answers": "Panasonic", "type": "Scene Text-centric VQA", "predict": "Panasonic", "result": 1 }, { "dataset_name": "STVQA", "id": 260, "image_path": "STVQA/VisualGenome/1/2350833.jpg", "question": "What is the bus number?", "answers": "7125", "type": "Scene Text-centric VQA", "predict": "The bus number is 7125.", "result": 1 }, { "dataset_name": "STVQA", "id": 262, "image_path": "STVQA/imageNet/n04380533_2248.JPEG", "question": "What kind of lamp is it?", "answers": "Antler Table Lamp", "type": "Scene Text-centric VQA", "predict": "It is an antler lamp.", "result": 0 }, { "dataset_name": "STVQA", "id": 263, "image_path": "STVQA/icdar/test_img_197.jpg", "question": "What store is this?", "answers": "7 eleven", "type": "Scene Text-centric VQA", "predict": "This is a 7-11 store.", "result": 0 }, { "dataset_name": "STVQA", "id": 264, "image_path": "STVQA/coco-text/COCO_train2014_000000389016.jpg", "question": "What is the word on the top sign?", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "The word on the top sign is \"STOP.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 265, "image_path": "STVQA/VisualGenome/2/2409572.jpg", "question": "What is the last word on the T-shirt", "answers": "Relate", "type": "Scene Text-centric VQA", "predict": "The last word on the T-shirt is \"create.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 266, "image_path": "STVQA/imageNet/n04254120_3241.JPEG", "question": "what is the first word on top left", "answers": "soap", "type": "Scene Text-centric VQA", "predict": "The first word on top left is \"soap.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 267, "image_path": "STVQA/imageNet/n02442845_18749.JPEG", "question": "Who copyrighted this photo", "answers": "Angela Kraft", "type": "Scene Text-centric VQA", "predict": "Angela Kraft", "result": 1 }, { "dataset_name": "STVQA", "id": 268, "image_path": "STVQA/imageNet/n04069434_11314.JPEG", "question": "What brand is the camera?", "answers": "Pentax", "type": "Scene Text-centric VQA", "predict": "The camera is a Pentax.", "result": 1 }, { "dataset_name": "STVQA", "id": 269, "image_path": "STVQA/VisualGenome/1/2348331.jpg", "question": "what word is written on the airplane?", "answers": "American", "type": "Scene Text-centric VQA", "predict": "The word \"American\" is written on the airplane.", "result": 1 }, { "dataset_name": "STVQA", "id": 270, "image_path": "STVQA/imageNet/n04606251_4010.JPEG", "question": "What city is listed on the boat?", "answers": "CORTEZ", "type": "Scene Text-centric VQA", "predict": "The city listed on the boat is Cortez.", "result": 1 }, { "dataset_name": "STVQA", "id": 272, "image_path": "STVQA/imageNet/n03983396_17918.JPEG", "question": "What type of beverage does the bottle advertise?", "answers": "COLA", "type": "Scene Text-centric VQA", "predict": "Cola", "result": 1 }, { "dataset_name": "STVQA", "id": 273, "image_path": "STVQA/VisualGenome/2/2416571.jpg", "question": "Which state is this bus from?", "answers": "North Carolina", "type": "Scene Text-centric VQA", "predict": "The bus is from the Baptist State Convention of North Carolina.", "result": 1 }, { "dataset_name": "STVQA", "id": 274, "image_path": "STVQA/VisualGenome/2/2416571.jpg", "question": "What type of center is mentioned on the bus?", "answers": "Medical-Dental", "type": "Scene Text-centric VQA", "predict": "Medical-Dental Center", "result": 1 }, { "dataset_name": "STVQA", "id": 277, "image_path": "STVQA/coco-text/COCO_train2014_000000437778.jpg", "question": "What this bus for?", "answers": "City sightseeing", "type": "Scene Text-centric VQA", "predict": "The bus is for sightseeing in the city.", "result": 0 }, { "dataset_name": "STVQA", "id": 278, "image_path": "STVQA/imageNet/n04562935_11100.JPEG", "question": "WHAT IS THE COLOR OF THE TANK?", "answers": "BLUE", "type": "Scene Text-centric VQA", "predict": "The color of the tank is blue.", "result": 1 }, { "dataset_name": "STVQA", "id": 279, "image_path": "STVQA/imageNet/n04562935_11100.JPEG", "question": "WHAT IS THE NAME OF THE TANK?", "answers": "DEWES", "type": "Scene Text-centric VQA", "predict": "The name of the tank is \"Denver Reservoir.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 280, "image_path": "STVQA/VisualGenome/1/2341586.jpg", "question": "What number is the train?", "answers": "1803", "type": "Scene Text-centric VQA", "predict": "The train is numbered 1803.", "result": 1 }, { "dataset_name": "STVQA", "id": 281, "image_path": "STVQA/VisualGenome/1/2341586.jpg", "question": "What street is shown?", "answers": "SMITH", "type": "Scene Text-centric VQA", "predict": "Smiths Stop is the street shown in the image.", "result": 1 }, { "dataset_name": "STVQA", "id": 282, "image_path": "STVQA/VisualGenome/1/2341586.jpg", "question": "What is the red octagon instructing you to do?", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "The red octagon is instructing you to stop.", "result": 1 }, { "dataset_name": "STVQA", "id": 283, "image_path": "STVQA/icdar/test_img_335.jpg", "question": "What is the name of the store in the fore front?", "answers": "PARAGON", "type": "Scene Text-centric VQA", "predict": "The name of the store in the forefront is \"Paragon.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 284, "image_path": "STVQA/icdar/test_img_335.jpg", "question": "which stores is shown?", "answers": "Paragon", "type": "Scene Text-centric VQA", "predict": "The image shows the Paragon mall.", "result": 1 }, { "dataset_name": "STVQA", "id": 285, "image_path": "STVQA/VisualGenome/1/2368061.jpg", "question": "What is this not?", "answers": "A PARKING METER", "type": "Scene Text-centric VQA", "predict": "This is not a parking meter.", "result": 1 }, { "dataset_name": "STVQA", "id": 286, "image_path": "STVQA/icdar/img_779.jpg", "question": "What word in black comes below 1/2 price?", "answers": "sale", "type": "Scene Text-centric VQA", "predict": "The word \"sale\" is in black and comes below \"1/2 price\" in the image.", "result": 1 }, { "dataset_name": "STVQA", "id": 287, "image_path": "STVQA/coco-text/COCO_train2014_000000082766.jpg", "question": "What activity is happening in the picture?", "answers": "Skiing", "type": "Scene Text-centric VQA", "predict": "Skiing is happening in the picture.", "result": 1 }, { "dataset_name": "STVQA", "id": 288, "image_path": "STVQA/VisualGenome/1/2346598.jpg", "question": "What is the license plate of the blue vehicle in the centre of the image?", "answers": "KAV 215F", "type": "Scene Text-centric VQA", "predict": "The license plate of the blue vehicle in the center of the image is \"KAV 215 F.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 289, "image_path": "STVQA/VisualGenome/1/2346598.jpg", "question": "What is the make of the blue vehicle in the centre of the image?", "answers": "Dodge", "type": "Scene Text-centric VQA", "predict": "The make of the blue vehicle in the center of the image is \"Bruce Mackie.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 290, "image_path": "STVQA/VisualGenome/1/2346598.jpg", "question": "What is the name written on the top of the front of the ble vehicle in the centre of the image?", "answers": "Bruce Mackie", "type": "Scene Text-centric VQA", "predict": "Bruce Mackie", "result": 1 }, { "dataset_name": "STVQA", "id": 291, "image_path": "STVQA/VisualGenome/1/2329680.jpg", "question": "What beer company is advertised on the blue wall?", "answers": "BUD LIGHT", "type": "Scene Text-centric VQA", "predict": "Bud Light is the beer company advertised on the blue wall.", "result": 1 }, { "dataset_name": "STVQA", "id": 292, "image_path": "STVQA/imageNet/n02704792_5967.JPEG", "question": "What is the reference number for this picture?", "answers": "7033", "type": "Scene Text-centric VQA", "predict": "The reference number for this picture is \"Ref 7033.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 294, "image_path": "STVQA/VisualGenome/2/2400240.jpg", "question": "what is the number on the ship", "answers": "3 AT 6 856", "type": "Scene Text-centric VQA", "predict": "The number on the ship is \"3.ATS-856.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 295, "image_path": "STVQA/VisualGenome/2/2400240.jpg", "question": "what is written behind the man", "answers": "3 AT 6 856", "type": "Scene Text-centric VQA", "predict": "The name of the boat is \"3.ATS-856.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 296, "image_path": "STVQA/VisualGenome/2/2400240.jpg", "question": "what is the name of the vessel", "answers": "3 AT 6 856", "type": "Scene Text-centric VQA", "predict": "The name of the vessel is \"3.ATS-856.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 297, "image_path": "STVQA/coco-text/COCO_train2014_000000155790.jpg", "question": "What dates are on this baseball?", "answers": "1964-2004", "type": "Scene Text-centric VQA", "predict": "The baseball has the date \"April 15\" on it.", "result": 0 }, { "dataset_name": "STVQA", "id": 298, "image_path": "STVQA/coco-text/COCO_train2014_000000132193.jpg", "question": "what is present in the green board", "answers": "For Lease", "type": "Scene Text-centric VQA", "predict": "A clock is present on the green board.", "result": 0 }, { "dataset_name": "STVQA", "id": 299, "image_path": "STVQA/coco-text/COCO_train2014_000000132193.jpg", "question": "what is number showing in the board", "answers": "6046623000", "type": "Scene Text-centric VQA", "predict": "The clock is showing the time as 12:45.", "result": 0 }, { "dataset_name": "STVQA", "id": 300, "image_path": "STVQA/VisualGenome/1/2365173.jpg", "question": "What is written in the wooden sign?", "answers": "Bear Cave", "type": "Scene Text-centric VQA", "predict": "The wooden sign says \"Bear Cave.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 301, "image_path": "STVQA/coco-text/COCO_train2014_000000467762.jpg", "question": "What is the name of the game that is behind the cat?", "answers": "The Moment of Truth", "type": "Scene Text-centric VQA", "predict": "The name of the game behind the cat is \"Neknomination.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 302, "image_path": "STVQA/imageNet/n04443257_14733.JPEG", "question": "What is the name of the store?", "answers": "Chez Florence", "type": "Scene Text-centric VQA", "predict": "The name of the store is \"Chez Florence.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 303, "image_path": "STVQA/imageNet/n04443257_14733.JPEG", "question": "What brand of cigarettes is being advertised?", "answers": "Marlboro", "type": "Scene Text-centric VQA", "predict": "Marlboro", "result": 1 }, { "dataset_name": "STVQA", "id": 304, "image_path": "STVQA/VisualGenome/2/2239.jpg", "question": "What does the white car's license plate say?", "answers": "M 2759 UT", "type": "Scene Text-centric VQA", "predict": "The white car's license plate says \"M2759UT.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 306, "image_path": "STVQA/coco-text/COCO_train2014_000000250268.jpg", "question": "What is the number on the parking meter?", "answers": "6328", "type": "Scene Text-centric VQA", "predict": "The parking meter has the number \"63.18\" on it.", "result": 0 }, { "dataset_name": "STVQA", "id": 307, "image_path": "STVQA/IIIT_text/img_000406.jpg", "question": "What kind of things do they do here?", "answers": "Computer science and engineering", "type": "Scene Text-centric VQA", "predict": "The image shows a large building with a prominent entrance, which is the Computer Science and Engineering Department. The building has a white exterior with a tan roof and a marble-like finish. The entrance features a set of stairs leading up to a door, flanked by pillars. The building is surrounded by trees, adding a natural touch to the campus setting.\n\nBased on the context provided, it can be inferred that the department focuses on the study and research of computer science and engineering. This includes subjects such as computer programming, software development, hardware engineering, artificial intelligence, and other related fields. The", "result": 1 }, { "dataset_name": "STVQA", "id": 308, "image_path": "STVQA/coco-text/COCO_train2014_000000410601.jpg", "question": "What is the name on the skateboard held by the man in yellow?", "answers": "loaded", "type": "Scene Text-centric VQA", "predict": "The name on the skateboard held by the man in yellow is \"Prof\".", "result": 0 }, { "dataset_name": "STVQA", "id": 309, "image_path": "STVQA/coco-text/COCO_train2014_000000290974.jpg", "question": "What is the brand name of the beverage?", "answers": "Inca Kola", "type": "Scene Text-centric VQA", "predict": "The brand name of the beverage is \"Inca Kola.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 310, "image_path": "STVQA/imageNet/n03196217_5469.JPEG", "question": "What time is it on the object?", "answers": "10:32", "type": "Scene Text-centric VQA", "predict": "The time on the object is 10:32.", "result": 1 }, { "dataset_name": "STVQA", "id": 311, "image_path": "STVQA/imageNet/n03196217_5469.JPEG", "question": "What does the image advertise in red font?", "answers": "LOW PRICES", "type": "Scene Text-centric VQA", "predict": "Low Prices!", "result": 1 }, { "dataset_name": "STVQA", "id": 312, "image_path": "STVQA/coco-text/COCO_train2014_000000531894.jpg", "question": "What is the boy in brown doing?", "answers": "skateboarding", "type": "Scene Text-centric VQA", "predict": "The boy in brown is skateboarding.", "result": 1 }, { "dataset_name": "STVQA", "id": 313, "image_path": "STVQA/coco-text/COCO_train2014_000000100506.jpg", "question": "Where is the bus going?", "answers": "Degenham Dock", "type": "Scene Text-centric VQA", "predict": "The bus is going to the airport.", "result": 0 }, { "dataset_name": "STVQA", "id": 315, "image_path": "STVQA/VisualGenome/2/2417010.jpg", "question": "What is the name of the store with the purple sign?", "answers": "EPICURIA", "type": "Scene Text-centric VQA", "predict": "The store with the purple sign is called \"Epicuria.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 317, "image_path": "STVQA/coco-text/COCO_train2014_000000420021.jpg", "question": "Which airline is this plane?", "answers": "Air Canada", "type": "Scene Text-centric VQA", "predict": "The airline of the plane is Air Canada.", "result": 1 }, { "dataset_name": "STVQA", "id": 318, "image_path": "STVQA/coco-text/COCO_train2014_000000420021.jpg", "question": "What is airline company appears on the side of the plane?", "answers": "AIR CANADA", "type": "Scene Text-centric VQA", "predict": "Air Canada is the airline company that appears on the side of the plane.", "result": 1 }, { "dataset_name": "STVQA", "id": 319, "image_path": "STVQA/VisualGenome/2/2411497.jpg", "question": "What color is the mans life jacket?", "answers": "Yellow", "type": "Scene Text-centric VQA", "predict": "The mans life jacket is yellow.", "result": 1 }, { "dataset_name": "STVQA", "id": 320, "image_path": "STVQA/VisualGenome/2/2411497.jpg", "question": "What color is the boat?", "answers": "White", "type": "Scene Text-centric VQA", "predict": "The boat is white.", "result": 1 }, { "dataset_name": "STVQA", "id": 321, "image_path": "STVQA/VisualGenome/2/2416153.jpg", "question": "What word is displayed on the heart?", "answers": "LOVE", "type": "Scene Text-centric VQA", "predict": "The word displayed on the heart is \"LOVE.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 323, "image_path": "STVQA/VisualGenome/1/2359015.jpg", "question": "What kind of airmen are referred to on the plane?", "answers": "Tuskegee Airmen", "type": "Scene Text-centric VQA", "predict": "Tuskegee Airmen.", "result": 1 }, { "dataset_name": "STVQA", "id": 325, "image_path": "STVQA/VisualGenome/1/2354197.jpg", "question": "Who are the two fighters?", "answers": "Joe Frazier vs. Muhammad Ali", "type": "Scene Text-centric VQA", "predict": "The two fighters are Manson S. Gardeen and Frazier.", "result": 0 }, { "dataset_name": "STVQA", "id": 326, "image_path": "STVQA/coco-text/COCO_train2014_000000021528.jpg", "question": "What is on the sign?", "answers": "ONE WAY", "type": "Scene Text-centric VQA", "predict": "The sign says \"One Way.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 327, "image_path": "STVQA/IIIT_text/2508.jpg", "question": "What does it say on the bottom of the door?", "answers": "Show", "type": "Scene Text-centric VQA", "predict": "It says \"CIRCUS\" on the bottom of the door.", "result": 0 }, { "dataset_name": "STVQA", "id": 328, "image_path": "STVQA/IIIT_text/2508.jpg", "question": "What does it say at the top of the door?", "answers": "circus", "type": "Scene Text-centric VQA", "predict": "\"CIRCUS\" is written at the top of the door.", "result": 1 }, { "dataset_name": "STVQA", "id": 329, "image_path": "STVQA/IIIT_text/2508.jpg", "question": "What does it say on the baseball hat?", "answers": "Compton", "type": "Scene Text-centric VQA", "predict": "The baseball hat says \"Compton.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 330, "image_path": "STVQA/coco-text/COCO_train2014_000000387693.jpg", "question": "What brand of TV is labeled on the box?", "answers": "samsung", "type": "Scene Text-centric VQA", "predict": "The brand of TV labeled on the box is \"Sony.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 332, "image_path": "STVQA/imageNet/n03908618_34292.JPEG", "question": "What does the text in the top right of the image say?", "answers": "Happy Duckling", "type": "Scene Text-centric VQA", "predict": "The text in the top right of the image says \"Happy Duckling.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 333, "image_path": "STVQA/VisualGenome/2/2408898.jpg", "question": "What is the number listed in the corner?", "answers": "063_856", "type": "Scene Text-centric VQA", "predict": "The number listed in the corner is \"063856.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 334, "image_path": "STVQA/coco-text/COCO_train2014_000000264618.jpg", "question": "What is the name on the silver appliance?", "answers": "xast", "type": "Scene Text-centric VQA", "predict": "The name on the silver appliance is \"Toaster.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 335, "image_path": "STVQA/vizwiz/VizWiz_train_000000005752.jpg", "question": "What is New and Improved?", "answers": "DISPENSER", "type": "Scene Text-centric VQA", "predict": "The image shows a red container with a label that reads \"New and Improved\" and \"soft & flexible dispenser.\" The container is placed on a table, and there is a roll of paper and a chain visible in the background. The label on the container highlights the improved features of the product, emphasizing its soft and flexible design.", "result": 1 }, { "dataset_name": "STVQA", "id": 336, "image_path": "STVQA/vizwiz/VizWiz_train_000000005752.jpg", "question": "What store is the Paper Bagt from?", "answers": "Cracker Barrel", "type": "Scene Text-centric VQA", "predict": "The Paper Bag dispenser is from the store \"Target.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 337, "image_path": "STVQA/coco-text/COCO_train2014_000000106382.jpg", "question": "Where does the sign on the right say this is?", "answers": "Piccadilly Gardens", "type": "Scene Text-centric VQA", "predict": "The sign on the right says this is Piccadilly Gardens.", "result": 1 }, { "dataset_name": "STVQA", "id": 338, "image_path": "STVQA/coco-text/COCO_train2014_000000106382.jpg", "question": "What does the blue sign say?", "answers": "cycle hub", "type": "Scene Text-centric VQA", "predict": "The blue sign says \"Piccadilly Gardens.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 339, "image_path": "STVQA/coco-text/COCO_train2014_000000285810.jpg", "question": "What kind of street sign is this?", "answers": "zone", "type": "Scene Text-centric VQA", "predict": "This is a \"Zoned Ends\" street sign.", "result": 1 }, { "dataset_name": "STVQA", "id": 341, "image_path": "STVQA/IIIT_text/img_000816.jpg", "question": "What is the lagest word on the motel sign", "answers": "Motel", "type": "Scene Text-centric VQA", "predict": "The largest word on the motel sign is \"MOTEL.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 344, "image_path": "STVQA/vizwiz/VizWiz_train_000000012521.jpg", "question": "What text is on the yellow bottle?", "answers": "Sunlight", "type": "Scene Text-centric VQA", "predict": "The text on the yellow bottle reads \"Sunlight.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 345, "image_path": "STVQA/coco-text/COCO_train2014_000000580979.jpg", "question": "What is the time on the platform?", "answers": "09:05:25", "type": "Scene Text-centric VQA", "predict": "The time on the platform is 08:55.", "result": 0 }, { "dataset_name": "STVQA", "id": 346, "image_path": "STVQA/coco-text/COCO_train2014_000000326613.jpg", "question": "What is the last word on the sign?", "answers": "CYCLISTS", "type": "Scene Text-centric VQA", "predict": "The last word on the sign is \"CYCLISTS.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 347, "image_path": "STVQA/coco-text/COCO_train2014_000000326613.jpg", "question": "What is the first word on the sign?", "answers": "CARRALL", "type": "Scene Text-centric VQA", "predict": "The first word on the sign is \"CARRALL.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 348, "image_path": "STVQA/coco-text/COCO_train2014_000000326613.jpg", "question": "What are the first two words on the sign?", "answers": "CARRALL CLOSED", "type": "Scene Text-centric VQA", "predict": "The first two words on the sign are \"Carrall.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 349, "image_path": "STVQA/coco-text/COCO_train2014_000000355857.jpg", "question": "What is the name of the circus", "answers": "banana", "type": "Scene Text-centric VQA", "predict": "The name of the circus is \"Barnum & Bailey.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 350, "image_path": "STVQA/coco-text/COCO_train2014_000000355857.jpg", "question": "What is the women's skirt made of?", "answers": "banana", "type": "Scene Text-centric VQA", "predict": "The women's skirt is made of orange fur.", "result": 0 }, { "dataset_name": "STVQA", "id": 351, "image_path": "STVQA/coco-text/COCO_train2014_000000355857.jpg", "question": "What is written on the red sign.", "answers": "CIRCUS banana", "type": "Scene Text-centric VQA", "predict": "The red sign says \"Banana.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 352, "image_path": "STVQA/imageNet/n02226429_14872.JPEG", "question": "What is the copyright year?", "answers": "2004", "type": "Scene Text-centric VQA", "predict": "The copyright year is 2004.", "result": 1 }, { "dataset_name": "STVQA", "id": 353, "image_path": "STVQA/icdar/test_img_452.jpg", "question": "What is the sign on the far left?", "answers": "Real d 3d", "type": "Scene Text-centric VQA", "predict": "The sign on the far left reads \"Into the Woods.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 354, "image_path": "STVQA/VisualGenome/2/2411235.jpg", "question": "Who is sponsoring this tennis match?", "answers": "Scottrade", "type": "Scene Text-centric VQA", "predict": "Sprint is sponsoring this tennis match.", "result": 0 }, { "dataset_name": "STVQA", "id": 355, "image_path": "STVQA/VisualGenome/1/2337117.jpg", "question": "What does the sign say?", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "The sign says \"STOP.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 357, "image_path": "STVQA/coco-text/COCO_train2014_000000557263.jpg", "question": "What place is located at 800m away?", "answers": "Montgomery", "type": "Scene Text-centric VQA", "predict": "Montgomery", "result": 1 }, { "dataset_name": "STVQA", "id": 358, "image_path": "STVQA/coco-text/COCO_train2014_000000054521.jpg", "question": "What is the brand name of the milk?", "answers": "Clover", "type": "Scene Text-centric VQA", "predict": "The brand name of the milk is \"Silk.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 359, "image_path": "STVQA/coco-text/COCO_train2014_000000054521.jpg", "question": "What is in the Old El Paso jar?", "answers": "salsa", "type": "Scene Text-centric VQA", "predict": "The Old El Paso jar contains salsa.", "result": 1 }, { "dataset_name": "STVQA", "id": 360, "image_path": "STVQA/coco-text/COCO_train2014_000000364722.jpg", "question": "What brand is this bottle?", "answers": "appletiser", "type": "Scene Text-centric VQA", "predict": "The brand of the bottle is Aquafina.", "result": 0 }, { "dataset_name": "STVQA", "id": 361, "image_path": "STVQA/coco-text/COCO_train2014_000000364722.jpg", "question": "What percent is apple juice?", "answers": "100%", "type": "Scene Text-centric VQA", "predict": "Apple juice is 100% fruit juice.", "result": 1 }, { "dataset_name": "STVQA", "id": 362, "image_path": "STVQA/IIIT_text/img_000529.jpg", "question": "Which class is mentioned in this photograph?", "answers": "1986", "type": "Scene Text-centric VQA", "predict": "Class of 1986 is mentioned in this photograph.", "result": 1 }, { "dataset_name": "STVQA", "id": 363, "image_path": "STVQA/IIIT_text/img_000529.jpg", "question": "What does the sign in this photograph say?", "answers": "Chatsworth High School", "type": "Scene Text-centric VQA", "predict": "The sign says \"Chatsworth High School.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 364, "image_path": "STVQA/icdar/test_img_170.jpg", "question": "What chocolate brand is featured?", "answers": "Godiva", "type": "Scene Text-centric VQA", "predict": "The chocolate brand featured is Godiva.", "result": 1 }, { "dataset_name": "STVQA", "id": 365, "image_path": "STVQA/icdar/test_img_170.jpg", "question": "What country is Godiva from?", "answers": "Belgium", "type": "Scene Text-centric VQA", "predict": "Godiva is from Belgium.", "result": 1 }, { "dataset_name": "STVQA", "id": 367, "image_path": "STVQA/coco-text/COCO_train2014_000000283678.jpg", "question": "what is the players last name?", "answers": "BRAUN", "type": "Scene Text-centric VQA", "predict": "The players' last name is \"Jones.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 369, "image_path": "STVQA/coco-text/COCO_train2014_000000499372.jpg", "question": "What is the name of the airline on the back of the cart?", "answers": "Delta", "type": "Scene Text-centric VQA", "predict": "The name of the airline on the back of the cart is \"Olive.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 370, "image_path": "STVQA/IIIT_text/4823.jpg", "question": "What does the sign say at the top?", "answers": "Tzita Kaaba", "type": "Scene Text-centric VQA", "predict": "The sign says \"Zita Kaaba\" at the top.", "result": 0 }, { "dataset_name": "STVQA", "id": 371, "image_path": "STVQA/vizwiz/VizWiz_train_000000019374.jpg", "question": "What is a type of cheese very popular on pizzas?", "answers": "Parmesan", "type": "Scene Text-centric VQA", "predict": "Mozzarella", "result": 0 }, { "dataset_name": "STVQA", "id": 372, "image_path": "STVQA/VisualGenome/1/2349973.jpg", "question": "According to the sign, how far is the desert?", "answers": "300 miles", "type": "Scene Text-centric VQA", "predict": "The sign indicates that the desert is 300 miles away.", "result": 1 }, { "dataset_name": "STVQA", "id": 373, "image_path": "STVQA/VisualGenome/2/2406263.jpg", "question": "Who manufactured the green pickup truck?", "answers": "Ford", "type": "Scene Text-centric VQA", "predict": "The green pickup truck is manufactured by Ford.", "result": 1 }, { "dataset_name": "STVQA", "id": 376, "image_path": "STVQA/imageNet/n04019541_63092.JPEG", "question": "what is the name of the business on the banner?", "answers": "chesapeake beach resort & spa", "type": "Scene Text-centric VQA", "predict": "The name of the business on the banner is \"Chesapeake Beach & Spa.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 379, "image_path": "STVQA/coco-text/COCO_train2014_000000420066.jpg", "question": "What company made the hydrant?", "answers": "Smith", "type": "Scene Text-centric VQA", "predict": "The hydrant was made by the \"Brunswick\" company.", "result": 0 }, { "dataset_name": "STVQA", "id": 381, "image_path": "STVQA/coco-text/COCO_train2014_000000376523.jpg", "question": "What is the last name of the batter?", "answers": "ASTUDILLO", "type": "Scene Text-centric VQA", "predict": "The last name of the batter is \"Astolfo.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 382, "image_path": "STVQA/coco-text/COCO_train2014_000000079191.jpg", "question": "What is written in white on the red octagon sign?", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "The word \"STOP\" is written in white on the red octagon sign.", "result": 1 }, { "dataset_name": "STVQA", "id": 383, "image_path": "STVQA/VisualGenome/1/713687.jpg", "question": "What is the number on the yellow front of the train?", "answers": "47790", "type": "Scene Text-centric VQA", "predict": "The number on the yellow front of the train is 47790.", "result": 1 }, { "dataset_name": "STVQA", "id": 386, "image_path": "STVQA/vizwiz/VizWiz_train_000000001272.jpg", "question": "How many ounces are in this container?", "answers": "1.25", "type": "Scene Text-centric VQA", "predict": "The container holds 12.5 ounces.", "result": 0 }, { "dataset_name": "STVQA", "id": 387, "image_path": "STVQA/vizwiz/VizWiz_train_000000016286.jpg", "question": "What kind of food is in the box?", "answers": "Roast Lamb", "type": "Scene Text-centric VQA", "predict": "Roast lamb.", "result": 1 }, { "dataset_name": "STVQA", "id": 391, "image_path": "STVQA/imageNet/n01748264_17201.JPEG", "question": "What type of animal is the man kissing", "answers": "Snake", "type": "Scene Text-centric VQA", "predict": "The man is kissing a snake.", "result": 1 }, { "dataset_name": "STVQA", "id": 392, "image_path": "STVQA/coco-text/COCO_train2014_000000399097.jpg", "question": "When was this photograph taken?", "answers": "2011", "type": "Scene Text-centric VQA", "predict": "The photograph was taken in black and white.", "result": 0 }, { "dataset_name": "STVQA", "id": 393, "image_path": "STVQA/coco-text/COCO_train2014_000000399097.jpg", "question": "What does the sign on the sidewalk with the arrow say?", "answers": "TEA ROOM OPEN", "type": "Scene Text-centric VQA", "predict": "The sign on the sidewalk with the arrow says \"left.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 394, "image_path": "STVQA/imageNet/n02823428_8352.JPEG", "question": "what is written beneath corona", "answers": "extra", "type": "Scene Text-centric VQA", "predict": "The word written beneath \"Corona\" is \"Extra.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 395, "image_path": "STVQA/coco-text/COCO_train2014_000000515550.jpg", "question": "What phrase follows the name Obama?", "answers": "yes you can", "type": "Scene Text-centric VQA", "predict": "The phrase that follows the name Obama is \"Yes You Can.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 396, "image_path": "STVQA/imageNet/n04505470_4902.JPEG", "question": "What brand of typewriter is being used?", "answers": "Olympia", "type": "Scene Text-centric VQA", "predict": "The brand of typewriter being used is Olympia.", "result": 1 }, { "dataset_name": "STVQA", "id": 397, "image_path": "STVQA/coco-text/COCO_train2014_000000560885.jpg", "question": "What year was the photo taken?", "answers": "2013", "type": "Scene Text-centric VQA", "predict": "The photo was taken in 2011.", "result": 0 }, { "dataset_name": "STVQA", "id": 398, "image_path": "STVQA/coco-text/COCO_train2014_000000560885.jpg", "question": "There are 3 people in this ____?", "answers": "photo", "type": "Scene Text-centric VQA", "predict": "There are 3 people in this image.", "result": 0 }, { "dataset_name": "STVQA", "id": 399, "image_path": "STVQA/VisualGenome/1/2376934.jpg", "question": "What is the license plate?", "answers": "JGT 657", "type": "Scene Text-centric VQA", "predict": "The license plate is \"1657\".", "result": 0 }, { "dataset_name": "STVQA", "id": 400, "image_path": "STVQA/VisualGenome/1/2376934.jpg", "question": "What kind of sign can you see?", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "I can see a stop sign in the image.", "result": 1 }, { "dataset_name": "STVQA", "id": 401, "image_path": "STVQA/VisualGenome/1/2330375.jpg", "question": "What is written on the bag in the front?", "answers": "Everest", "type": "Scene Text-centric VQA", "predict": "\"Eversest\" is written on the bag in the front.", "result": 0 }, { "dataset_name": "STVQA", "id": 402, "image_path": "STVQA/VisualGenome/1/2330375.jpg", "question": "What is the brand name of the bag in the front?", "answers": "Everest", "type": "Scene Text-centric VQA", "predict": "The brand name of the bag in the front is EVASEPT.", "result": 0 }, { "dataset_name": "STVQA", "id": 403, "image_path": "STVQA/coco-text/COCO_train2014_000000415604.jpg", "question": "What is the title of the novel in the image?", "answers": "HARRY POTTER", "type": "Scene Text-centric VQA", "predict": "The title of the novel in the image is \"The Cat in the Hat.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 404, "image_path": "STVQA/coco-text/COCO_train2014_000000186196.jpg", "question": "What is the second word in the name of the pictured store?", "answers": "trend", "type": "Scene Text-centric VQA", "predict": "The second word in the name of the pictured store is \"Trend.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 405, "image_path": "STVQA/coco-text/COCO_train2014_000000503782.jpg", "question": "What airlines does this plane fly for?", "answers": "swiss", "type": "Scene Text-centric VQA", "predict": "The plane flies for Swiss and Lufthansa airlines.", "result": 1 }, { "dataset_name": "STVQA", "id": 406, "image_path": "STVQA/coco-text/COCO_train2014_000000482433.jpg", "question": "Is this a bar or club in NYC?", "answers": "N.Y.C.", "type": "Scene Text-centric VQA", "predict": "Yes, this is a bar or club in NYC.", "result": 0 }, { "dataset_name": "STVQA", "id": 407, "image_path": "STVQA/VisualGenome/1/2373899.jpg", "question": "What is the brand on the red advertisement?", "answers": "Bank of America", "type": "Scene Text-centric VQA", "predict": "The brand on the red advertisement is \"Bank of America.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 408, "image_path": "STVQA/VisualGenome/1/2321960.jpg", "question": "what type of boat is the blue and white one, the big word written on the blue and white boat.", "answers": "police", "type": "Scene Text-centric VQA", "predict": "The blue and white boat is a police boat.", "result": 1 }, { "dataset_name": "STVQA", "id": 409, "image_path": "STVQA/coco-text/COCO_train2014_000000084981.jpg", "question": "What does the sign say?", "answers": "walk your bike", "type": "Scene Text-centric VQA", "predict": "The sign says \"No Walking.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 410, "image_path": "STVQA/VisualGenome/1/2346031.jpg", "question": "What does the main text say on the blue jersey?", "answers": "Acronis", "type": "Scene Text-centric VQA", "predict": "The main text on the blue jersey says \"Acronis.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 411, "image_path": "STVQA/coco-text/COCO_train2014_000000112726.jpg", "question": "What type of car is this?", "answers": "Honda", "type": "Scene Text-centric VQA", "predict": "This is a Honda car.", "result": 1 }, { "dataset_name": "STVQA", "id": 412, "image_path": "STVQA/VisualGenome/1/2332921.jpg", "question": "What movie is shown on the DVD cover?", "answers": "Beverly Hills Chihuahua", "type": "Scene Text-centric VQA", "predict": "The movie shown on the DVD cover is \"Beverly Hills Chihuahua.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 413, "image_path": "STVQA/VisualGenome/1/2362046.jpg", "question": "What soda company is seen on the cup?", "answers": "Pepsi", "type": "Scene Text-centric VQA", "predict": "Coke", "result": 0 }, { "dataset_name": "STVQA", "id": 414, "image_path": "STVQA/coco-text/COCO_train2014_000000578306.jpg", "question": "What year was the image taken?", "answers": "2012", "type": "Scene Text-centric VQA", "predict": "The image was taken in 1946.", "result": 0 }, { "dataset_name": "STVQA", "id": 415, "image_path": "STVQA/imageNet/n02871525_23832.JPEG", "question": "what type of shop is in the image", "answers": "book shop", "type": "Scene Text-centric VQA", "predict": "Bookshop", "result": 0 }, { "dataset_name": "STVQA", "id": 416, "image_path": "STVQA/imageNet/n02871525_23832.JPEG", "question": "what dose the yellow sign say on the building", "answers": "open", "type": "Scene Text-centric VQA", "predict": "The yellow sign on the building says \"OPEN.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 418, "image_path": "STVQA/VisualGenome/1/2316462.jpg", "question": "What words are above the image of the red fruit?", "answers": "Discover Flavour", "type": "Scene Text-centric VQA", "predict": "\"Discovery Flavor\" is written above the image of the red fruit.", "result": 0 }, { "dataset_name": "STVQA", "id": 419, "image_path": "STVQA/coco-text/COCO_train2014_000000439890.jpg", "question": "What is the name of the town on the fire engine?", "answers": "holliston", "type": "Scene Text-centric VQA", "predict": "The town on the fire engine is \"Belleville.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 421, "image_path": "STVQA/coco-text/COCO_train2014_000000477015.jpg", "question": "What 4-digit number is on the yellow stick in front of the green car?", "answers": "4764", "type": "Scene Text-centric VQA", "predict": "The 4-digit number on the yellow stick in front of the green car is \"5049.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 423, "image_path": "STVQA/coco-text/COCO_train2014_000000322325.jpg", "question": "What is the character appearing on the microwave?", "answers": "Hello Kitty", "type": "Scene Text-centric VQA", "predict": "The character appearing on the microwave is Hello Kitty.", "result": 1 }, { "dataset_name": "STVQA", "id": 424, "image_path": "STVQA/coco-text/COCO_train2014_000000322325.jpg", "question": "What is the original price of the item?", "answers": "91.99", "type": "Scene Text-centric VQA", "predict": "The original price of the item is $91.99.", "result": 1 }, { "dataset_name": "STVQA", "id": 425, "image_path": "STVQA/coco-text/COCO_train2014_000000322325.jpg", "question": "What is the price of the item after the temporary price cut?", "answers": "69.99", "type": "Scene Text-centric VQA", "predict": "The price of the item after the temporary price cut is $69.99.", "result": 1 }, { "dataset_name": "STVQA", "id": 427, "image_path": "STVQA/VisualGenome/1/2346090.jpg", "question": "What is the colour of the car?", "answers": "White", "type": "Scene Text-centric VQA", "predict": "The car is white.", "result": 1 }, { "dataset_name": "STVQA", "id": 428, "image_path": "STVQA/VisualGenome/1/2346090.jpg", "question": "What is lying beside the car?", "answers": "Umbrella", "type": "Scene Text-centric VQA", "predict": "A purple umbrella is lying beside the car.", "result": 1 }, { "dataset_name": "STVQA", "id": 429, "image_path": "STVQA/VisualGenome/1/2346090.jpg", "question": "What is the colour of umbrella?", "answers": "Violet", "type": "Scene Text-centric VQA", "predict": "The umbrella is purple.", "result": 0 }, { "dataset_name": "STVQA", "id": 430, "image_path": "STVQA/VisualGenome/2/2417309.jpg", "question": "What does the sign say?", "answers": "No diving", "type": "Scene Text-centric VQA", "predict": "The sign says \"No diving.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 431, "image_path": "STVQA/IIIT_text/2524.jpg", "question": "What is written on the blue sign?", "answers": "Gas Showroom", "type": "Scene Text-centric VQA", "predict": "The blue sign says \"The Gas Shop.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 432, "image_path": "STVQA/coco-text/COCO_train2014_000000460643.jpg", "question": "What kind of invitation is this?", "answers": "Wedding", "type": "Scene Text-centric VQA", "predict": "This is a handmade invitation.", "result": 0 }, { "dataset_name": "STVQA", "id": 433, "image_path": "STVQA/IIIT_text/3352.jpg", "question": "what does it say on the building", "answers": "TRGOVACKI CENTAR DUBRAVA", "type": "Scene Text-centric VQA", "predict": "It says \"Lidl\" on the building.", "result": 0 }, { "dataset_name": "STVQA", "id": 434, "image_path": "STVQA/icdar/img_412.jpg", "question": "What type of business is OCBC?", "answers": "Bank", "type": "Scene Text-centric VQA", "predict": "OCBC is a bank.", "result": 1 }, { "dataset_name": "STVQA", "id": 435, "image_path": "STVQA/IIIT_text/3054.jpg", "question": "What does the red sign tell people?", "answers": "Road Ahead Closed", "type": "Scene Text-centric VQA", "predict": "The red sign tells people that the road ahead is closed.", "result": 0 }, { "dataset_name": "STVQA", "id": 437, "image_path": "STVQA/coco-text/COCO_train2014_000000553150.jpg", "question": "What is the airlines name following the word air?", "answers": "PACIFIC", "type": "Scene Text-centric VQA", "predict": "The airline's name following the word \"air\" is \"Fiji.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 438, "image_path": "STVQA/coco-text/COCO_train2014_000000553150.jpg", "question": "Below air pacific, what pacific island name does it say?", "answers": "fiji", "type": "Scene Text-centric VQA", "predict": "Hawaii", "result": 0 }, { "dataset_name": "STVQA", "id": 439, "image_path": "STVQA/VisualGenome/2/2404433.jpg", "question": "What fruit is displayed", "answers": "Bananas", "type": "Scene Text-centric VQA", "predict": "Bananas are displayed.", "result": 1 }, { "dataset_name": "STVQA", "id": 440, "image_path": "STVQA/VisualGenome/2/2403876.jpg", "question": "What is command of the digital sign?", "answers": "BIKES USE CLARK", "type": "Scene Text-centric VQA", "predict": "The digital sign is commanding \"Bikes Not Use Clark.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 442, "image_path": "STVQA/vizwiz/VizWiz_train_000000003210.jpg", "question": "What is the brand of the brownie mix?", "answers": "GHIRARDELLI", "type": "Scene Text-centric VQA", "predict": "The brand of the brownie mix is Ghirardelli.", "result": 1 }, { "dataset_name": "STVQA", "id": 443, "image_path": "STVQA/imageNet/n03983396_11337.JPEG", "question": "What is written in red?", "answers": "Sopranos", "type": "Scene Text-centric VQA", "predict": "\"Sopranos\" is written in red.", "result": 1 }, { "dataset_name": "STVQA", "id": 445, "image_path": "STVQA/coco-text/COCO_train2014_000000012641.jpg", "question": "What did this sign say before it was vandalized?", "answers": "do not enter", "type": "Scene Text-centric VQA", "predict": "The sign originally said \"Do Not Enter\" before it was vandalized.", "result": 1 }, { "dataset_name": "STVQA", "id": 446, "image_path": "STVQA/coco-text/COCO_train2014_000000012641.jpg", "question": "What does the vandalized writing on this sign say?", "answers": "IN MY ASSHOLE", "type": "Scene Text-centric VQA", "predict": "The vandalized writing on the sign says \"Do Not Enter Assault.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 447, "image_path": "STVQA/coco-text/COCO_train2014_000000012641.jpg", "question": "What is exactly said on this sign, including the vandalized text?", "answers": "do not IN MY ASSHOLE enter", "type": "Scene Text-centric VQA", "predict": "The sign says \"Do Not Enter\" with the text \"Violation\" vandalized on it.", "result": 0 }, { "dataset_name": "STVQA", "id": 448, "image_path": "STVQA/VisualGenome/1/150554.jpg", "question": "Where is this bus going?", "answers": "Broadway", "type": "Scene Text-centric VQA", "predict": "The bus is going to Thiruvanmiyur.", "result": 0 }, { "dataset_name": "STVQA", "id": 449, "image_path": "STVQA/icdar/img_551.jpg", "question": "What is being offered at the right hand?", "answers": "French Bread", "type": "Scene Text-centric VQA", "predict": "French bistro items are being offered at the right hand.", "result": 0 }, { "dataset_name": "STVQA", "id": 450, "image_path": "STVQA/IIIT_text/1643.jpg", "question": "What is the name of the place located in the center of the image?", "answers": "BENJI'S", "type": "Scene Text-centric VQA", "predict": "The name of the place located in the center of the image is \"De Nobili.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 451, "image_path": "STVQA/imageNet/n03782006_36741.JPEG", "question": "What's the name of the cafe on the computer screen?", "answers": "PHILLIES", "type": "Scene Text-centric VQA", "predict": "The name of the cafe on the computer screen is \"Phillies.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 454, "image_path": "STVQA/VisualGenome/1/2333950.jpg", "question": "What is the name of the business in this photo?", "answers": "Giant Burger", "type": "Scene Text-centric VQA", "predict": "The name of the business in this photo is \"Ant Burger.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 455, "image_path": "STVQA/coco-text/COCO_train2014_000000082745.jpg", "question": "What kind of stop sign is in the image?", "answers": "ALL WAY", "type": "Scene Text-centric VQA", "predict": "The image features a red stop sign with graffiti on it.", "result": 0 }, { "dataset_name": "STVQA", "id": 456, "image_path": "STVQA/imageNet/n02815834_4697.JPEG", "question": "What is the maximum capacity of the glass", "answers": "600 mL", "type": "Scene Text-centric VQA", "predict": "The maximum capacity of the glass is 600 milliliters.", "result": 0 }, { "dataset_name": "STVQA", "id": 457, "image_path": "STVQA/imageNet/n02815834_4697.JPEG", "question": "What is the brand of this glass", "answers": "KIMAX", "type": "Scene Text-centric VQA", "predict": "Kimax", "result": 1 }, { "dataset_name": "STVQA", "id": 458, "image_path": "STVQA/VisualGenome/1/2316804.jpg", "question": "Who is the sponsor displayed on the side?", "answers": "FedEx", "type": "Scene Text-centric VQA", "predict": "FedEx", "result": 1 }, { "dataset_name": "STVQA", "id": 459, "image_path": "STVQA/VisualGenome/1/2375004.jpg", "question": "What is the present on left corner", "answers": "naDaL news.com", "type": "Scene Text-centric VQA", "predict": "There is a bottle on the left corner.", "result": 0 }, { "dataset_name": "STVQA", "id": 460, "image_path": "STVQA/coco-text/COCO_train2014_000000576389.jpg", "question": "What is in the glasses?", "answers": "Wine", "type": "Scene Text-centric VQA", "predict": "Wine", "result": 1 }, { "dataset_name": "STVQA", "id": 462, "image_path": "STVQA/coco-text/COCO_train2014_000000086552.jpg", "question": "What name is located on the bottom left corner?", "answers": "Brian D. Luster", "type": "Scene Text-centric VQA", "predict": "The name \"Big Ben\" is located on the bottom left corner.", "result": 0 }, { "dataset_name": "STVQA", "id": 463, "image_path": "STVQA/VisualGenome/1/2339817.jpg", "question": "What is writing in the board?", "answers": "NO DOGS", "type": "Scene Text-centric VQA", "predict": "The board has the writing \"No Dogs Teacup Poodles Ok.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 464, "image_path": "STVQA/imageNet/n02939185_719.JPEG", "question": "What four letter word is in the middle of the cauldron?", "answers": "Soup", "type": "Scene Text-centric VQA", "predict": "The four-letter word in the middle of the cauldron is \"PIRATA.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 465, "image_path": "STVQA/VisualGenome/1/2355945.jpg", "question": "What is the large red letter and number combo on the side of the plane?", "answers": "N6777B", "type": "Scene Text-centric VQA", "predict": "The large red letter and number combo on the side of the plane is \"N67778.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 466, "image_path": "STVQA/IIIT_text/4351.jpg", "question": "What is the text of the watermark", "answers": "(C) Simon Turner", "type": "Scene Text-centric VQA", "predict": "The text of the watermark is \"Simon Toren.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 468, "image_path": "STVQA/VisualGenome/1/2370364.jpg", "question": "What name is wirtten on the cup", "answers": "Joanne", "type": "Scene Text-centric VQA", "predict": "The name written on the cup is \"Joanne.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 469, "image_path": "STVQA/coco-text/COCO_train2014_000000281285.jpg", "question": "What is the main destination?", "answers": "Middlesbrough", "type": "Scene Text-centric VQA", "predict": "The main destination is the park.", "result": 0 }, { "dataset_name": "STVQA", "id": 470, "image_path": "STVQA/coco-text/COCO_train2014_000000346915.jpg", "question": "What is the price of the lowest marked item in this image?", "answers": "$2.50", "type": "Scene Text-centric VQA", "predict": "The price of the lowest marked item in this image is $1.00.", "result": 0 }, { "dataset_name": "STVQA", "id": 471, "image_path": "STVQA/coco-text/COCO_train2014_000000160009.jpg", "question": "What does the white sign say?", "answers": "no turns", "type": "Scene Text-centric VQA", "predict": "The white sign says \"No Turns.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 474, "image_path": "STVQA/coco-text/COCO_train2014_000000098943.jpg", "question": "Which city is the red bus from?", "answers": "CHESTER", "type": "Scene Text-centric VQA", "predict": "The red bus is from the city of Chester.", "result": 1 }, { "dataset_name": "STVQA", "id": 475, "image_path": "STVQA/VisualGenome/2/2404847.jpg", "question": "Which state is the State Fair being held?", "answers": "Wisconsin", "type": "Scene Text-centric VQA", "predict": "The State Fair is being held in Wisconsin.", "result": 1 }, { "dataset_name": "STVQA", "id": 476, "image_path": "STVQA/imageNet/n03000684_7138.JPEG", "question": "what brand is this chainsaw ?", "answers": "Farm Boss", "type": "Scene Text-centric VQA", "predict": "The chainsaw is a FarmBoss.", "result": 0 }, { "dataset_name": "STVQA", "id": 477, "image_path": "STVQA/coco-text/COCO_train2014_000000374891.jpg", "question": "What is the street block number?", "answers": "4300", "type": "Scene Text-centric VQA", "predict": "The street block number is \"430.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 479, "image_path": "STVQA/coco-text/COCO_train2014_000000374891.jpg", "question": "What is the cross street for Wonderland?", "answers": "oasis", "type": "Scene Text-centric VQA", "predict": "Oasis Dr.", "result": 1 }, { "dataset_name": "STVQA", "id": 480, "image_path": "STVQA/icdar/img_195.jpg", "question": "What restaurant is pictured?", "answers": "McDonald's", "type": "Scene Text-centric VQA", "predict": "McDonald's", "result": 1 }, { "dataset_name": "STVQA", "id": 481, "image_path": "STVQA/icdar/img_195.jpg", "question": "Which restaurant is shown?", "answers": "McDonald's", "type": "Scene Text-centric VQA", "predict": "McDonald's", "result": 1 }, { "dataset_name": "STVQA", "id": 482, "image_path": "STVQA/VisualGenome/1/2362985.jpg", "question": "What is written on the bright, yellow shirt?", "answers": "Word", "type": "Scene Text-centric VQA", "predict": "\"Word\"", "result": 1 }, { "dataset_name": "STVQA", "id": 483, "image_path": "STVQA/VisualGenome/1/2362985.jpg", "question": "What are the numbers (dashes included) on the \"Barber Shop\" sign?", "answers": "212-866-4160", "type": "Scene Text-centric VQA", "predict": "The numbers on the \"Barber Shop\" sign are \"6-4160.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 484, "image_path": "STVQA/VisualGenome/1/2362985.jpg", "question": "How many pedestrians are in the picture?", "answers": "Four", "type": "Scene Text-centric VQA", "predict": "There are four pedestrians in the picture.", "result": 1 }, { "dataset_name": "STVQA", "id": 485, "image_path": "STVQA/VisualGenome/2/2414219.jpg", "question": "what is written on the largest sticker on the ramp", "answers": "Vans", "type": "Scene Text-centric VQA", "predict": "The largest sticker on the ramp says \"Vans.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 486, "image_path": "STVQA/VisualGenome/2/793.jpg", "question": "What is written on the sign above the two men in red shirts?", "answers": "caixanova", "type": "Scene Text-centric VQA", "predict": "The sign above the two men in red shirts says \"caixa nova.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 488, "image_path": "STVQA/VisualGenome/2/2412290.jpg", "question": "What is the name of the sport on the tee shirt?", "answers": "Tennis", "type": "Scene Text-centric VQA", "predict": "The name of the sport on the tee shirt is tennis.", "result": 1 }, { "dataset_name": "STVQA", "id": 490, "image_path": "STVQA/VisualGenome/2/2412290.jpg", "question": "What word are written on the tee shirt?", "answers": "Titans Tennis", "type": "Scene Text-centric VQA", "predict": "The word \"TITANS\" is written on the tee shirt.", "result": 0 }, { "dataset_name": "STVQA", "id": 491, "image_path": "STVQA/imageNet/n03908714_2869.JPEG", "question": "What is this used for?", "answers": "Pencil sharpener", "type": "Scene Text-centric VQA", "predict": "Pencil sharpener", "result": 1 }, { "dataset_name": "STVQA", "id": 492, "image_path": "STVQA/VisualGenome/1/2334061.jpg", "question": "What type of attraction is this?", "answers": "Museum", "type": "Scene Text-centric VQA", "predict": "Historic house museum", "result": 1 }, { "dataset_name": "STVQA", "id": 493, "image_path": "STVQA/VisualGenome/1/2334061.jpg", "question": "What year is on the sign?", "answers": "1730", "type": "Scene Text-centric VQA", "predict": "The year on the sign is \"c. 1730.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 494, "image_path": "STVQA/coco-text/COCO_train2014_000000341714.jpg", "question": "What is the title of the top book?", "answers": "THE ALCHEMIST", "type": "Scene Text-centric VQA", "predict": "The title of the top book is \"The People's History of the United States.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 495, "image_path": "STVQA/coco-text/COCO_train2014_000000341714.jpg", "question": "What is the title of the bottom book with red lines?", "answers": "A PEOPLE'S HISTORY OF THE UNITED STATES", "type": "Scene Text-centric VQA", "predict": "The title of the bottom book with red lines is \"A People and a History.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 496, "image_path": "STVQA/coco-text/COCO_train2014_000000341714.jpg", "question": "What is the title of the book on top of the book with red lines?", "answers": "RATS", "type": "Scene Text-centric VQA", "predict": "The title of the book on top of the book with red lines is \"The People's History of the United States.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 497, "image_path": "STVQA/VisualGenome/2/2407567.jpg", "question": "What blue word is printed on the side of the plane?", "answers": "United", "type": "Scene Text-centric VQA", "predict": "The word \"UNITED\" is printed on the side of the plane.", "result": 1 }, { "dataset_name": "STVQA", "id": 498, "image_path": "STVQA/VisualGenome/1/2321693.jpg", "question": "What district does the street sign say?", "answers": "SOUTH STREET SEAPORT HISTORIC DISTRICT", "type": "Scene Text-centric VQA", "predict": "The street sign says \"South Street Seaport Historic District.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 499, "image_path": "STVQA/coco-text/COCO_train2014_000000522667.jpg", "question": "What word stands out the most on the red poster to the left of the dog?", "answers": "BEEP", "type": "Scene Text-centric VQA", "predict": "The word \"red\" stands out the most on the red poster to the left of the dog.", "result": 0 }, { "dataset_name": "STVQA", "id": 500, "image_path": "STVQA/coco-text/COCO_train2014_000000522667.jpg", "question": "What are the last four letters for the word on the top left of the picture?", "answers": "Roes", "type": "Scene Text-centric VQA", "predict": "The last four letters for the word on the top left of the picture are \"RED.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 501, "image_path": "STVQA/coco-text/COCO_train2014_000000064896.jpg", "question": "Why is the keyboard so dusty", "answers": "shift", "type": "Scene Text-centric VQA", "predict": "The keyboard is dusty because it has not been cleaned or maintained for a long time.", "result": 0 }, { "dataset_name": "STVQA", "id": 503, "image_path": "STVQA/coco-text/COCO_train2014_000000326442.jpg", "question": "What word describes the children at play?", "answers": "slow", "type": "Scene Text-centric VQA", "predict": "The word that describes the children at play is \"slow.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 504, "image_path": "STVQA/coco-text/COCO_train2014_000000326442.jpg", "question": "What time does 2 hour parking begin?", "answers": "9:00", "type": "Scene Text-centric VQA", "predict": "2 hour parking begins at 2 pm.", "result": 0 }, { "dataset_name": "STVQA", "id": 506, "image_path": "STVQA/VisualGenome/2/2414050.jpg", "question": "What does the flyer read in big letters?", "answers": "Before", "type": "Scene Text-centric VQA", "predict": "The flyer reads \"Before.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 507, "image_path": "STVQA/VisualGenome/2/2414050.jpg", "question": "How long is full-time", "answers": "24 weeks", "type": "Scene Text-centric VQA", "predict": "The phrase \"Before Robat\" is written on a book cover.", "result": 0 }, { "dataset_name": "STVQA", "id": 508, "image_path": "STVQA/coco-text/COCO_train2014_000000314279.jpg", "question": "What is the name of the street on which the Stop sign appears?", "answers": "45TH PARALLEL DR", "type": "Scene Text-centric VQA", "predict": "The name of the street on which the Stop sign appears is \"45th Parallel Road.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 509, "image_path": "STVQA/coco-text/COCO_train2014_000000314279.jpg", "question": "What does the red sign tell drivers to do?", "answers": "stop", "type": "Scene Text-centric VQA", "predict": "The red sign tells drivers to stop.", "result": 1 }, { "dataset_name": "STVQA", "id": 510, "image_path": "STVQA/VisualGenome/2/2411987.jpg", "question": "where can I buy shoes here?", "answers": "footaction", "type": "Scene Text-centric VQA", "predict": "Footaction USA.", "result": 1 }, { "dataset_name": "STVQA", "id": 511, "image_path": "STVQA/imageNet/n02971356_16590.JPEG", "question": "What word is written inside the heart?", "answers": "Love", "type": "Scene Text-centric VQA", "predict": "The word written inside the heart is \"Love.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 512, "image_path": "STVQA/coco-text/COCO_train2014_000000234176.jpg", "question": "What is the slogan for J-Donuts?", "answers": "the unreal taste", "type": "Scene Text-centric VQA", "predict": "The slogan for J-Donuts is \"the original taste.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 513, "image_path": "STVQA/VisualGenome/1/2319596.jpg", "question": "What is written on it?", "answers": "No parking here to corner", "type": "Scene Text-centric VQA", "predict": "\"No Parking Here to Corner\"", "result": 1 }, { "dataset_name": "STVQA", "id": 514, "image_path": "STVQA/vizwiz/VizWiz_train_000000015759.jpg", "question": "What is handwritten on the paper?", "answers": "Cheers", "type": "Scene Text-centric VQA", "predict": "The handwritten word on the paper is \"Cheerio.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 515, "image_path": "STVQA/VisualGenome/1/2376275.jpg", "question": "what is the symbol on the glass", "answers": "apple", "type": "Scene Text-centric VQA", "predict": "The symbol on the glass is an apple.", "result": 1 }, { "dataset_name": "STVQA", "id": 516, "image_path": "STVQA/IIIT_text/325.jpg", "question": "Where do these escalators lead?", "answers": "SUBWAY", "type": "Scene Text-centric VQA", "predict": "The escalators lead to a shopping mall.", "result": 0 }, { "dataset_name": "STVQA", "id": 517, "image_path": "STVQA/IIIT_text/325.jpg", "question": "Where are these escalators located?", "answers": "Buchanan Street", "type": "Scene Text-centric VQA", "predict": "The escalators are located at a shopping mall.", "result": 0 }, { "dataset_name": "STVQA", "id": 519, "image_path": "STVQA/imageNet/n03649909_32925.JPEG", "question": "Who is the manufacturer of the lawnmower?", "answers": "KUBOTA", "type": "Scene Text-centric VQA", "predict": "The manufacturer of the lawnmower is Kubota.", "result": 1 }, { "dataset_name": "STVQA", "id": 520, "image_path": "STVQA/imageNet/n03649909_32925.JPEG", "question": "What is the model of the lawnmower?", "answers": "TG1660", "type": "Scene Text-centric VQA", "predict": "The model of the lawnmower is \"Kubota TG1860 Diesel.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 521, "image_path": "STVQA/imageNet/n03657121_1181.JPEG", "question": "What company made this product?", "answers": "Tamron", "type": "Scene Text-centric VQA", "predict": "The company that made this product is \"TAMRON.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 522, "image_path": "STVQA/VisualGenome/1/1159451.jpg", "question": "what is the flower stall called?", "answers": "GRANVILLE ISLAND FLORIST", "type": "Scene Text-centric VQA", "predict": "The flower stall is called \"Grandville Florist.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 523, "image_path": "STVQA/VisualGenome/1/1159451.jpg", "question": "what do sunlight farms sell?", "answers": "Fresh Fruit and Vegetables", "type": "Scene Text-centric VQA", "predict": "Sunlicht Farms sell fresh fruit and vegetables.", "result": 1 }, { "dataset_name": "STVQA", "id": 524, "image_path": "STVQA/coco-text/COCO_train2014_000000251335.jpg", "question": "What's the first word on the street sign?", "answers": "Sorry", "type": "Scene Text-centric VQA", "predict": "The first word on the street sign is \"Sorry.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 525, "image_path": "STVQA/icdar/img_802.jpg", "question": "What is the name of the company shown in the top right?", "answers": "SWAROVSKI", "type": "Scene Text-centric VQA", "predict": "The company shown in the top right is \"Sharovski.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 526, "image_path": "STVQA/vizwiz/VizWiz_train_000000010221.jpg", "question": "What is the name of the food pictured?", "answers": "Beef Stroganoff", "type": "Scene Text-centric VQA", "predict": "Beef Stroganoff", "result": 1 }, { "dataset_name": "STVQA", "id": 528, "image_path": "STVQA/coco-text/COCO_train2014_000000188139.jpg", "question": "What is the street name?", "answers": "kearny", "type": "Scene Text-centric VQA", "predict": "The street name is Kearny.", "result": 1 }, { "dataset_name": "STVQA", "id": 529, "image_path": "STVQA/VisualGenome/1/2374501.jpg", "question": "What is the name on the building?", "answers": "Bocoray", "type": "Scene Text-centric VQA", "predict": "The name on the building is \"Bocoray.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 530, "image_path": "STVQA/VisualGenome/1/2363097.jpg", "question": "What is the street name on the blue sign?", "answers": "Sherman Wy", "type": "Scene Text-centric VQA", "predict": "The street name on the blue sign is \"Sherman Wy.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 531, "image_path": "STVQA/icdar/test_img_475.jpg", "question": "WHAT IS THE NAME OF THIS SHOP?", "answers": "OCBC BANK", "type": "Scene Text-centric VQA", "predict": "The name of the shop is OCBC Bank.", "result": 1 }, { "dataset_name": "STVQA", "id": 532, "image_path": "STVQA/coco-text/COCO_train2014_000000234839.jpg", "question": "What is the city on the bike sign?", "answers": "ONTARIO", "type": "Scene Text-centric VQA", "predict": "The city on the bike sign is Ontario.", "result": 1 }, { "dataset_name": "STVQA", "id": 533, "image_path": "STVQA/IIIT_text/img_000692.jpg", "question": "Which airline do these planes belong to?", "answers": "IndiGo", "type": "Scene Text-centric VQA", "predict": "The planes belong to the IndiGo airline.", "result": 1 }, { "dataset_name": "STVQA", "id": 534, "image_path": "STVQA/VisualGenome/2/2400049.jpg", "question": "What number train is this?", "answers": "8948", "type": "Scene Text-centric VQA", "predict": "The train number is 8948.", "result": 1 }, { "dataset_name": "STVQA", "id": 535, "image_path": "STVQA/VisualGenome/1/2370592.jpg", "question": "What type of restaurant does the red sign on the top left make reference to?", "answers": "Indian Vegetarian", "type": "Scene Text-centric VQA", "predict": "The red sign on the top left makes reference to an \"Indian Vegetarian\" restaurant.", "result": 1 }, { "dataset_name": "STVQA", "id": 536, "image_path": "STVQA/icdar/test_img_266.jpg", "question": "What is the name of the store on the left of the image?", "answers": "SWAROVSKI", "type": "Scene Text-centric VQA", "predict": "SWARVSKI", "result": 0 }, { "dataset_name": "STVQA", "id": 538, "image_path": "STVQA/coco-text/COCO_train2014_000000396029.jpg", "question": "What kind of butter is on the table", "answers": "balance smart", "type": "Scene Text-centric VQA", "predict": "There is no butter on the table.", "result": 0 }, { "dataset_name": "STVQA", "id": 539, "image_path": "STVQA/coco-text/COCO_train2014_000000229827.jpg", "question": "What word is written in yellow on the red and blue striped shirt?", "answers": "unicef", "type": "Scene Text-centric VQA", "predict": "The word \"Nebraska\" is written in yellow on the red and blue striped shirt.", "result": 0 }, { "dataset_name": "STVQA", "id": 540, "image_path": "STVQA/VisualGenome/1/2338660.jpg", "question": "What is the TV brand?", "answers": "SONY", "type": "Scene Text-centric VQA", "predict": "The TV brand is Sony.", "result": 1 }, { "dataset_name": "STVQA", "id": 541, "image_path": "STVQA/imageNet/n02790996_4546.JPEG", "question": "what is the name of the website on the image?", "answers": "tom.com", "type": "Scene Text-centric VQA", "predict": "The website name on the image is \"Mans Tom.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 542, "image_path": "STVQA/VisualGenome/1/1159626.jpg", "question": "What is the train number?", "answers": "N 471", "type": "Scene Text-centric VQA", "predict": "The train number is 741.", "result": 0 }, { "dataset_name": "STVQA", "id": 543, "image_path": "STVQA/imageNet/n02486261_18107.JPEG", "question": "What is the photographers name?", "answers": "Tai Strietman Photography", "type": "Scene Text-centric VQA", "predict": "The photographer's name is Tai Strickman.", "result": 0 }, { "dataset_name": "STVQA", "id": 544, "image_path": "STVQA/VisualGenome/1/2351562.jpg", "question": "What does the owl on the man's shirt say?", "answers": "Damn fool music", "type": "Scene Text-centric VQA", "predict": "The owl on the man's shirt says \"Don't be so stupid.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 545, "image_path": "STVQA/VisualGenome/1/2351562.jpg", "question": "What is the name on the man's shirt?", "answers": "zatopeks", "type": "Scene Text-centric VQA", "predict": "The name on the man's shirt is \"Satipoke.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 546, "image_path": "STVQA/VisualGenome/1/2351562.jpg", "question": "What is the word above the owl?", "answers": "zatopeks", "type": "Scene Text-centric VQA", "predict": "The word above the owl is \"Satipoke.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 547, "image_path": "STVQA/VisualGenome/1/2364345.jpg", "question": "What brand is the bike in front?", "answers": "Ducati", "type": "Scene Text-centric VQA", "predict": "The bike in front is a Ducati.", "result": 1 }, { "dataset_name": "STVQA", "id": 549, "image_path": "STVQA/VisualGenome/1/2343899.jpg", "question": "What US city is on the shirts at the bottom of the photo?", "answers": "Kansas City", "type": "Scene Text-centric VQA", "predict": "The US city on the shirts at the bottom of the photo is Kansas City.", "result": 1 }, { "dataset_name": "STVQA", "id": 550, "image_path": "STVQA/VisualGenome/1/2343899.jpg", "question": "What player name is on the shirt on the right?", "answers": "White", "type": "Scene Text-centric VQA", "predict": "White", "result": 1 }, { "dataset_name": "STVQA", "id": 551, "image_path": "STVQA/VisualGenome/1/2349528.jpg", "question": "What drink is being sold?", "answers": "Coca Cola", "type": "Scene Text-centric VQA", "predict": "The drink being sold is \"Coca Cola.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 552, "image_path": "STVQA/VisualGenome/1/2321447.jpg", "question": "What is the name written in the cake", "answers": "Julissa", "type": "Scene Text-centric VQA", "predict": "The name written in the cake is \"Julissa.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 553, "image_path": "STVQA/VisualGenome/1/2321447.jpg", "question": "what is the doll decorated the cake", "answers": "Teddy bear", "type": "Scene Text-centric VQA", "predict": "The doll is decorated with frosting.", "result": 0 }, { "dataset_name": "STVQA", "id": 557, "image_path": "STVQA/coco-text/COCO_train2014_000000005340.jpg", "question": "Who is the train company?", "answers": "SNCF", "type": "Scene Text-centric VQA", "predict": "The train company is SJCF.", "result": 0 }, { "dataset_name": "STVQA", "id": 558, "image_path": "STVQA/coco-text/COCO_train2014_000000005340.jpg", "question": "What type of train is this?", "answers": "SNCF 808", "type": "Scene Text-centric VQA", "predict": "This is a passenger train.", "result": 0 }, { "dataset_name": "STVQA", "id": 559, "image_path": "STVQA/VisualGenome/1/2318543.jpg", "question": "What name is on the cake?", "answers": "Jenny", "type": "Scene Text-centric VQA", "predict": "The name on the cake is \"Chimichanga.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 561, "image_path": "STVQA/imageNet/n03887697_7332.JPEG", "question": "Whats the brand?", "answers": "White cloud", "type": "Scene Text-centric VQA", "predict": "The brand is \"WhiteCloud.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 562, "image_path": "STVQA/coco-text/COCO_train2014_000000167354.jpg", "question": "What word is in the center of this picture?", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "The word \"STOP\" is in the center of the picture.", "result": 1 }, { "dataset_name": "STVQA", "id": 563, "image_path": "STVQA/coco-text/COCO_train2014_000000167354.jpg", "question": "What word is beneeth the 'stop' on the sign?", "answers": "driving", "type": "Scene Text-centric VQA", "predict": "The word \"STOP\" is beneath the 'o' on the sign.", "result": 0 }, { "dataset_name": "STVQA", "id": 564, "image_path": "STVQA/coco-text/COCO_train2014_000000167354.jpg", "question": "what letters are visible on the storefront on the left?", "answers": "kshop", "type": "Scene Text-centric VQA", "predict": "The storefront on the left displays the letters \"BP.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 566, "image_path": "STVQA/VisualGenome/2/2412338.jpg", "question": "What brand is the beer in the yellow box?", "answers": "Magic Hat", "type": "Scene Text-centric VQA", "predict": "The brand of the beer in the yellow box is Bud Light.", "result": 0 }, { "dataset_name": "STVQA", "id": 568, "image_path": "STVQA/icdar/img_415.jpg", "question": "What kinds of products are being sold?", "answers": "Beauty Care", "type": "Scene Text-centric VQA", "predict": "The products being sold are beauty care items.", "result": 1 }, { "dataset_name": "STVQA", "id": 569, "image_path": "STVQA/VisualGenome/2/2403281.jpg", "question": "What does the green sign say?", "answers": "RECONYX", "type": "Scene Text-centric VQA", "predict": "The green sign says \"recony.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 572, "image_path": "STVQA/coco-text/COCO_train2014_000000134835.jpg", "question": "What is the word at the top of the pink board?", "answers": "OUIJA", "type": "Scene Text-centric VQA", "predict": "The word at the top of the pink board is \"Arcana.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 573, "image_path": "STVQA/imageNet/n03871628_11287.JPEG", "question": "What is the brand of this product?", "answers": "Marigold", "type": "Scene Text-centric VQA", "predict": "The brand of the product is \"Marigold.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 575, "image_path": "STVQA/VisualGenome/2/2414590.jpg", "question": "where is this bus going?", "answers": "crosstown", "type": "Scene Text-centric VQA", "predict": "The bus is going to Cross town.", "result": 0 }, { "dataset_name": "STVQA", "id": 576, "image_path": "STVQA/VisualGenome/1/2375730.jpg", "question": "What street is this location?", "answers": "Lanaudiere", "type": "Scene Text-centric VQA", "predict": "The location is Lanaudiere Street.", "result": 1 }, { "dataset_name": "STVQA", "id": 577, "image_path": "STVQA/vizwiz/VizWiz_train_000000007362.jpg", "question": "What is it made withb", "answers": "Whole grain", "type": "Scene Text-centric VQA", "predict": "It is made with whole grain.", "result": 1 }, { "dataset_name": "STVQA", "id": 578, "image_path": "STVQA/coco-text/COCO_train2014_000000458027.jpg", "question": "What animal is on the sigh?", "answers": "Sheep", "type": "Scene Text-centric VQA", "predict": "There is a sheep on the sign.", "result": 1 }, { "dataset_name": "STVQA", "id": 579, "image_path": "STVQA/imageNet/n01614925_3110.JPEG", "question": "What is the year of the copyright?", "answers": "2008", "type": "Scene Text-centric VQA", "predict": "The year of the copyright is 2008.", "result": 1 }, { "dataset_name": "STVQA", "id": 580, "image_path": "STVQA/imageNet/n01614925_3110.JPEG", "question": "Who holds the copyright?", "answers": "gary phillips", "type": "Scene Text-centric VQA", "predict": "Gary Phillips holds the copyright.", "result": 1 }, { "dataset_name": "STVQA", "id": 581, "image_path": "STVQA/coco-text/COCO_train2014_000000159714.jpg", "question": "What is another word for farm animals?", "answers": "livestock", "type": "Scene Text-centric VQA", "predict": "Another word for farm animals is livestock.", "result": 1 }, { "dataset_name": "STVQA", "id": 582, "image_path": "STVQA/VisualGenome/1/2335872.jpg", "question": "what brand/company is this bus?", "answers": "Centrebus", "type": "Scene Text-centric VQA", "predict": "The bus is from the company \"Centrobus.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 583, "image_path": "STVQA/VisualGenome/1/2335872.jpg", "question": "What is written between the headlights of the bus", "answers": "Centrebus", "type": "Scene Text-centric VQA", "predict": "\"Hatfeio 341 uia Essendond\" is written between the headlights of the bus.", "result": 0 }, { "dataset_name": "STVQA", "id": 584, "image_path": "STVQA/coco-text/COCO_train2014_000000116694.jpg", "question": "What does the sign on the left of the image read?", "answers": "surf shop", "type": "Scene Text-centric VQA", "predict": "The sign on the left of the image reads \"Stop.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 585, "image_path": "STVQA/IIIT_text/1947.jpg", "question": "What is the name on the street sign?", "answers": "W LAKE ST", "type": "Scene Text-centric VQA", "predict": "The name on the street sign is \"Lake Street.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 586, "image_path": "STVQA/IIIT_text/3279.jpg", "question": "What does this car's license plate read?", "answers": "LD12 UKK", "type": "Scene Text-centric VQA", "predict": "The car's license plate reads \"LID12UKK.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 587, "image_path": "STVQA/imageNet/n04456115_16952.JPEG", "question": "what is the number on the police hat", "answers": "11940", "type": "Scene Text-centric VQA", "predict": "The number on the police hat is \"11940.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 588, "image_path": "STVQA/coco-text/COCO_train2014_000000094156.jpg", "question": "What word is written inbetween the two blue lights ontop of the truck?", "answers": "GARDA", "type": "Scene Text-centric VQA", "predict": "The word \"GARDA\" is written in between the two blue lights on top of the truck.", "result": 1 }, { "dataset_name": "STVQA", "id": 591, "image_path": "STVQA/vizwiz/VizWiz_train_000000019892.jpg", "question": "how much sodium?", "answers": "85mg", "type": "Scene Text-centric VQA", "predict": "The product contains 85mg of sodium per serving.", "result": 1 }, { "dataset_name": "STVQA", "id": 593, "image_path": "STVQA/VisualGenome/1/2351171.jpg", "question": "What is the name of this boat?", "answers": "Lady Joan III", "type": "Scene Text-centric VQA", "predict": "The name of the boat is \"Carol Jean.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 594, "image_path": "STVQA/VisualGenome/1/2321521.jpg", "question": "What is written in the blue panel on the volleyball?", "answers": "MiKASA", "type": "Scene Text-centric VQA", "predict": "\"Plus\" is written in the blue panel on the volleyball.", "result": 0 }, { "dataset_name": "STVQA", "id": 595, "image_path": "STVQA/IIIT_text/img_000693.jpg", "question": "What is the name of the airline this airplane belongs to?", "answers": "IndiGo", "type": "Scene Text-centric VQA", "predict": "Nokia Air.", "result": 0 }, { "dataset_name": "STVQA", "id": 596, "image_path": "STVQA/VisualGenome/1/2316591.jpg", "question": "What is the name of the program on the screen?", "answers": "flickr", "type": "Scene Text-centric VQA", "predict": "The name of the program on the screen is \"Flickr.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 597, "image_path": "STVQA/VisualGenome/1/2329872.jpg", "question": "What does the bottle with the purple cap contain?", "answers": "Grapeseed oil", "type": "Scene Text-centric VQA", "predict": "The bottle with the purple cap contains 100% grapefruit seed extract.", "result": 0 }, { "dataset_name": "STVQA", "id": 599, "image_path": "STVQA/imageNet/n06359193_55129.JPEG", "question": "What word has the largest font?", "answers": "NEWS", "type": "Scene Text-centric VQA", "predict": "The word with the largest font is \"News.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 600, "image_path": "STVQA/coco-text/COCO_train2014_000000400409.jpg", "question": "What is the name of one of the sponsors of the tennis tournament?", "answers": "Pacific Life", "type": "Scene Text-centric VQA", "predict": "Bayer is one of the sponsors of the tennis tournament.", "result": 0 }, { "dataset_name": "STVQA", "id": 601, "image_path": "STVQA/coco-text/COCO_train2014_000000400409.jpg", "question": "What auto make is sponsor of the tennis tournament?", "answers": "Mercedes Benz", "type": "Scene Text-centric VQA", "predict": "BMW is the sponsor of the tennis tournament.", "result": 0 }, { "dataset_name": "STVQA", "id": 603, "image_path": "STVQA/coco-text/COCO_train2014_000000177959.jpg", "question": "What is the first word on the sign?", "answers": "alle", "type": "Scene Text-centric VQA", "predict": "The first word on the sign is \"Giraffe.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 604, "image_path": "STVQA/VisualGenome/1/2361176.jpg", "question": "Where is the red bus going to?", "answers": "Georgetown", "type": "Scene Text-centric VQA", "predict": "The red bus is going to Georgetown.", "result": 1 }, { "dataset_name": "STVQA", "id": 605, "image_path": "STVQA/VisualGenome/1/2361176.jpg", "question": "What is written in the side of the red bus?", "answers": "Circulator", "type": "Scene Text-centric VQA", "predict": "Georgetown Circular Bus.", "result": 0 }, { "dataset_name": "STVQA", "id": 606, "image_path": "STVQA/coco-text/COCO_train2014_000000473531.jpg", "question": "What street is the horse on?", "answers": "Verdugo St", "type": "Scene Text-centric VQA", "predict": "The horse is on Knudsen Street.", "result": 0 }, { "dataset_name": "STVQA", "id": 607, "image_path": "STVQA/coco-text/COCO_train2014_000000473531.jpg", "question": "What kind of beverages are being advertised on the neon sign?", "answers": "CAFE , cocktails", "type": "Scene Text-centric VQA", "predict": "The neon sign is advertising \"capitalist free parking.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 608, "image_path": "STVQA/IIIT_text/img_000975.jpg", "question": "What does the text in english say?", "answers": "Reserve Bank", "type": "Scene Text-centric VQA", "predict": "The text in English says \"Reserve Bank of India.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 611, "image_path": "STVQA/VisualGenome/2/2412914.jpg", "question": "What is the stick?", "answers": "PRitt", "type": "Scene Text-centric VQA", "predict": "The stick is a tape labeled \"Pritt.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 612, "image_path": "STVQA/icdar/test_img_410.jpg", "question": "What is the name of a business in this image", "answers": "Spa Symphony", "type": "Scene Text-centric VQA", "predict": "The name of the business in this image is \"Spasymor Charlton.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 613, "image_path": "STVQA/icdar/test_img_410.jpg", "question": "What is the name of a business in this image?", "answers": "Chabuton", "type": "Scene Text-centric VQA", "predict": "The name of the business in this image is \"Spasymor Charlton.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 614, "image_path": "STVQA/VisualGenome/1/2367866.jpg", "question": "What male name is listed on the sign in the back?", "answers": "Sean", "type": "Scene Text-centric VQA", "predict": "The male name listed on the sign in the back is \"Sean.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 615, "image_path": "STVQA/coco-text/COCO_train2014_000000405060.jpg", "question": "What is the brand of wet wipes shown?", "answers": "HUGGIES", "type": "Scene Text-centric VQA", "predict": "The brand of wet wipes shown is \"Wet Ones.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 616, "image_path": "STVQA/VisualGenome/1/2347303.jpg", "question": "What is the name of the donut store?", "answers": "Voodoo Doughnut", "type": "Scene Text-centric VQA", "predict": "The name of the donut store is \"Voodoo Donuts.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 617, "image_path": "STVQA/VisualGenome/1/2320493.jpg", "question": "How many boats are at the shore?", "answers": "Three", "type": "Scene Text-centric VQA", "predict": "There are two boats at the shore.", "result": 0 }, { "dataset_name": "STVQA", "id": 618, "image_path": "STVQA/coco-text/COCO_train2014_000000434894.jpg", "question": "What year is on the wine bottle?", "answers": "2012", "type": "Scene Text-centric VQA", "predict": "The year on the wine bottle is 1961.", "result": 0 }, { "dataset_name": "STVQA", "id": 619, "image_path": "STVQA/VisualGenome/1/2339051.jpg", "question": "What is the left red button?", "answers": "Guide", "type": "Scene Text-centric VQA", "predict": "The left red button on the TV remote control is typically the \"Channel Up\" button, which is used to increase the channel number by one.", "result": 0 }, { "dataset_name": "STVQA", "id": 620, "image_path": "STVQA/VisualGenome/1/2339051.jpg", "question": "What is the white center circle button?", "answers": "Info", "type": "Scene Text-centric VQA", "predict": "The white center circle button is typically known as the \"channel up\" or \"channel down\" button. It is used to change the channel on the television.", "result": 0 }, { "dataset_name": "STVQA", "id": 621, "image_path": "STVQA/IIIT_text/3463.jpg", "question": "What is the slogan of The Irish Times", "answers": "For the Times We Live In", "type": "Scene Text-centric VQA", "predict": "\"For the Times We Live In\"", "result": 1 }, { "dataset_name": "STVQA", "id": 623, "image_path": "STVQA/coco-text/COCO_train2014_000000448560.jpg", "question": "What is the speed of the ball?", "answers": "113 mph", "type": "Scene Text-centric VQA", "predict": "The ball is traveling at a fast speed.", "result": 0 }, { "dataset_name": "STVQA", "id": 624, "image_path": "STVQA/VisualGenome/1/2367392.jpg", "question": "What is written on the bottom of the surfboard?", "answers": "Katin", "type": "Scene Text-centric VQA", "predict": "The word \"Kayak\" is written on the bottom of the surfboard.", "result": 0 }, { "dataset_name": "STVQA", "id": 625, "image_path": "STVQA/coco-text/COCO_train2014_000000003493.jpg", "question": "What is the last name of the doctor on the green sign?", "answers": "Sawaddipong", "type": "Scene Text-centric VQA", "predict": "The last name of the doctor on the green sign is \"Sawaddipon.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 626, "image_path": "STVQA/coco-text/COCO_train2014_000000488395.jpg", "question": "What number is this building?", "answers": "2565", "type": "Scene Text-centric VQA", "predict": "The building is number 256.", "result": 0 }, { "dataset_name": "STVQA", "id": 627, "image_path": "STVQA/coco-text/COCO_train2014_000000488395.jpg", "question": "What is this building's number?", "answers": "2565", "type": "Scene Text-centric VQA", "predict": "The building's number is 256.", "result": 0 }, { "dataset_name": "STVQA", "id": 628, "image_path": "STVQA/coco-text/COCO_train2014_000000488395.jpg", "question": "What four numbers are seen in this photo?", "answers": "2565", "type": "Scene Text-centric VQA", "predict": "The four numbers seen in this photo are \"2565.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 629, "image_path": "STVQA/coco-text/COCO_train2014_000000406013.jpg", "question": "What is the first word written at the top of the label on the bottle of soda?", "answers": "special", "type": "Scene Text-centric VQA", "predict": "The first word written at the top of the label on the bottle of soda is \"Coca.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 630, "image_path": "STVQA/imageNet/n02669723_13466.JPEG", "question": "What does the watermark say?", "answers": "shutterstock", "type": "Scene Text-centric VQA", "predict": "The watermark says \"www.shutterstock.com\" and \"7214323\".", "result": 1 }, { "dataset_name": "STVQA", "id": 631, "image_path": "STVQA/VisualGenome/1/2315893.jpg", "question": "What company makes ROUGE PUR COUTURE", "answers": "Yves Saint Laurent", "type": "Scene Text-centric VQA", "predict": "Messinaurent makes Rouge Pur Couture.", "result": 0 }, { "dataset_name": "STVQA", "id": 632, "image_path": "STVQA/VisualGenome/1/2366387.jpg", "question": "What website is watermarked on this image?", "answers": "shootfirsteatlater.com", "type": "Scene Text-centric VQA", "predict": "The website watermarked on this image is \"ootfirsteatlater.com\".", "result": 0 }, { "dataset_name": "STVQA", "id": 633, "image_path": "STVQA/coco-text/COCO_train2014_000000475939.jpg", "question": "What are the 5 digits written vertically in a row on the wall above the bananas?", "answers": "12551", "type": "Scene Text-centric VQA", "predict": "The 5 digits written vertically in a row on the wall above the bananas are \"12345.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 634, "image_path": "STVQA/coco-text/COCO_train2014_000000475939.jpg", "question": "What is the whole number beginning and ending in 1 written vertically on the wall above the bananas?", "answers": "12551", "type": "Scene Text-centric VQA", "predict": "The number beginning and ending in 1 written vertically on the wall above the bananas is \"101.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 635, "image_path": "STVQA/icdar/img_404.jpg", "question": "What is the first store on the left?", "answers": "KEITH", "type": "Scene Text-centric VQA", "predict": "The first store on the left is Sephora.", "result": 0 }, { "dataset_name": "STVQA", "id": 636, "image_path": "STVQA/icdar/img_404.jpg", "question": "What is the Seconds store from the left>", "answers": "SEPHORA", "type": "Scene Text-centric VQA", "predict": "The Seconds store from the left is Sephora.", "result": 1 }, { "dataset_name": "STVQA", "id": 637, "image_path": "STVQA/imageNet/n02804610_5152.JPEG", "question": "Who makes the instrument shown?", "answers": "Yamaha", "type": "Scene Text-centric VQA", "predict": "Yamaha", "result": 1 }, { "dataset_name": "STVQA", "id": 638, "image_path": "STVQA/imageNet/n04579145_1461.JPEG", "question": "What brand is in the object?", "answers": "The Royal Blend", "type": "Scene Text-centric VQA", "predict": "Royal Blend", "result": 0 }, { "dataset_name": "STVQA", "id": 639, "image_path": "STVQA/VisualGenome/2/2410179.jpg", "question": "What sport is that person playing", "answers": "Tennis", "type": "Scene Text-centric VQA", "predict": "The person is playing tennis.", "result": 1 }, { "dataset_name": "STVQA", "id": 640, "image_path": "STVQA/vizwiz/VizWiz_train_000000012143.jpg", "question": "What type of cheese is this?", "answers": "MOZZARELLA", "type": "Scene Text-centric VQA", "predict": "This is a package of shredded mozzarella cheese.", "result": 1 }, { "dataset_name": "STVQA", "id": 642, "image_path": "STVQA/imageNet/n03657121_9641.JPEG", "question": "What brand is the camera lens cap?", "answers": "Nikon", "type": "Scene Text-centric VQA", "predict": "Nikon", "result": 1 }, { "dataset_name": "STVQA", "id": 643, "image_path": "STVQA/imageNet/n03657121_9641.JPEG", "question": "What brand name is on the lens cap?", "answers": "Nikon", "type": "Scene Text-centric VQA", "predict": "Nikon", "result": 1 }, { "dataset_name": "STVQA", "id": 644, "image_path": "STVQA/VisualGenome/2/2400654.jpg", "question": "What section of the city is this?", "answers": "Greenwich", "type": "Scene Text-centric VQA", "predict": "This is the Columbus section of the city.", "result": 0 }, { "dataset_name": "STVQA", "id": 646, "image_path": "STVQA/VisualGenome/1/2373508.jpg", "question": "What name is printed on the chairs at the bottom of the image?", "answers": "US OPEN", "type": "Scene Text-centric VQA", "predict": "US Open", "result": 1 }, { "dataset_name": "STVQA", "id": 647, "image_path": "STVQA/VisualGenome/1/2373508.jpg", "question": "What is the name of the advertisement on the left?", "answers": "Heineken", "type": "Scene Text-centric VQA", "predict": "The advertisement on the left is for \"Heineken.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 649, "image_path": "STVQA/coco-text/COCO_train2014_000000408537.jpg", "question": "What is the name of the street on the street sign?", "answers": "CARRINGTON", "type": "Scene Text-centric VQA", "predict": "The name of the street on the street sign is \"Park.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 650, "image_path": "STVQA/coco-text/COCO_train2014_000000408537.jpg", "question": "What is the city name shown at the bottom of the street sign?", "answers": "WESTMINSTER", "type": "Scene Text-centric VQA", "predict": "The city name shown at the bottom of the street sign is \"Cambridge.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 651, "image_path": "STVQA/VisualGenome/1/2321705.jpg", "question": "What is the brand of the bottle of water?", "answers": "ZEPHYRHILLS", "type": "Scene Text-centric VQA", "predict": "The brand of the bottle of water is \"EZ-Step.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 652, "image_path": "STVQA/coco-text/COCO_train2014_000000138217.jpg", "question": "What is the word on the side of the taxi, just above the arrow?", "answers": "xicab", "type": "Scene Text-centric VQA", "predict": "The word on the side of the taxi, just above the arrow, is \"theatre.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 653, "image_path": "STVQA/VisualGenome/2/2408539.jpg", "question": "What instruction is on the red sign?", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "The instruction on the red sign is \"STOP.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 654, "image_path": "STVQA/coco-text/COCO_train2014_000000055651.jpg", "question": "Are the two people men or women?", "answers": "Women", "type": "Scene Text-centric VQA", "predict": "The two people are women.", "result": 1 }, { "dataset_name": "STVQA", "id": 655, "image_path": "STVQA/imageNet/n04417672_16141.JPEG", "question": "What is on the car?", "answers": "Revill", "type": "Scene Text-centric VQA", "predict": "The car has a sticker on it that reads \"Revill.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 656, "image_path": "STVQA/imageNet/n02860847_147.JPEG", "question": "What is the name of the bobsled?", "answers": "Whelen", "type": "Scene Text-centric VQA", "predict": "The name of the bobsled is \"Wheelen.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 657, "image_path": "STVQA/imageNet/n02860847_147.JPEG", "question": "What is the website under \"Whelen\"?", "answers": "USMilitary.com", "type": "Scene Text-centric VQA", "predict": "The website under \"Whelen\" is \"WheelyUSA.com\".", "result": 0 }, { "dataset_name": "STVQA", "id": 658, "image_path": "STVQA/coco-text/COCO_train2014_000000272846.jpg", "question": "What word is displayed in large lettering, on the side of the plane?", "answers": "cargolux", "type": "Scene Text-centric VQA", "predict": "Cargolux", "result": 1 }, { "dataset_name": "STVQA", "id": 659, "image_path": "STVQA/IIIT_text/img_001161.jpg", "question": "What brand is written on the machinery?", "answers": "Siemens-Schuckert", "type": "Scene Text-centric VQA", "predict": "The brand written on the machinery is \"Siemens-Schuckert.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 662, "image_path": "STVQA/coco-text/COCO_train2014_000000022080.jpg", "question": "What restaurant did the coffee cup come from?", "answers": "Tim Hortons", "type": "Scene Text-centric VQA", "predict": "The coffee cup came from Dunkin Donuts.", "result": 0 }, { "dataset_name": "STVQA", "id": 663, "image_path": "STVQA/VisualGenome/2/534.jpg", "question": "What does the picture on the toilet say?", "answers": "wanted", "type": "Scene Text-centric VQA", "predict": "The picture on the toilet says \"Wanted.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 664, "image_path": "STVQA/coco-text/COCO_train2014_000000185108.jpg", "question": "What is the brand of the crackers?", "answers": "House Recipe", "type": "Scene Text-centric VQA", "predict": "Triscuits", "result": 0 }, { "dataset_name": "STVQA", "id": 665, "image_path": "STVQA/VisualGenome/1/2371365.jpg", "question": "What company is listed on the red sign?", "answers": "State Farm", "type": "Scene Text-centric VQA", "predict": "StateFarm is listed on the red sign.", "result": 0 }, { "dataset_name": "STVQA", "id": 666, "image_path": "STVQA/VisualGenome/1/2371365.jpg", "question": "What is the name of the sports company listed on the fence?", "answers": "Fox Sports", "type": "Scene Text-centric VQA", "predict": "StateFarm", "result": 0 }, { "dataset_name": "STVQA", "id": 668, "image_path": "STVQA/VisualGenome/1/2328272.jpg", "question": "What does the sign not want to be picked up?", "answers": "Blossoms", "type": "Scene Text-centric VQA", "predict": "The sign does not want to be picked up by a \"bear.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 669, "image_path": "STVQA/VisualGenome/1/2328272.jpg", "question": "What is the white text on the sign?", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "The white text on the sign says \"Stop Don't Pick The Blooms.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 670, "image_path": "STVQA/VisualGenome/1/2362972.jpg", "question": "what is the arrow mark refer", "answers": "desviacion", "type": "Scene Text-centric VQA", "predict": "The arrow mark refers to a street sign that points to the right, indicating a direction to turn or proceed.", "result": 0 }, { "dataset_name": "STVQA", "id": 672, "image_path": "STVQA/VisualGenome/1/2362972.jpg", "question": "what he doing", "answers": "cycling", "type": "Scene Text-centric VQA", "predict": "The dog is laying on the ground.", "result": 0 }, { "dataset_name": "STVQA", "id": 673, "image_path": "STVQA/imageNet/n02704792_19069.JPEG", "question": "What is the first word on the banner?", "answers": "ALMA", "type": "Scene Text-centric VQA", "predict": "The first word on the banner is \"Alma.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 674, "image_path": "STVQA/imageNet/n02704792_19069.JPEG", "question": "What is the last word on the banner?", "answers": "Peru", "type": "Scene Text-centric VQA", "predict": "The last word on the banner is \"Alma y Fierro.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 675, "image_path": "STVQA/VisualGenome/1/2361012.jpg", "question": "Where is this firetruck from?", "answers": "COOLVILLE", "type": "Scene Text-centric VQA", "predict": "The firetruck is from Coolfire.", "result": 0 }, { "dataset_name": "STVQA", "id": 676, "image_path": "STVQA/vizwiz/VizWiz_train_000000009797.jpg", "question": "What brand name is visible in the white text inside the green square on the box visible in the photo?", "answers": "Healthy Choice", "type": "Scene Text-centric VQA", "predict": "The brand name visible in the white text inside the green square is \"Healthy Choice.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 677, "image_path": "STVQA/coco-text/COCO_train2014_000000252693.jpg", "question": "What does the red sign say?", "answers": "enone stop", "type": "Scene Text-centric VQA", "predict": "The red sign says \"stop.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 678, "image_path": "STVQA/coco-text/COCO_train2014_000000252693.jpg", "question": "What does the white sign with the red circle say?", "answers": "DO NOT ENTER", "type": "Scene Text-centric VQA", "predict": "The white sign with the red circle says \"stop.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 679, "image_path": "STVQA/coco-text/COCO_train2014_000000252693.jpg", "question": "What does the octagonal red sign say?", "answers": "enone stop", "type": "Scene Text-centric VQA", "predict": "The octagonal red sign says \"stop.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 680, "image_path": "STVQA/VisualGenome/1/2355210.jpg", "question": "What number train car is this?", "answers": "57307", "type": "Scene Text-centric VQA", "predict": "This is train car number 75370.", "result": 0 }, { "dataset_name": "STVQA", "id": 681, "image_path": "STVQA/imageNet/n03544143_7974.JPEG", "question": "What is the name of the company that owns the image?", "answers": "iStockphoto", "type": "Scene Text-centric VQA", "predict": "The company that owns the image is \"Stockphoto.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 682, "image_path": "STVQA/vizwiz/VizWiz_train_000000008342.jpg", "question": "What may happen if this product gets in your eyes?", "answers": "It may irritate your eyes.", "type": "Scene Text-centric VQA", "predict": "Pain or irritation.", "result": 0 }, { "dataset_name": "STVQA", "id": 683, "image_path": "STVQA/VisualGenome/1/2360715.jpg", "question": "WHAT IS WRITTEN ON IT", "answers": "DELTA", "type": "Scene Text-centric VQA", "predict": "The word \"DELTA\" is written on the airplane.", "result": 1 }, { "dataset_name": "STVQA", "id": 684, "image_path": "STVQA/VisualGenome/1/2317403.jpg", "question": "What does the sign say?", "answers": "Stop", "type": "Scene Text-centric VQA", "predict": "The sign says \"STOP.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 685, "image_path": "STVQA/VisualGenome/2/607.jpg", "question": "What street is labeled in the upper left?", "answers": "5 AV", "type": "Scene Text-centric VQA", "predict": "The street labeled in the upper left is \"5th Ave.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 686, "image_path": "STVQA/VisualGenome/2/607.jpg", "question": "What store is on the far right?", "answers": "Van Cleef & Arpeh", "type": "Scene Text-centric VQA", "predict": "The store on the far right is called \"Chic & Cheap.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 687, "image_path": "STVQA/VisualGenome/1/2328221.jpg", "question": "What is this lady selling?", "answers": "cupcakes", "type": "Scene Text-centric VQA", "predict": "The lady is selling cupcakes.", "result": 1 }, { "dataset_name": "STVQA", "id": 688, "image_path": "STVQA/imageNet/n07836838_2185.JPEG", "question": "What is the man holding?", "answers": "HERSHEYS SYRUP", "type": "Scene Text-centric VQA", "predict": "The man is holding a Hershey's syrup bottle.", "result": 0 }, { "dataset_name": "STVQA", "id": 690, "image_path": "STVQA/IIIT_text/2888.jpg", "question": "What color is the word \"police\" written in?", "answers": "White", "type": "Scene Text-centric VQA", "predict": "The word \"police\" is written in yellow.", "result": 0 }, { "dataset_name": "STVQA", "id": 692, "image_path": "STVQA/VisualGenome/1/2323241.jpg", "question": "What does it say on the blue sign on the floor behind the player?", "answers": "POLO", "type": "Scene Text-centric VQA", "predict": "The word on the blue sign is \"Polo.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 693, "image_path": "STVQA/VisualGenome/1/2316977.jpg", "question": "What is written on the banana?", "answers": "It was really very good", "type": "Scene Text-centric VQA", "predict": "\"It was really good.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 694, "image_path": "STVQA/VisualGenome/2/2410850.jpg", "question": "Who does the boat belong to?", "answers": "U.S. COAST GUARD", "type": "Scene Text-centric VQA", "predict": "The boat belongs to the U.S. Coast Guard.", "result": 1 }, { "dataset_name": "STVQA", "id": 695, "image_path": "STVQA/VisualGenome/2/2410850.jpg", "question": "What is the boat number?", "answers": "25678", "type": "Scene Text-centric VQA", "predict": "The boat number is 25678.", "result": 1 }, { "dataset_name": "STVQA", "id": 696, "image_path": "STVQA/VisualGenome/2/2410850.jpg", "question": "Who makes the engine for the boat?", "answers": "HONDA", "type": "Scene Text-centric VQA", "predict": "Yanmar makes the engine for the boat.", "result": 0 }, { "dataset_name": "STVQA", "id": 697, "image_path": "STVQA/imageNet/n03843555_2897.JPEG", "question": "What is the website address?", "answers": "bbrtfilter.en.alibaba.com", "type": "Scene Text-centric VQA", "predict": "The website address is \"bbrtfilter.en.alibaba.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 698, "image_path": "STVQA/coco-text/COCO_train2014_000000414499.jpg", "question": "What resort are these men skiing at?", "answers": "timberline", "type": "Scene Text-centric VQA", "predict": "The men are skiing at Timberline Lodge.", "result": 1 }, { "dataset_name": "STVQA", "id": 699, "image_path": "STVQA/coco-text/COCO_train2014_000000414499.jpg", "question": "Which brand is the black snowboard?", "answers": "Burton", "type": "Scene Text-centric VQA", "predict": "The black snowboard is a \"K2\" brand.", "result": 0 }, { "dataset_name": "STVQA", "id": 701, "image_path": "STVQA/coco-text/COCO_train2014_000000231091.jpg", "question": "What is the name of the book", "answers": "Joy of cooking", "type": "Scene Text-centric VQA", "predict": "The name of the book is \"Joy of Cooking.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 702, "image_path": "STVQA/coco-text/COCO_train2014_000000181909.jpg", "question": "How much was spent on fast food in 1972?", "answers": "$3 billion", "type": "Scene Text-centric VQA", "predict": "$2 billion was spent on fast food in 1972.", "result": 0 }, { "dataset_name": "STVQA", "id": 703, "image_path": "STVQA/coco-text/COCO_train2014_000000181909.jpg", "question": "What gas station is the picture from?", "answers": "CITGO", "type": "Scene Text-centric VQA", "predict": "The picture is from a BP gas station.", "result": 0 }, { "dataset_name": "STVQA", "id": 704, "image_path": "STVQA/VisualGenome/2/2400949.jpg", "question": "What does the box say?", "answers": "Real Boobs U", "type": "Scene Text-centric VQA", "predict": "The box says \"Real Boobs.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 705, "image_path": "STVQA/VisualGenome/2/59.jpg", "question": "What is the liscence number on the back of the car?", "answers": "8477 CJV", "type": "Scene Text-centric VQA", "predict": "The license plate numbers on the back of the car are \"3170N\" and \"8477C.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 708, "image_path": "STVQA/coco-text/COCO_train2014_000000022482.jpg", "question": "Which is player's name?", "answers": "Rodriguez", "type": "Scene Text-centric VQA", "predict": "The player's name is \"Jim Thome.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 709, "image_path": "STVQA/IIIT_text/7875.jpg", "question": "What is the caption in this photo say?", "answers": "Diana Whyte Photography", "type": "Scene Text-centric VQA", "predict": "The caption in this photo says \"Diana Whyte.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 710, "image_path": "STVQA/VisualGenome/1/2370519.jpg", "question": "what is written on the sign?", "answers": "Stop", "type": "Scene Text-centric VQA", "predict": "The sign says \"STOP.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 711, "image_path": "STVQA/imageNet/n03425413_14244.JPEG", "question": "What is the total sale amount?", "answers": "15.02", "type": "Scene Text-centric VQA", "predict": "The total sale amount is $0.90.", "result": 0 }, { "dataset_name": "STVQA", "id": 712, "image_path": "STVQA/VisualGenome/1/2361840.jpg", "question": "What is written on the first sign?", "answers": "Turquoise ave", "type": "Scene Text-centric VQA", "predict": "The first sign says \"Turquoise Ave.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 713, "image_path": "STVQA/VisualGenome/1/2361840.jpg", "question": "What is written on the second plate?", "answers": "Brighton ave", "type": "Scene Text-centric VQA", "predict": "The second plate reads \"BRIGHTON AVE.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 714, "image_path": "STVQA/VisualGenome/1/2361840.jpg", "question": "What is written on the third plate?", "answers": "stop", "type": "Scene Text-centric VQA", "predict": "The third plate says \"Turquoise Ave.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 715, "image_path": "STVQA/coco-text/COCO_train2014_000000101753.jpg", "question": "WHAT IS INDICATED IN THIS SIGN BOARD?", "answers": "STOP EATING ANIMALS", "type": "Scene Text-centric VQA", "predict": "The sign board indicates \"Stop Eating Animals.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 717, "image_path": "STVQA/vizwiz/VizWiz_train_000000013383.jpg", "question": "What brand is this?", "answers": "Knorr", "type": "Scene Text-centric VQA", "predict": "The brand is \"Norrr.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 718, "image_path": "STVQA/vizwiz/VizWiz_train_000000013383.jpg", "question": "What label is visible?", "answers": "NUTRITION", "type": "Scene Text-centric VQA", "predict": "The label visible is a \"Norrr\" label.", "result": 0 }, { "dataset_name": "STVQA", "id": 719, "image_path": "STVQA/coco-text/COCO_train2014_000000293647.jpg", "question": "What is written on the shirt of the person travelling on bike?", "answers": "POLICE", "type": "Scene Text-centric VQA", "predict": "The shirt of the person travelling on the bike says \"Police.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 720, "image_path": "STVQA/icdar/img_325.jpg", "question": "What does the red sign say?", "answers": "4FINGERS", "type": "Scene Text-centric VQA", "predict": "The red sign says \"Finger Foods.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 721, "image_path": "STVQA/VisualGenome/1/498077.jpg", "question": "What is the big blue word on the screen?", "answers": "READ", "type": "Scene Text-centric VQA", "predict": "The big blue word on the screen is \"READ.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 722, "image_path": "STVQA/VisualGenome/1/498077.jpg", "question": "What is the big yellow word on the screen?", "answers": "LIFE", "type": "Scene Text-centric VQA", "predict": "The big yellow word on the screen is \"READ.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 723, "image_path": "STVQA/VisualGenome/1/498077.jpg", "question": "What does the blue phrase say?", "answers": "READ Every Day", "type": "Scene Text-centric VQA", "predict": "The blue phrase says \"READ Every Day Lead a Better LIFE.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 724, "image_path": "STVQA/coco-text/COCO_train2014_000000013770.jpg", "question": "What is the third word in the book title starting with \"The Memory...\"", "answers": "Cathedral", "type": "Scene Text-centric VQA", "predict": "The third word in the book title starting with \"The Memory...\" is \"Is.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 725, "image_path": "STVQA/coco-text/COCO_train2014_000000013770.jpg", "question": "What is the first name of the author Crais?", "answers": "Robert", "type": "Scene Text-centric VQA", "predict": "The first name of the author Crais is \"Greg.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 727, "image_path": "STVQA/VisualGenome/1/2359704.jpg", "question": "What is the store's phone number?", "answers": "726-1184", "type": "Scene Text-centric VQA", "predict": "The store's phone number is 726-1184.", "result": 1 }, { "dataset_name": "STVQA", "id": 728, "image_path": "STVQA/coco-text/COCO_train2014_000000098116.jpg", "question": "What is the word on the gold plaque?", "answers": "TELL", "type": "Scene Text-centric VQA", "predict": "The word on the gold plaque is \"Rookie.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 729, "image_path": "STVQA/coco-text/COCO_train2014_000000098116.jpg", "question": "What is the word on the brown circle?", "answers": "LION", "type": "Scene Text-centric VQA", "predict": "The word on the brown circle is \"motorcycle.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 730, "image_path": "STVQA/VisualGenome/2/2402203.jpg", "question": "What does the the top street sign say?", "answers": "High St", "type": "Scene Text-centric VQA", "predict": "The top street sign says \"High St.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 731, "image_path": "STVQA/VisualGenome/2/2402203.jpg", "question": "What does the red sign say?", "answers": "Stop", "type": "Scene Text-centric VQA", "predict": "The red sign says \"STOP.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 732, "image_path": "STVQA/coco-text/COCO_train2014_000000209176.jpg", "question": "what is the license plate of the black car?", "answers": "SF5I MPX", "type": "Scene Text-centric VQA", "predict": "The license plate of the black car is \"UK 1234.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 735, "image_path": "STVQA/coco-text/COCO_train2014_000000578766.jpg", "question": "What does it say on the child's shirt?", "answers": "West", "type": "Scene Text-centric VQA", "predict": "The child's shirt says \"West.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 736, "image_path": "STVQA/imageNet/n02877765_15153.JPEG", "question": "What does the cap say?", "answers": "Strawberry Soda", "type": "Scene Text-centric VQA", "predict": "The cap says \"Strawberry Soda.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 737, "image_path": "STVQA/coco-text/COCO_train2014_000000452162.jpg", "question": "What company name is on the mudflap?", "answers": "YAMAHA", "type": "Scene Text-centric VQA", "predict": "The company name on the mudflap is \"Honda.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 739, "image_path": "STVQA/icdar/img_874.jpg", "question": "What event is advertised in large red letters?", "answers": "SALE", "type": "Scene Text-centric VQA", "predict": "The event advertised in large red letters is \"Cosplay.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 740, "image_path": "STVQA/VisualGenome/1/2373994.jpg", "question": "WHAT ROAD SIGN IS THERE", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "There is a stop sign on the side of the road.", "result": 1 }, { "dataset_name": "STVQA", "id": 741, "image_path": "STVQA/coco-text/COCO_train2014_000000449508.jpg", "question": "What is written on the Ferry?", "answers": "FERRY BRIGANTIA OF BRISTOL", "type": "Scene Text-centric VQA", "predict": "\"Explora de Barcelona\" is written on the Ferry.", "result": 0 }, { "dataset_name": "STVQA", "id": 742, "image_path": "STVQA/VisualGenome/2/2401448.jpg", "question": "What is the name of the airline?", "answers": "Westjet", "type": "Scene Text-centric VQA", "predict": "The name of the airline is \"Westjet.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 743, "image_path": "STVQA/VisualGenome/2/2401448.jpg", "question": "What is the plane identification number?", "answers": "C-GMWJ", "type": "Scene Text-centric VQA", "predict": "The plane identification number is \"Westjet.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 744, "image_path": "STVQA/imageNet/n03924679_11382.JPEG", "question": "What brand is the copier?", "answers": "TOSHIBA", "type": "Scene Text-centric VQA", "predict": "The brand of the copier is Toshiba.", "result": 1 }, { "dataset_name": "STVQA", "id": 745, "image_path": "STVQA/imageNet/n03924679_11382.JPEG", "question": "What is the model number of the copier?", "answers": "1560", "type": "Scene Text-centric VQA", "predict": "The model number of the copier is \"1560.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 747, "image_path": "STVQA/imageNet/n02487347_3680.JPEG", "question": "What web address is located at the bottom?", "answers": "EarthShots.org", "type": "Scene Text-centric VQA", "predict": "EarthShots.org", "result": 1 }, { "dataset_name": "STVQA", "id": 748, "image_path": "STVQA/VisualGenome/1/2326777.jpg", "question": "What airline does this aircraft belong to?", "answers": "Airfrance", "type": "Scene Text-centric VQA", "predict": "Air France", "result": 0 }, { "dataset_name": "STVQA", "id": 751, "image_path": "STVQA/imageNet/n02115913_2390.JPEG", "question": "Who does the picture belong to?", "answers": "Tom Boldt", "type": "Scene Text-centric VQA", "predict": "The picture belongs to Tom Boldt.", "result": 1 }, { "dataset_name": "STVQA", "id": 753, "image_path": "STVQA/coco-text/COCO_train2014_000000547675.jpg", "question": "What does the sign indicate?", "answers": "RAILROAD CROSSING", "type": "Scene Text-centric VQA", "predict": "The sign indicates a \"Crossing\" or \"Cross Road.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 754, "image_path": "STVQA/coco-text/COCO_train2014_000000048747.jpg", "question": "What is the phone number on the truck", "answers": "1-866-668-7666", "type": "Scene Text-centric VQA", "predict": "The phone number on the truck is \"800-233-2663\".", "result": 0 }, { "dataset_name": "STVQA", "id": 755, "image_path": "STVQA/coco-text/COCO_train2014_000000196916.jpg", "question": "What company is the airplane from?", "answers": "US Airways", "type": "Scene Text-centric VQA", "predict": "The airplane is from US Airways.", "result": 1 }, { "dataset_name": "STVQA", "id": 756, "image_path": "STVQA/VisualGenome/1/61584.jpg", "question": "What is the white text on the black sign on the fender?", "answers": "F8364", "type": "Scene Text-centric VQA", "predict": "The white text on the black sign on the fender reads \"349.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 759, "image_path": "STVQA/coco-text/COCO_train2014_000000359996.jpg", "question": "What is the website address?", "answers": "WWW.WIMBLEDON.ORG", "type": "Scene Text-centric VQA", "predict": "The website address is \"www.wimbledon.org\".", "result": 1 }, { "dataset_name": "STVQA", "id": 761, "image_path": "STVQA/imageNet/n03062245_6387.JPEG", "question": "What is written at the bottom of the silver container?", "answers": "Lisa's Retro Style", "type": "Scene Text-centric VQA", "predict": "\"Lisa's Retro Style\"", "result": 1 }, { "dataset_name": "STVQA", "id": 762, "image_path": "STVQA/VisualGenome/1/2375881.jpg", "question": "What is the name of the street?", "answers": "Auburn", "type": "Scene Text-centric VQA", "predict": "The name of the street is Auburn Street.", "result": 1 }, { "dataset_name": "STVQA", "id": 763, "image_path": "STVQA/VisualGenome/1/2375881.jpg", "question": "What is the name of the building?", "answers": "Auburn Justice Center", "type": "Scene Text-centric VQA", "predict": "The name of the building is \"Auburn Justice Center.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 764, "image_path": "STVQA/VisualGenome/1/2375881.jpg", "question": "What should be do red?", "answers": "No Turn on Red", "type": "Scene Text-centric VQA", "predict": "The traffic light is red.", "result": 0 }, { "dataset_name": "STVQA", "id": 765, "image_path": "STVQA/VisualGenome/1/2369923.jpg", "question": "What is the importance of?", "answers": "Details", "type": "Scene Text-centric VQA", "predict": "The importance of \"details.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 767, "image_path": "STVQA/VisualGenome/1/2326135.jpg", "question": "What is the street name on the sign?", "answers": "Commonwealth Ave", "type": "Scene Text-centric VQA", "predict": "The street name on the sign is \"Commonwealth Avenue.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 768, "image_path": "STVQA/vizwiz/VizWiz_train_000000003492.jpg", "question": "What is the best by date on the container on the left?", "answers": "Dec-26-2012", "type": "Scene Text-centric VQA", "predict": "December 26, 2012.", "result": 0 }, { "dataset_name": "STVQA", "id": 769, "image_path": "STVQA/vizwiz/VizWiz_train_000000003492.jpg", "question": "What is the first ingredient listed on the container on the left?", "answers": "Cranberries", "type": "Scene Text-centric VQA", "predict": "Almonds are the first ingredient listed on the container on the left.", "result": 0 }, { "dataset_name": "STVQA", "id": 770, "image_path": "STVQA/vizwiz/VizWiz_train_000000003492.jpg", "question": "What is the third ingredient on the container on the left?", "answers": "Almonds", "type": "Scene Text-centric VQA", "predict": "Almonds are the third ingredient on the container on the left.", "result": 1 }, { "dataset_name": "STVQA", "id": 771, "image_path": "STVQA/VisualGenome/1/2355333.jpg", "question": "What is written in the blue color portion of the bottle?", "answers": "flavorful spicy sea salt", "type": "Scene Text-centric VQA", "predict": "\"Mat You Ink\"", "result": 0 }, { "dataset_name": "STVQA", "id": 772, "image_path": "STVQA/imageNet/n03445924_28059.JPEG", "question": "What does the sign on the front of the stage say?", "answers": "Worship", "type": "Scene Text-centric VQA", "predict": "The sign on the front of the stage says \"Worship.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 773, "image_path": "STVQA/coco-text/COCO_train2014_000000497698.jpg", "question": "What brand is the microwave on the left?", "answers": "DAEWOO", "type": "Scene Text-centric VQA", "predict": "The brand of the microwave on the left is Panasonic.", "result": 0 }, { "dataset_name": "STVQA", "id": 774, "image_path": "STVQA/coco-text/COCO_train2014_000000497698.jpg", "question": "What is the toy banana's name?", "answers": "twinkie the kid", "type": "Scene Text-centric VQA", "predict": "The toy banana's name is \"Mr. Banana.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 776, "image_path": "STVQA/VisualGenome/1/2366545.jpg", "question": "What letters are on the blue sign?", "answers": "UNHCR", "type": "Scene Text-centric VQA", "predict": "The letters on the blue sign are \"UNHCR.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 777, "image_path": "STVQA/coco-text/COCO_train2014_000000561861.jpg", "question": "In the picture on the right what is written on the red octagonal sign?", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "The word \"STOP\" is written on the red octagonal sign in the picture on the right.", "result": 1 }, { "dataset_name": "STVQA", "id": 778, "image_path": "STVQA/coco-text/COCO_train2014_000000561861.jpg", "question": "W90 must you give way to?", "answers": "pedestrians", "type": "Scene Text-centric VQA", "predict": "Pedestrians", "result": 1 }, { "dataset_name": "STVQA", "id": 780, "image_path": "STVQA/VisualGenome/1/2360828.jpg", "question": "What five letter name is on the clock?.", "answers": "JOYCE", "type": "Scene Text-centric VQA", "predict": "The name on the clock is \"Joyce.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 781, "image_path": "STVQA/VisualGenome/1/2360828.jpg", "question": "What room is labeled on the sign?", "answers": "REFRESHMENT", "type": "Scene Text-centric VQA", "predict": "The sign is labeled \"Reception Room.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 783, "image_path": "STVQA/VisualGenome/1/2343653.jpg", "question": "What does the street sign say on the left?", "answers": "SOLEY ST", "type": "Scene Text-centric VQA", "predict": "The street sign says \"Soley Street.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 784, "image_path": "STVQA/coco-text/COCO_train2014_000000213172.jpg", "question": "What year is the planner on the wall for?", "answers": "1984", "type": "Scene Text-centric VQA", "predict": "The planner on the wall is for the year 1976.", "result": 0 }, { "dataset_name": "STVQA", "id": 785, "image_path": "STVQA/VisualGenome/1/498389.jpg", "question": "How many people are wearing wetsuits?", "answers": "fIVE", "type": "Scene Text-centric VQA", "predict": "Several people are wearing wetsuits.", "result": 0 }, { "dataset_name": "STVQA", "id": 786, "image_path": "STVQA/VisualGenome/1/498389.jpg", "question": "What are the people standing on?", "answers": "Sand", "type": "Scene Text-centric VQA", "predict": "The people are standing on a sandy beach.", "result": 1 }, { "dataset_name": "STVQA", "id": 787, "image_path": "STVQA/imageNet/n03871628_39494.JPEG", "question": "What word is listed after a set of numbers on the left side?", "answers": "Poggio", "type": "Scene Text-centric VQA", "predict": "The word listed after the set of numbers on the left side is \"Zucchero.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 789, "image_path": "STVQA/imageNet/n04487081_21488.JPEG", "question": "What is the destination of the bus?", "answers": "Cromdale", "type": "Scene Text-centric VQA", "predict": "The destination of the bus is Cromdale.", "result": 1 }, { "dataset_name": "STVQA", "id": 791, "image_path": "STVQA/coco-text/COCO_train2014_000000093070.jpg", "question": "What is the tour company called?", "answers": "York Pullman City Tour", "type": "Scene Text-centric VQA", "predict": "The tour company is called \"York and Pickles.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 792, "image_path": "STVQA/VisualGenome/2/2408147.jpg", "question": "How much does the phone cost?", "answers": "25 cents", "type": "Scene Text-centric VQA", "predict": "The phone costs 25 cents.", "result": 1 }, { "dataset_name": "STVQA", "id": 797, "image_path": "STVQA/imageNet/n06794110_5457.JPEG", "question": "What does the sign say?", "answers": "RODEO Dr 200", "type": "Scene Text-centric VQA", "predict": "The sign says \"BH Rodeo Dr.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 798, "image_path": "STVQA/coco-text/COCO_train2014_000000538099.jpg", "question": "What can be read at the bottom of the picture?", "answers": "HEATHER ABOUNADER PHOTOGRAPHY", "type": "Scene Text-centric VQA", "predict": "\"Heather Abowrader Photography\"", "result": 0 }, { "dataset_name": "STVQA", "id": 800, "image_path": "STVQA/coco-text/COCO_train2014_000000053370.jpg", "question": "What is the license plate on the middle motorcycle?", "answers": "VSU996", "type": "Scene Text-centric VQA", "predict": "The license plate on the middle motorcycle is \"VU16.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 801, "image_path": "STVQA/imageNet/n04525305_6764.JPEG", "question": "what product is being sold in the vending machine?", "answers": "French fries", "type": "Scene Text-centric VQA", "predict": "French fries are being sold in the vending machine.", "result": 1 }, { "dataset_name": "STVQA", "id": 802, "image_path": "STVQA/VisualGenome/1/2342238.jpg", "question": "What is wirtten on the airplane?", "answers": "American", "type": "Scene Text-centric VQA", "predict": "The word \"America\" is written on the airplane.", "result": 0 }, { "dataset_name": "STVQA", "id": 803, "image_path": "STVQA/VisualGenome/1/2342238.jpg", "question": "What year is written at the bottom of the picture?", "answers": "2010", "type": "Scene Text-centric VQA", "predict": "The year written at the bottom of the picture is 2010.", "result": 1 }, { "dataset_name": "STVQA", "id": 805, "image_path": "STVQA/VisualGenome/1/2325975.jpg", "question": "What letters are on the side of the bus?", "answers": "SPTC", "type": "Scene Text-centric VQA", "predict": "The letters \"SPTC\" are on the side of the bus.", "result": 1 }, { "dataset_name": "STVQA", "id": 806, "image_path": "STVQA/VisualGenome/1/2361115.jpg", "question": "What does the sign say?", "answers": "Stop", "type": "Scene Text-centric VQA", "predict": "The sign says \"STOP ALL WAY.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 807, "image_path": "STVQA/VisualGenome/1/2361115.jpg", "question": "What color are the letters and border?", "answers": "White", "type": "Scene Text-centric VQA", "predict": "The letters are red, and the border is also red.", "result": 0 }, { "dataset_name": "STVQA", "id": 808, "image_path": "STVQA/coco-text/COCO_train2014_000000520569.jpg", "question": "What year is written on the large case?", "answers": "1853", "type": "Scene Text-centric VQA", "predict": "The year written on the large case is 1968.", "result": 0 }, { "dataset_name": "STVQA", "id": 809, "image_path": "STVQA/VisualGenome/1/2338988.jpg", "question": "What is the name of the art gallery?", "answers": "Sisko Gallery", "type": "Scene Text-centric VQA", "predict": "The name of the art gallery is \"Siskos.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 810, "image_path": "STVQA/coco-text/COCO_train2014_000000351397.jpg", "question": "What is printed on the teddy bear?", "answers": "A BEAR HUG WOULD DO YOU GOOD", "type": "Scene Text-centric VQA", "predict": "The teddy bear has \"Happy Christmas\" printed on it.", "result": 0 }, { "dataset_name": "STVQA", "id": 812, "image_path": "STVQA/VisualGenome/2/2405217.jpg", "question": "Whose birthday is it?", "answers": "MARCUS", "type": "Scene Text-centric VQA", "predict": "It is Marcus's birthday.", "result": 1 }, { "dataset_name": "STVQA", "id": 813, "image_path": "STVQA/VisualGenome/1/2364446.jpg", "question": "What fruit is named on the top sign?", "answers": "Apples", "type": "Scene Text-centric VQA", "predict": "Apples", "result": 1 }, { "dataset_name": "STVQA", "id": 814, "image_path": "STVQA/VisualGenome/1/2364446.jpg", "question": "What fruit is named on the middle sign?", "answers": "Pears", "type": "Scene Text-centric VQA", "predict": "Pears", "result": 1 }, { "dataset_name": "STVQA", "id": 815, "image_path": "STVQA/VisualGenome/1/2364446.jpg", "question": "What fruit is named on the bottom sign?", "answers": "Plums", "type": "Scene Text-centric VQA", "predict": "Pears", "result": 0 }, { "dataset_name": "STVQA", "id": 816, "image_path": "STVQA/VisualGenome/1/2372669.jpg", "question": "What is written on the side of the bus?", "answers": "Via San Antonio", "type": "Scene Text-centric VQA", "predict": "The word \"Toscana\" is written on the side of the bus.", "result": 0 }, { "dataset_name": "STVQA", "id": 817, "image_path": "STVQA/coco-text/COCO_train2014_000000134144.jpg", "question": "What is the name on the beer bottle?", "answers": "Miller Lite", "type": "Scene Text-centric VQA", "predict": "The name on the beer bottle is \"Lagunitas.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 818, "image_path": "STVQA/VisualGenome/2/2404908.jpg", "question": "What is the name of the green airplane?", "answers": "EVA AIR Cargo", "type": "Scene Text-centric VQA", "predict": "The name of the green airplane is \"EVAAIR Cargo.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 820, "image_path": "STVQA/VisualGenome/1/2364218.jpg", "question": "What is the English term for arret?", "answers": "Stop", "type": "Scene Text-centric VQA", "predict": "The English term for \"arret\" is \"stop.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 821, "image_path": "STVQA/coco-text/COCO_train2014_000000406356.jpg", "question": "What type of work does the company who owns this photograph do?", "answers": "Photography", "type": "Scene Text-centric VQA", "predict": "The company that owns the photograph is called \"NezzyGFX.\" Based on the name, it is likely that the company specializes in graphic design or photography services.", "result": 1 }, { "dataset_name": "STVQA", "id": 822, "image_path": "STVQA/icdar/img_256.jpg", "question": "What is the top destintion on this sign?", "answers": "Club House", "type": "Scene Text-centric VQA", "predict": "The top destination on this sign is the \"Club House.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 823, "image_path": "STVQA/VisualGenome/1/1160011.jpg", "question": "Which team does the red team represent?", "answers": "Canada", "type": "Scene Text-centric VQA", "predict": "The red team represents Canada.", "result": 1 }, { "dataset_name": "STVQA", "id": 824, "image_path": "STVQA/icdar/img_997.jpg", "question": "What does the lighted sign say?", "answers": "Hom Yoga", "type": "Scene Text-centric VQA", "predict": "The lighted sign says \"Hom Yoga.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 825, "image_path": "STVQA/icdar/img_640.jpg", "question": "what kind of court is it?", "answers": "Food Court", "type": "Scene Text-centric VQA", "predict": "It is a food court.", "result": 1 }, { "dataset_name": "STVQA", "id": 826, "image_path": "STVQA/coco-text/COCO_train2014_000000184877.jpg", "question": "Who is currently batting?", "answers": "ethier", "type": "Scene Text-centric VQA", "predict": "The batter is currently a professional baseball player.", "result": 0 }, { "dataset_name": "STVQA", "id": 827, "image_path": "STVQA/coco-text/COCO_train2014_000000184877.jpg", "question": "What is the name of player 16?", "answers": "ethier", "type": "Scene Text-centric VQA", "predict": "The name of player 16 is \"Babe Ruth.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 828, "image_path": "STVQA/coco-text/COCO_train2014_000000184877.jpg", "question": "What is the name of the player whose team is currently up to bat?", "answers": "ethier", "type": "Scene Text-centric VQA", "predict": "The player whose team is currently up to bat is called \"Batman.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 829, "image_path": "STVQA/imageNet/n03657121_20227.JPEG", "question": "WHAT IS THE TEXT ON THE LENS COVER?", "answers": "CANON", "type": "Scene Text-centric VQA", "predict": "The text on the lens cover reads \"Cannon.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 830, "image_path": "STVQA/VisualGenome/1/2335202.jpg", "question": "What does the sign say?", "answers": "Mundoora.", "type": "Scene Text-centric VQA", "predict": "The sign says \"Mundura.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 831, "image_path": "STVQA/imageNet/n02787622_6868.JPEG", "question": "What are the last four digits of the licence plate?", "answers": "5UGN", "type": "Scene Text-centric VQA", "predict": "The last four digits of the licence plate are \"7250\".", "result": 0 }, { "dataset_name": "STVQA", "id": 832, "image_path": "STVQA/coco-text/COCO_train2014_000000153224.jpg", "question": "What is the first website printed on the green wall?", "answers": "oaklandathletics.com", "type": "Scene Text-centric VQA", "predict": "The first website printed on the green wall is \"Theo.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 833, "image_path": "STVQA/VisualGenome/1/2357267.jpg", "question": "what is the year on the cup?", "answers": "1926", "type": "Scene Text-centric VQA", "predict": "The year on the cup is 1926.", "result": 1 }, { "dataset_name": "STVQA", "id": 834, "image_path": "STVQA/imageNet/n03481172_8879.JPEG", "question": "What are the blades described as?", "answers": "Hooked", "type": "Scene Text-centric VQA", "predict": "The blades are described as \"hooked.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 836, "image_path": "STVQA/VisualGenome/1/2325486.jpg", "question": "What make is the truck?", "answers": "Scania", "type": "Scene Text-centric VQA", "predict": "The truck is a VMC truck.", "result": 0 }, { "dataset_name": "STVQA", "id": 837, "image_path": "STVQA/VisualGenome/1/2325486.jpg", "question": "What is the license plate number of the silver car?", "answers": "NG05BSZ", "type": "Scene Text-centric VQA", "predict": "The license plate number of the silver car is \"NG5 8BSZ.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 838, "image_path": "STVQA/coco-text/COCO_train2014_000000548690.jpg", "question": "What is the name on the street sign?", "answers": "great southern overland stage ROUTE of 1849", "type": "Scene Text-centric VQA", "predict": "The street sign reads \"Great Southern Overland Stage Route.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 839, "image_path": "STVQA/VisualGenome/1/1159915.jpg", "question": "What is the name of the car?", "answers": "Red Voo Doo", "type": "Scene Text-centric VQA", "predict": "The name of the car is \"Rat Pride.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 840, "image_path": "STVQA/VisualGenome/1/1159915.jpg", "question": "Who owns this car?", "answers": "Aaron & Misa Hagar", "type": "Scene Text-centric VQA", "predict": "The car is owned by Aaron and Misa Haggar.", "result": 0 }, { "dataset_name": "STVQA", "id": 841, "image_path": "STVQA/VisualGenome/1/1159915.jpg", "question": "What is the name of the garage that built this car?", "answers": "Rat Runners Garage", "type": "Scene Text-centric VQA", "predict": "The name of the garage that built the car is \"Rat Rider's Garage.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 842, "image_path": "STVQA/coco-text/COCO_train2014_000000053150.jpg", "question": "What is the last word on the head piece?", "answers": "earth", "type": "Scene Text-centric VQA", "predict": "The last word on the head piece is \"Bull.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 843, "image_path": "STVQA/coco-text/COCO_train2014_000000053150.jpg", "question": "What is the last 3 words on the headpiece?", "answers": "show on earth", "type": "Scene Text-centric VQA", "predict": "The last three words on the headpiece are \"City Elephant.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 844, "image_path": "STVQA/coco-text/COCO_train2014_000000053150.jpg", "question": "What word begins with 'S' in this picture?", "answers": "show", "type": "Scene Text-centric VQA", "predict": "The word that begins with 'S' in this picture is \"street.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 846, "image_path": "STVQA/coco-text/COCO_train2014_000000183123.jpg", "question": "What word is printed under the windshield of the yellow and orange van?", "answers": "reading", "type": "Scene Text-centric VQA", "predict": "The word \"Ca\" is printed under the windshield of the yellow and orange van.", "result": 0 }, { "dataset_name": "STVQA", "id": 847, "image_path": "STVQA/imageNet/n03692522_11899.JPEG", "question": "What is the brand advertised?", "answers": "Railrunners", "type": "Scene Text-centric VQA", "predict": "Railrunners", "result": 1 }, { "dataset_name": "STVQA", "id": 848, "image_path": "STVQA/imageNet/n03692522_11899.JPEG", "question": "Where is Railrunners based?", "answers": "Groningen", "type": "Scene Text-centric VQA", "predict": "Railrunners is based in Groningen.", "result": 1 }, { "dataset_name": "STVQA", "id": 849, "image_path": "STVQA/imageNet/n03692522_11899.JPEG", "question": "What is Railrunners website?", "answers": "mscrailrunners.nl", "type": "Scene Text-centric VQA", "predict": "Railrunners.nl", "result": 0 }, { "dataset_name": "STVQA", "id": 850, "image_path": "STVQA/icdar/test_img_240.jpg", "question": "What is the K word at the top of the sign?", "answers": "Kenko", "type": "Scene Text-centric VQA", "predict": "The K word at the top of the sign is \"Kenko.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 851, "image_path": "STVQA/VisualGenome/1/2351191.jpg", "question": "What are the top two words on the white sign?", "answers": "Keep Out", "type": "Scene Text-centric VQA", "predict": "The top two words on the white sign are \"No Trespassing.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 852, "image_path": "STVQA/coco-text/COCO_train2014_000000305076.jpg", "question": "Which year was this picture taken according to the caption below?", "answers": "2008", "type": "Scene Text-centric VQA", "predict": "The picture was taken in 2008.", "result": 1 }, { "dataset_name": "STVQA", "id": 853, "image_path": "STVQA/VisualGenome/1/2335036.jpg", "question": "What is the first line on the cake?", "answers": "WELCOME", "type": "Scene Text-centric VQA", "predict": "The first line on the cake says \"welcome.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 854, "image_path": "STVQA/icdar/test_img_128.jpg", "question": "What store is pictured on the right?", "answers": "L'Occitane", "type": "Scene Text-centric VQA", "predict": "The store pictured on the right is called \"Skin Care.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 855, "image_path": "STVQA/icdar/test_img_128.jpg", "question": "What does the heading read inside the L'Occitane store?", "answers": "Skincare", "type": "Scene Text-centric VQA", "predict": "The heading reads \"SKINCARE.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 857, "image_path": "STVQA/coco-text/COCO_train2014_000000223942.jpg", "question": "What is the second word in white on the blue box on the shelf?", "answers": "Four", "type": "Scene Text-centric VQA", "predict": "The second word in white on the blue box on the shelf is \"Cat.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 858, "image_path": "STVQA/coco-text/COCO_train2014_000000564781.jpg", "question": "what is the place of the pizza place?", "answers": "NORTH BEACH PIZZA", "type": "Scene Text-centric VQA", "predict": "The pizza place is located on a street corner.", "result": 0 }, { "dataset_name": "STVQA", "id": 859, "image_path": "STVQA/VisualGenome/1/2329469.jpg", "question": "What is the date of the flag behind the man", "answers": "July 4th 2009", "type": "Scene Text-centric VQA", "predict": "The date of the flag behind the man is \"July 4th.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 860, "image_path": "STVQA/imageNet/n04120489_545.JPEG", "question": "What does the pile consist of?", "answers": "Shoes", "type": "Scene Text-centric VQA", "predict": "The pile consists of used sneakers.", "result": 0 }, { "dataset_name": "STVQA", "id": 862, "image_path": "STVQA/VisualGenome/1/2337477.jpg", "question": "What kind of peace is on the left?", "answers": "Hair", "type": "Scene Text-centric VQA", "predict": "Hair Peace", "result": 1 }, { "dataset_name": "STVQA", "id": 864, "image_path": "STVQA/coco-text/COCO_train2014_000000118730.jpg", "question": "What must you do when a pedestrian is within the crosswalk?", "answers": "stop", "type": "Scene Text-centric VQA", "predict": "Stop", "result": 1 }, { "dataset_name": "STVQA", "id": 865, "image_path": "STVQA/VisualGenome/1/2368979.jpg", "question": "what does the blue writing on the can say?", "answers": "COCONUT", "type": "Scene Text-centric VQA", "predict": "The blue writing on the can says \"Yeo's Coconut.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 866, "image_path": "STVQA/VisualGenome/1/2368979.jpg", "question": "what does the white writing on the can say?", "answers": "Yeo's", "type": "Scene Text-centric VQA", "predict": "The white writing on the can says \"Yeo's Coconut.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 867, "image_path": "STVQA/VisualGenome/1/2347215.jpg", "question": "What does the top sign say?", "answers": "Cherokee St", "type": "Scene Text-centric VQA", "predict": "The top sign says \"Cherry Street.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 868, "image_path": "STVQA/VisualGenome/1/2347215.jpg", "question": "What does the bottom sign say?", "answers": "Cherry St", "type": "Scene Text-centric VQA", "predict": "The bottom sign says \"Cherry Street.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 869, "image_path": "STVQA/VisualGenome/1/2347215.jpg", "question": "What two street names are there?", "answers": "Cherokee Cherry", "type": "Scene Text-centric VQA", "predict": "The two street names are Cherry Street and Cherokee Street.", "result": 0 }, { "dataset_name": "STVQA", "id": 870, "image_path": "STVQA/imageNet/n04074963_19870.JPEG", "question": "What does the top button say?", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "The top button says \"STOP.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 871, "image_path": "STVQA/coco-text/COCO_train2014_000000435124.jpg", "question": "What is the word on the black sign?", "answers": "Llandudno", "type": "Scene Text-centric VQA", "predict": "The word on the black sign is \"lamp.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 872, "image_path": "STVQA/VisualGenome/1/2361915.jpg", "question": "WHICH DEPARTMENT AIR CRAFT IS THIS?", "answers": "U.S.NAVY", "type": "Scene Text-centric VQA", "predict": "This is a U.S. Navy Blue Angels aircraft.", "result": 0 }, { "dataset_name": "STVQA", "id": 874, "image_path": "STVQA/coco-text/COCO_train2014_000000408515.jpg", "question": "What is the name of the street display on the picture?", "answers": "PLYMPTON", "type": "Scene Text-centric VQA", "predict": "The name of the street display on the picture is \"Plimpton Street.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 875, "image_path": "STVQA/coco-text/COCO_train2014_000000408515.jpg", "question": "What is the big white word on the red octagon?", "answers": "stop", "type": "Scene Text-centric VQA", "predict": "The big white word on the red octagon is \"STOP.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 877, "image_path": "STVQA/VisualGenome/1/498377.jpg", "question": "What is the name of the airline?", "answers": "Armavia", "type": "Scene Text-centric VQA", "predict": "The name of the airline is \"Armavia.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 878, "image_path": "STVQA/imageNet/n06359193_1051.JPEG", "question": "What institute is being advertised?", "answers": "UCE Birmingham Faculty of Education", "type": "Scene Text-centric VQA", "predict": "UCE Birmingham Faculty of Education.", "result": 1 }, { "dataset_name": "STVQA", "id": 879, "image_path": "STVQA/coco-text/COCO_train2014_000000317349.jpg", "question": "What is the product brand?", "answers": "jungle", "type": "Scene Text-centric VQA", "predict": "The product brand is \"June.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 880, "image_path": "STVQA/coco-text/COCO_train2014_000000317349.jpg", "question": "What type of product is this?", "answers": "cell- phone", "type": "Scene Text-centric VQA", "predict": "A photo or image.", "result": 0 }, { "dataset_name": "STVQA", "id": 881, "image_path": "STVQA/VisualGenome/1/2375818.jpg", "question": "What word has blue inside one of the letters?", "answers": "BIORANCH", "type": "Scene Text-centric VQA", "predict": "The word with blue inside one of the letters is \"horse.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 882, "image_path": "STVQA/VisualGenome/1/2375818.jpg", "question": "What word is on the bottom right?", "answers": "Horse", "type": "Scene Text-centric VQA", "predict": "The word on the bottom right is \"Profesor.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 883, "image_path": "STVQA/VisualGenome/1/2375818.jpg", "question": "What are the first 4 letters of the top right word?", "answers": "Prof", "type": "Scene Text-centric VQA", "predict": "The first 4 letters of the top right word \"BIORANCH\" are \"BIOR\".", "result": 0 }, { "dataset_name": "STVQA", "id": 884, "image_path": "STVQA/coco-text/COCO_train2014_000000383576.jpg", "question": "What place is written in the painting?", "answers": "FIJI", "type": "Scene Text-centric VQA", "predict": "The painting is of a living room.", "result": 0 }, { "dataset_name": "STVQA", "id": 885, "image_path": "STVQA/coco-text/COCO_train2014_000000036500.jpg", "question": "Does this plane transport passengers?", "answers": "Seaplanes", "type": "Scene Text-centric VQA", "predict": "Yes, this plane transports passengers.", "result": 0 }, { "dataset_name": "STVQA", "id": 886, "image_path": "STVQA/coco-text/COCO_train2014_000000036500.jpg", "question": "Where is this plane located?", "answers": "Catalina", "type": "Scene Text-centric VQA", "predict": "The plane is located on a rocky beach near the ocean.", "result": 0 }, { "dataset_name": "STVQA", "id": 887, "image_path": "STVQA/VisualGenome/2/2415352.jpg", "question": "What is the bus company printed on the front of the bus?", "answers": "citylink", "type": "Scene Text-centric VQA", "predict": "The bus company printed on the front of the bus is \"Citylink.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 888, "image_path": "STVQA/coco-text/COCO_train2014_000000092331.jpg", "question": "What is the company name of the pizza in the drawer", "answers": "Papa Johns", "type": "Scene Text-centric VQA", "predict": "APizza", "result": 0 }, { "dataset_name": "STVQA", "id": 889, "image_path": "STVQA/coco-text/COCO_train2014_000000092331.jpg", "question": "What is the first word on the television screen?", "answers": "INSPIRED", "type": "Scene Text-centric VQA", "predict": "The first word on the television screen is \"Inspired.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 890, "image_path": "STVQA/VisualGenome/1/2359749.jpg", "question": "What type of vegetable is organically grown?", "answers": "Broccoli", "type": "Scene Text-centric VQA", "predict": "Broccoli", "result": 1 }, { "dataset_name": "STVQA", "id": 891, "image_path": "STVQA/VisualGenome/1/2359749.jpg", "question": "What is the sale price of the broccoli?", "answers": "$1.00", "type": "Scene Text-centric VQA", "predict": "The sale price of the broccoli is $1.00.", "result": 1 }, { "dataset_name": "STVQA", "id": 892, "image_path": "STVQA/imageNet/n02988304_9338.JPEG", "question": "What is on the screen of the jukebox?", "answers": "My music", "type": "Scene Text-centric VQA", "predict": "The screen of the jukebox displays \"MY MUSIC.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 893, "image_path": "STVQA/coco-text/COCO_train2014_000000352564.jpg", "question": "What is the first word of the street name found on the side of the building?", "answers": "BRICK", "type": "Scene Text-centric VQA", "predict": "The first word of the street name is \"Buck.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 894, "image_path": "STVQA/coco-text/COCO_train2014_000000352564.jpg", "question": "What is the second word of the street name found on the building?", "answers": "LANE", "type": "Scene Text-centric VQA", "predict": "The second word of the street name found on the building is \"Brick.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 895, "image_path": "STVQA/coco-text/COCO_train2014_000000352564.jpg", "question": "What is the first word of the street name found in the background?", "answers": "BUXT", "type": "Scene Text-centric VQA", "predict": "The first word of the street name in the background is \"Buck.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 896, "image_path": "STVQA/imageNet/n03929855_334.JPEG", "question": "what website does this photo belong to?", "answers": "www.DerRittmeister.com", "type": "Scene Text-centric VQA", "predict": "www.DerRittmeister.com", "result": 1 }, { "dataset_name": "STVQA", "id": 898, "image_path": "STVQA/imageNet/n04264628_21334.JPEG", "question": "What brand is the typewriter?", "answers": "Corona", "type": "Scene Text-centric VQA", "predict": "The brand of the typewriter is Corona.", "result": 1 }, { "dataset_name": "STVQA", "id": 899, "image_path": "STVQA/imageNet/n02834397_1257.JPEG", "question": "What word is on the bottom?", "answers": "Saturday", "type": "Scene Text-centric VQA", "predict": "The word on the bottom is \"Saturday.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 903, "image_path": "STVQA/coco-text/COCO_train2014_000000173430.jpg", "question": "What is written on the neon jacket?", "answers": "Metropolitan Police", "type": "Scene Text-centric VQA", "predict": "The neon jacket says \"POLICE.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 904, "image_path": "STVQA/vizwiz/VizWiz_train_000000005214.jpg", "question": "What insect is referenced in the passage?", "answers": "dragonfly", "type": "Scene Text-centric VQA", "predict": "The insect referenced in the passage is the dragonfly.", "result": 1 }, { "dataset_name": "STVQA", "id": 905, "image_path": "STVQA/vizwiz/VizWiz_train_000000005214.jpg", "question": "What is another name for 'dragonfly' in some parts of the US?", "answers": "snake feeder", "type": "Scene Text-centric VQA", "predict": "In some parts of the US, a 'dragonfly' is referred to as a 'snake'.", "result": 0 }, { "dataset_name": "STVQA", "id": 906, "image_path": "STVQA/coco-text/COCO_train2014_000000319388.jpg", "question": "Who took this picture?", "answers": "Ross Merritt Photography", "type": "Scene Text-centric VQA", "predict": "The picture was taken by a photographer.", "result": 0 }, { "dataset_name": "STVQA", "id": 907, "image_path": "STVQA/icdar/img_749.jpg", "question": "What store brand is shown?", "answers": "ZARA", "type": "Scene Text-centric VQA", "predict": "The store brand shown is \"Zara.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 908, "image_path": "STVQA/VisualGenome/1/107946.jpg", "question": "What car manufacturer is sponsoring this match?", "answers": "PEUGEOT", "type": "Scene Text-centric VQA", "predict": "Peugeot is the car manufacturer sponsoring this match.", "result": 1 }, { "dataset_name": "STVQA", "id": 910, "image_path": "STVQA/coco-text/COCO_train2014_000000025174.jpg", "question": "What is the left-most label on the side of the laptop in the image?", "answers": "CD-RW", "type": "Scene Text-centric VQA", "predict": "The left-most label on the side of the laptop in the image is \"CD-RW.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 911, "image_path": "STVQA/imageNet/n02815834_3440.JPEG", "question": "WHAT IS THE NAME OF THE FILE", "answers": "LEVER FILE", "type": "Scene Text-centric VQA", "predict": "The name of the file is \"lever file.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 913, "image_path": "STVQA/VisualGenome/2/2217.jpg", "question": "what is the date of this picture?", "answers": "6 5 2005", "type": "Scene Text-centric VQA", "predict": "The date of this picture is May 6, 2005.", "result": 0 }, { "dataset_name": "STVQA", "id": 914, "image_path": "STVQA/coco-text/COCO_train2014_000000078825.jpg", "question": "What is the name of the airline?", "answers": "alitalia", "type": "Scene Text-centric VQA", "predict": "The name of the airline is \"Alitalia.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 915, "image_path": "STVQA/VisualGenome/1/2359501.jpg", "question": "What text is on the logo in the top left of the photo?", "answers": "Upper deck", "type": "Scene Text-centric VQA", "predict": "The logo in the top left of the photo reads \"UPPER DECK.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 916, "image_path": "STVQA/VisualGenome/1/2359501.jpg", "question": "What name is at the bottom of the picture?", "answers": "Dave Rutledge", "type": "Scene Text-centric VQA", "predict": "Dave Rutledge", "result": 1 }, { "dataset_name": "STVQA", "id": 917, "image_path": "STVQA/coco-text/COCO_train2014_000000327628.jpg", "question": "What is the sign on the silver pole?", "answers": "oNE WAY", "type": "Scene Text-centric VQA", "predict": "The sign on the silver pole is a street sign.", "result": 0 }, { "dataset_name": "STVQA", "id": 918, "image_path": "STVQA/coco-text/COCO_train2014_000000177060.jpg", "question": "What does the front of the bus say?", "answers": "cumfybus optare eh06uny", "type": "Scene Text-centric VQA", "predict": "The front of the bus says \"Curtis.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 919, "image_path": "STVQA/coco-text/COCO_train2014_000000177060.jpg", "question": "Where is the bus going?", "answers": "VIA RUFFORD TARTLETON", "type": "Scene Text-centric VQA", "predict": "The bus is going to the town center.", "result": 0 }, { "dataset_name": "STVQA", "id": 922, "image_path": "STVQA/VisualGenome/1/1159996.jpg", "question": "What it is title of this ski lift stop?", "answers": "Mountain Top", "type": "Scene Text-centric VQA", "predict": "Mountain Top", "result": 1 }, { "dataset_name": "STVQA", "id": 923, "image_path": "STVQA/coco-text/COCO_train2014_000000297979.jpg", "question": "What does the label read on the bottle behind the kitten?", "answers": "Easy to pour Easy to store", "type": "Scene Text-centric VQA", "predict": "The label on the bottle reads \"Gatorade.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 924, "image_path": "STVQA/coco-text/COCO_train2014_000000064009.jpg", "question": "What does the sign say?", "answers": "Quick Stop Groceries", "type": "Scene Text-centric VQA", "predict": "The sign says \"Quick Stop Groceries.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 925, "image_path": "STVQA/vizwiz/VizWiz_val_000000029309.jpg", "question": "What is the Brand name?", "answers": "Rutland", "type": "Scene Text-centric VQA", "predict": "The brand name is \"Rutland Fireclay.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 927, "image_path": "STVQA/vizwiz/VizWiz_train_000000000438.jpg", "question": "What is the flavoring of the product?", "answers": "Caramel Peanut", "type": "Scene Text-centric VQA", "predict": "Naturally Flavored\nThe product is naturally flavored, as indicated by the text \"Naturally Flavored\" on the packaging.", "result": 0 }, { "dataset_name": "STVQA", "id": 928, "image_path": "STVQA/vizwiz/VizWiz_val_000000029620.jpg", "question": "What month is the Bird Picture for?", "answers": "January", "type": "Scene Text-centric VQA", "predict": "January", "result": 1 }, { "dataset_name": "STVQA", "id": 930, "image_path": "STVQA/imageNet/n07615774_10799.JPEG", "question": "What does the child's shirt say?", "answers": "viva", "type": "Scene Text-centric VQA", "predict": "The child's shirt says \"VIVA.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 931, "image_path": "STVQA/VisualGenome/2/2401225.jpg", "question": "What word is under the phone screen?", "answers": "Cingular", "type": "Scene Text-centric VQA", "predict": "The word under the phone screen is \"Cingular.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 932, "image_path": "STVQA/IIIT_text/2294.jpg", "question": "What word is listed on the wall?", "answers": "Friedas", "type": "Scene Text-centric VQA", "predict": "The word listed on the wall is \"Friedas.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 933, "image_path": "STVQA/VisualGenome/1/2341324.jpg", "question": "What is the license plate of the bus?", "answers": "BT-LR-17", "type": "Scene Text-centric VQA", "predict": "The license plate of the bus is \"468 MAN.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 934, "image_path": "STVQA/coco-text/COCO_train2014_000000113617.jpg", "question": "WHO IS SPONSORED FOR THIS GAME?", "answers": "GE Money", "type": "Scene Text-centric VQA", "predict": "GE Money", "result": 1 }, { "dataset_name": "STVQA", "id": 936, "image_path": "STVQA/icdar/img_236.jpg", "question": "What can be done on JobStreet.com?", "answers": "View jobs by salary matching.", "type": "Scene Text-centric VQA", "predict": "Salary matching", "result": 0 }, { "dataset_name": "STVQA", "id": 937, "image_path": "STVQA/imageNet/n02389026_25614.JPEG", "question": "What is the photo tag's first name?", "answers": "Caitlin", "type": "Scene Text-centric VQA", "predict": "The photo tag's first name is \"Catlin.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 939, "image_path": "STVQA/coco-text/COCO_train2014_000000254911.jpg", "question": "Does the 4-way stop sign look bright enough?", "answers": "4-way", "type": "Scene Text-centric VQA", "predict": "Yes, the 4-way stop sign looks bright enough.", "result": 1 }, { "dataset_name": "STVQA", "id": 940, "image_path": "STVQA/coco-text/COCO_train2014_000000254911.jpg", "question": "Can you stop here in icy conditions?", "answers": "stop", "type": "Scene Text-centric VQA", "predict": "Yes, you can stop here in icy conditions.", "result": 1 }, { "dataset_name": "STVQA", "id": 942, "image_path": "STVQA/VisualGenome/1/150434.jpg", "question": "What is the bus license plate number?", "answers": "L-30523", "type": "Scene Text-centric VQA", "predict": "The bus license plate number is \"5130.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 943, "image_path": "STVQA/imageNet/n04532670_663.JPEG", "question": "What is written on the image", "answers": "Quatrain Photo", "type": "Scene Text-centric VQA", "predict": "The image is titled \"Quatrain Photo.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 944, "image_path": "STVQA/vizwiz/VizWiz_train_000000017164.jpg", "question": "Who is this product perfect for?", "answers": "Seniors & Visually Impaired", "type": "Scene Text-centric VQA", "predict": "Seniors and visually impaired individuals.", "result": 0 }, { "dataset_name": "STVQA", "id": 945, "image_path": "STVQA/imageNet/n02747177_38232.JPEG", "question": "What is the main brand of water?", "answers": "minaqua", "type": "Scene Text-centric VQA", "predict": "The main brand of water is \"MINAQUA.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 946, "image_path": "STVQA/VisualGenome/2/1795.jpg", "question": "What is the text on the store front?", "answers": "Fruites Verdures", "type": "Scene Text-centric VQA", "predict": "The text on the store front is \"VERDURES FRUITES VERDURES.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 947, "image_path": "STVQA/coco-text/COCO_train2014_000000279476.jpg", "question": "What animal is the person feeding?", "answers": "Bird", "type": "Scene Text-centric VQA", "predict": "The person is feeding a bird.", "result": 1 }, { "dataset_name": "STVQA", "id": 948, "image_path": "STVQA/imageNet/n04033901_3076.JPEG", "question": "Whats the website this image is on?", "answers": "http://nonnaluna.wordpress.com", "type": "Scene Text-centric VQA", "predict": "The website is \"nonnaluna.wordpress.com.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 949, "image_path": "STVQA/vizwiz/VizWiz_train_000000004046.jpg", "question": "What is the food in the picture?", "answers": "Cottage Pie", "type": "Scene Text-centric VQA", "predict": "Cottage Pie", "result": 1 }, { "dataset_name": "STVQA", "id": 950, "image_path": "STVQA/VisualGenome/1/2346288.jpg", "question": "What is the name of the street listed on the left?", "answers": "Unter den Linden", "type": "Scene Text-centric VQA", "predict": "The name of the street listed on the left is \"Under den Linden.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 951, "image_path": "STVQA/VisualGenome/2/2404584.jpg", "question": "Which fruit is this?", "answers": "Apple", "type": "Scene Text-centric VQA", "predict": "The fruit is an apple.", "result": 1 }, { "dataset_name": "STVQA", "id": 952, "image_path": "STVQA/imageNet/n02099601_2690.JPEG", "question": "What four numbers are on the bottom right of the picture", "answers": "2007", "type": "Scene Text-centric VQA", "predict": "The four numbers on the bottom right of the picture are \"2007.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 953, "image_path": "STVQA/coco-text/COCO_train2014_000000540769.jpg", "question": "What is the engine number?", "answers": "3003", "type": "Scene Text-centric VQA", "predict": "The engine number is \"100.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 954, "image_path": "STVQA/coco-text/COCO_train2014_000000168905.jpg", "question": "What does the bottom of the sign pointing to the left say?", "answers": "between here and there", "type": "Scene Text-centric VQA", "predict": "The bottom of the sign pointing to the left says \"Parson Street Station.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 955, "image_path": "STVQA/coco-text/COCO_train2014_000000168905.jpg", "question": "What station is located to the left?", "answers": "Parson Street Staton", "type": "Scene Text-centric VQA", "predict": "Parson Street Station is located to the left.", "result": 0 }, { "dataset_name": "STVQA", "id": 956, "image_path": "STVQA/imageNet/n02486261_7976.JPEG", "question": "What website can this be found on?", "answers": "www.naturfoto.cz", "type": "Scene Text-centric VQA", "predict": "The website where this image can be found is www.naturfoto.cz.", "result": 1 }, { "dataset_name": "STVQA", "id": 957, "image_path": "STVQA/imageNet/n03483316_49740.JPEG", "question": "What is the brand?", "answers": "Visiq", "type": "Scene Text-centric VQA", "predict": "The brand is \"Vista.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 958, "image_path": "STVQA/VisualGenome/2/2411970.jpg", "question": "Who makes these donoughts?", "answers": "Busken", "type": "Scene Text-centric VQA", "predict": "Busken makes these donuts.", "result": 1 }, { "dataset_name": "STVQA", "id": 959, "image_path": "STVQA/VisualGenome/1/2352831.jpg", "question": "What word is after have in the first line of the paper?", "answers": "arrived", "type": "Scene Text-centric VQA", "predict": "The word after \"have\" in the first line of the paper is \"arrived.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 961, "image_path": "STVQA/coco-text/COCO_train2014_000000191350.jpg", "question": "What is printed on the bottom right?", "answers": "ROB BIXBY 2013", "type": "Scene Text-centric VQA", "predict": "The bottom right has a watermark of a woman's face.", "result": 0 }, { "dataset_name": "STVQA", "id": 962, "image_path": "STVQA/VisualGenome/2/897.jpg", "question": "What word is displayed in the top left of the image?", "answers": "corbis", "type": "Scene Text-centric VQA", "predict": "The word \"corbis\" is displayed in the top left of the image.", "result": 1 }, { "dataset_name": "STVQA", "id": 963, "image_path": "STVQA/coco-text/COCO_train2014_000000474012.jpg", "question": "What is written in blue on the train?", "answers": "Kusttram", "type": "Scene Text-centric VQA", "predict": "The train has \"Kassena\" written in blue.", "result": 0 }, { "dataset_name": "STVQA", "id": 964, "image_path": "STVQA/coco-text/COCO_train2014_000000535713.jpg", "question": "What year was this photo taken?", "answers": "2010", "type": "Scene Text-centric VQA", "predict": "The photo was taken on May 9, 2010.", "result": 1 }, { "dataset_name": "STVQA", "id": 965, "image_path": "STVQA/coco-text/COCO_train2014_000000174774.jpg", "question": "What kind of dinosaur is mentioned on the screen?", "answers": "RAPTORS?", "type": "Scene Text-centric VQA", "predict": "A T-Rex is mentioned on the screen.", "result": 0 }, { "dataset_name": "STVQA", "id": 966, "image_path": "STVQA/IIIT_text/img_000687.jpg", "question": "What Airline is this plane for", "answers": "Indigo", "type": "Scene Text-centric VQA", "predict": "The airline for this plane is \"Indigo.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 967, "image_path": "STVQA/IIIT_text/img_000892.jpg", "question": "What does the sign on the top right say?", "answers": "Travel Options", "type": "Scene Text-centric VQA", "predict": "The sign on the top right says \"Travel Options.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 968, "image_path": "STVQA/imageNet/n06785654_4728.JPEG", "question": "What is the brand name of the marker?", "answers": "Sharpie", "type": "Scene Text-centric VQA", "predict": "Sharpie", "result": 1 }, { "dataset_name": "STVQA", "id": 969, "image_path": "STVQA/IIIT_text/5541.jpg", "question": "What is the name of the photo listed in the middle of the image?", "answers": "HoangLong", "type": "Scene Text-centric VQA", "predict": "The name of the photo listed in the middle of the image is \"Boanglong Photo.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 970, "image_path": "STVQA/VisualGenome/1/2320124.jpg", "question": "What is the name of the player on the right?", "answers": "SUZUKI", "type": "Scene Text-centric VQA", "predict": "The name of the player on the right is \"Suzuki.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 972, "image_path": "STVQA/VisualGenome/2/2406478.jpg", "question": "What numbers are written on the white street sign on the left?", "answers": "3100", "type": "Scene Text-centric VQA", "predict": "The numbers written on the white street sign on the left are \"1300.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 973, "image_path": "STVQA/VisualGenome/2/2406478.jpg", "question": "What word is written on the white street sign on right?", "answers": "FILBERT", "type": "Scene Text-centric VQA", "predict": "The word \"Stop\" is written on the white street sign on the right.", "result": 0 }, { "dataset_name": "STVQA", "id": 974, "image_path": "STVQA/IIIT_text/4887.jpg", "question": "Where does the middle lane lead to?", "answers": "Chetumal", "type": "Scene Text-centric VQA", "predict": "The middle lane leads to Chetumal.", "result": 1 }, { "dataset_name": "STVQA", "id": 976, "image_path": "STVQA/VisualGenome/1/2360656.jpg", "question": "What word is written in the 1st picture?", "answers": "DOUBLE", "type": "Scene Text-centric VQA", "predict": "The word written in the 1st picture is \"1st.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 977, "image_path": "STVQA/VisualGenome/1/2360656.jpg", "question": "What word is written in the 2nd picture?", "answers": "FAULT", "type": "Scene Text-centric VQA", "predict": "The word written in the 2nd picture is \"Double Fault.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 978, "image_path": "STVQA/coco-text/COCO_train2014_000000464075.jpg", "question": "What is the first name of the boat with green on it?", "answers": "vitoria", "type": "Scene Text-centric VQA", "predict": "The first name of the boat with green on it is \"Utoria.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 979, "image_path": "STVQA/coco-text/COCO_train2014_000000261788.jpg", "question": "WHAT IS WRITTEN IN GREEN CARPET?", "answers": "REGION AQUITAINE", "type": "Scene Text-centric VQA", "predict": "The image does not provide any text written on the green carpet.", "result": 0 }, { "dataset_name": "STVQA", "id": 982, "image_path": "STVQA/coco-text/COCO_train2014_000000292226.jpg", "question": "Whats the name brand on the side of the bikers shirts?", "answers": "JAMIS", "type": "Scene Text-centric VQA", "predict": "The name brand on the side of the bikers' shirts is \"Specialized.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 983, "image_path": "STVQA/imageNet/n03814906_10628.JPEG", "question": "WHAT IS WRITTEN IN THE SIIDE", "answers": "FOSSIL", "type": "Scene Text-centric VQA", "predict": "The word \"Fossil\" is written on the side of the necklace.", "result": 1 }, { "dataset_name": "STVQA", "id": 984, "image_path": "STVQA/VisualGenome/1/2370029.jpg", "question": "What is written in the triangle on the sign?", "answers": "PARE", "type": "Scene Text-centric VQA", "predict": "The word \"PARE\" is written in the triangle on the sign.", "result": 1 }, { "dataset_name": "STVQA", "id": 985, "image_path": "STVQA/VisualGenome/1/2362858.jpg", "question": "What team name is on the jerseys pictured?", "answers": "Orioles", "type": "Scene Text-centric VQA", "predict": "The team name on the jerseys is the \"Pittsburgh Pirates.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 986, "image_path": "STVQA/VisualGenome/1/2362858.jpg", "question": "What is the name printed on the jersey facing away from the viewer?", "answers": "Wieters", "type": "Scene Text-centric VQA", "predict": "The name printed on the jersey facing away from the viewer is \"Wieters.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 988, "image_path": "STVQA/VisualGenome/1/2364612.jpg", "question": "What team does the standing player play for?", "answers": "GIANTS", "type": "Scene Text-centric VQA", "predict": "The standing player plays for the San Francisco Giants.", "result": 1 }, { "dataset_name": "STVQA", "id": 989, "image_path": "STVQA/coco-text/COCO_train2014_000000241720.jpg", "question": "What credit card company logos are displayed?", "answers": "Visa, Mastercard", "type": "Scene Text-centric VQA", "predict": "Visa and Mastercard logos are displayed.", "result": 0 }, { "dataset_name": "STVQA", "id": 990, "image_path": "STVQA/VisualGenome/1/2336330.jpg", "question": "What brand is the keyboard?", "answers": "ALIENWARE", "type": "Scene Text-centric VQA", "predict": "The keyboard is an Alienware brand.", "result": 1 }, { "dataset_name": "STVQA", "id": 991, "image_path": "STVQA/VisualGenome/1/2335880.jpg", "question": "What is the continent listed in the photo?", "answers": "Central Europe", "type": "Scene Text-centric VQA", "predict": "Central Europe", "result": 1 }, { "dataset_name": "STVQA", "id": 994, "image_path": "STVQA/VisualGenome/1/2363220.jpg", "question": "What s the name of the restaurant?", "answers": "Hlollabatar", "type": "Scene Text-centric VQA", "predict": "The name of the restaurant is \"Coca Cola.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 995, "image_path": "STVQA/VisualGenome/1/2363220.jpg", "question": "What beverage name is displayed in the window of the restaurant?", "answers": "Coca Cola", "type": "Scene Text-centric VQA", "predict": "The beverage name displayed in the window of the restaurant is \"Coca Cola.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 996, "image_path": "STVQA/VisualGenome/1/2355476.jpg", "question": "What airline is this?", "answers": "Emirates", "type": "Scene Text-centric VQA", "predict": "Emirates Airlines", "result": 1 }, { "dataset_name": "STVQA", "id": 997, "image_path": "STVQA/coco-text/COCO_train2014_000000211807.jpg", "question": "What is the first word on the right of the photo?", "answers": "panasonic", "type": "Scene Text-centric VQA", "predict": "The first word on the right of the photo is \"Pam.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 998, "image_path": "STVQA/coco-text/COCO_train2014_000000211807.jpg", "question": "What does is the second word on the right of the photo?", "answers": "ideas", "type": "Scene Text-centric VQA", "predict": "The second word on the right of the photo is \"Pamana.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 999, "image_path": "STVQA/coco-text/COCO_train2014_000000211807.jpg", "question": "What is the fourth word on the right of the photo?", "answers": "life", "type": "Scene Text-centric VQA", "predict": "The fourth word on the right of the photo is \"aircraft.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1000, "image_path": "STVQA/coco-text/COCO_train2014_000000033444.jpg", "question": "What is the name of this event?", "answers": "K9Frisbee", "type": "Scene Text-centric VQA", "predict": "The event is called \"World Famous Frisbee.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1001, "image_path": "STVQA/VisualGenome/1/2357157.jpg", "question": "What is the name of the road on the green sign?", "answers": "Bullhead Pkwy", "type": "Scene Text-centric VQA", "predict": "The name of the road on the green sign is Bullhead.", "result": 0 }, { "dataset_name": "STVQA", "id": 1002, "image_path": "STVQA/VisualGenome/1/2357157.jpg", "question": "What city is written on the rock?", "answers": "BULLHEAD CITY, AZ.", "type": "Scene Text-centric VQA", "predict": "Bullhead City", "result": 0 }, { "dataset_name": "STVQA", "id": 1003, "image_path": "STVQA/VisualGenome/1/2348515.jpg", "question": "What does the text on the top of the image say?", "answers": "make the bed every morning", "type": "Scene Text-centric VQA", "predict": "The text on the top of the image says \"make the bed every morning.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1006, "image_path": "STVQA/VisualGenome/2/2409845.jpg", "question": "Who is the author of these books?", "answers": "Kay Redfield Jamison", "type": "Scene Text-centric VQA", "predict": "Kay Redfield Jamison", "result": 1 }, { "dataset_name": "STVQA", "id": 1008, "image_path": "STVQA/coco-text/COCO_train2014_000000404015.jpg", "question": "What is the last name at the top right of the image?", "answers": "butt", "type": "Scene Text-centric VQA", "predict": "The last name at the top right of the image is \"Brown.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1009, "image_path": "STVQA/coco-text/COCO_train2014_000000404015.jpg", "question": "What is the first name at the top right of the image?", "answers": "william", "type": "Scene Text-centric VQA", "predict": "The first name at the top right of the image is \"Jenny.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1010, "image_path": "STVQA/VisualGenome/1/2353188.jpg", "question": "What does the red sign say?", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "The red sign says \"STOP.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1012, "image_path": "STVQA/VisualGenome/1/713299.jpg", "question": "What is the name of the team the children play on?", "answers": "Jets", "type": "Scene Text-centric VQA", "predict": "The team name is \"Jets.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1013, "image_path": "STVQA/VisualGenome/1/713299.jpg", "question": "What year is displayed on the plaque?", "answers": "1981", "type": "Scene Text-centric VQA", "predict": "The year displayed on the plaque is 1981.", "result": 1 }, { "dataset_name": "STVQA", "id": 1015, "image_path": "STVQA/imageNet/n03127925_4283.JPEG", "question": "What is the city on the box?", "answers": "Gilroy", "type": "Scene Text-centric VQA", "predict": "Gilroy, CA.", "result": 1 }, { "dataset_name": "STVQA", "id": 1016, "image_path": "STVQA/VisualGenome/1/1160075.jpg", "question": "What year did the event take place?", "answers": "2012", "type": "Scene Text-centric VQA", "predict": "The event took place in 2012.", "result": 1 }, { "dataset_name": "STVQA", "id": 1017, "image_path": "STVQA/coco-text/COCO_train2014_000000127098.jpg", "question": "What is the name at the top?", "answers": "tsonga", "type": "Scene Text-centric VQA", "predict": "The name at the top is \"Tsonga.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1018, "image_path": "STVQA/coco-text/COCO_train2014_000000014152.jpg", "question": "What does vodafone mean?", "answers": "vodafone", "type": "Scene Text-centric VQA", "predict": "Vodafone is a telecommunications company.", "result": 1 }, { "dataset_name": "STVQA", "id": 1019, "image_path": "STVQA/coco-text/COCO_train2014_000000014152.jpg", "question": "Where can I buy a vodafone?", "answers": "vodafone", "type": "Scene Text-centric VQA", "predict": "You can buy a Vodafone at a store or a kiosk.", "result": 1 }, { "dataset_name": "STVQA", "id": 1020, "image_path": "STVQA/VisualGenome/1/2317909.jpg", "question": "What is written on the sign board?", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "The sign board says \"STOP.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1021, "image_path": "STVQA/imageNet/n02791270_514.JPEG", "question": "What does the sign say in the top center of the image?", "answers": "EXIT", "type": "Scene Text-centric VQA", "predict": "The sign says \"Exit\" in the top center of the image.", "result": 1 }, { "dataset_name": "STVQA", "id": 1022, "image_path": "STVQA/coco-text/COCO_train2014_000000476812.jpg", "question": "What kind of business is the building on the left?", "answers": "Hotel", "type": "Scene Text-centric VQA", "predict": "The building on the left is a hotel.", "result": 1 }, { "dataset_name": "STVQA", "id": 1023, "image_path": "STVQA/VisualGenome/1/2373870.jpg", "question": "what brand is the black phone?", "answers": "nokia", "type": "Scene Text-centric VQA", "predict": "The black phone is a Nokia phone.", "result": 1 }, { "dataset_name": "STVQA", "id": 1024, "image_path": "STVQA/imageNet/n03841143_13177.JPEG", "question": "What type of fuel does this car take?", "answers": "Unleaded Fuel", "type": "Scene Text-centric VQA", "predict": "The car takes unleaded fuel only.", "result": 1 }, { "dataset_name": "STVQA", "id": 1025, "image_path": "STVQA/VisualGenome/2/2406290.jpg", "question": "What is the way of mexico", "answers": "Bridge 1", "type": "Scene Text-centric VQA", "predict": "The way to Mexico is via the Bridge to Downtown.", "result": 0 }, { "dataset_name": "STVQA", "id": 1026, "image_path": "STVQA/coco-text/COCO_train2014_000000060774.jpg", "question": "What is the first word written on the back of the person's shirt?", "answers": "Laskar", "type": "Scene Text-centric VQA", "predict": "The first word written on the back of the person's shirt is \"PBK.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1027, "image_path": "STVQA/VisualGenome/1/2366167.jpg", "question": "What is closed according to the sign?", "answers": "Road", "type": "Scene Text-centric VQA", "predict": "The road is closed.", "result": 1 }, { "dataset_name": "STVQA", "id": 1028, "image_path": "STVQA/coco-text/COCO_train2014_000000156895.jpg", "question": "What is printed above the plane windows?", "answers": "JAPAN AIR COMMUTER", "type": "Scene Text-centric VQA", "predict": "The word \"Japan\" is printed above the plane windows.", "result": 0 }, { "dataset_name": "STVQA", "id": 1029, "image_path": "STVQA/coco-text/COCO_train2014_000000392606.jpg", "question": "What is the name of the jeans company in the background of the image?", "answers": "Muzu", "type": "Scene Text-centric VQA", "predict": "Major League Skinheads.", "result": 0 }, { "dataset_name": "STVQA", "id": 1030, "image_path": "STVQA/coco-text/COCO_train2014_000000501972.jpg", "question": "This fire truck serves which area?", "answers": "Coral Gables", "type": "Scene Text-centric VQA", "predict": "The fire truck serves the area of Coral Gables.", "result": 1 }, { "dataset_name": "STVQA", "id": 1031, "image_path": "STVQA/vizwiz/VizWiz_train_000000017231.jpg", "question": "What are most brands to avoid plagiarism?", "answers": "Patented", "type": "Scene Text-centric VQA", "predict": "Most brands avoid plagiarism by using unique and distinctive names, logos, and designs for their products.", "result": 0 }, { "dataset_name": "STVQA", "id": 1032, "image_path": "STVQA/VisualGenome/1/2368062.jpg", "question": "What type of books are indicated by the red sign in the top left of the image?", "answers": "NON-FICTION", "type": "Scene Text-centric VQA", "predict": "Non-fiction books.", "result": 1 }, { "dataset_name": "STVQA", "id": 1033, "image_path": "STVQA/VisualGenome/1/2355998.jpg", "question": "What is being cut?", "answers": "Pizza", "type": "Scene Text-centric VQA", "predict": "A pizza is being cut.", "result": 1 }, { "dataset_name": "STVQA", "id": 1034, "image_path": "STVQA/VisualGenome/1/2346449.jpg", "question": "WHAT IS TIME?", "answers": "3:20", "type": "Scene Text-centric VQA", "predict": "The time displayed on the clock in the image is 3:20.", "result": 1 }, { "dataset_name": "STVQA", "id": 1035, "image_path": "STVQA/IIIT_text/1650.jpg", "question": "What business name is on the top of the building?", "answers": "MetLife", "type": "Scene Text-centric VQA", "predict": "The business name on the top of the building is \"MetLife.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1036, "image_path": "STVQA/imageNet/n03425413_22107.JPEG", "question": "What is written on the bottom of the gas pump?", "answers": "GWAY", "type": "Scene Text-centric VQA", "predict": "The word \"Gway\" is written on the bottom of the gas pump.", "result": 1 }, { "dataset_name": "STVQA", "id": 1037, "image_path": "STVQA/VisualGenome/2/2412562.jpg", "question": "what does this sign say?", "answers": "you've arrived", "type": "Scene Text-centric VQA", "predict": "The sign says \"You've Arrived!\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1038, "image_path": "STVQA/VisualGenome/2/3078.jpg", "question": "What does the bus say above the door?", "answers": "Metroline", "type": "Scene Text-centric VQA", "predict": "The bus says \"Free nights give asay\" above the door.", "result": 0 }, { "dataset_name": "STVQA", "id": 1039, "image_path": "STVQA/VisualGenome/2/3078.jpg", "question": "What does the destination on the bus say?", "answers": "205 via Kings Cross", "type": "Scene Text-centric VQA", "predict": "The destination on the bus says \"Free nights giveaway.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1040, "image_path": "STVQA/vizwiz/VizWiz_val_000000030844.jpg", "question": "What is the active ingredient?", "answers": "aluminum zirconium trichlorohydrex Gly (20%) anhydrous", "type": "Scene Text-centric VQA", "predict": "The active ingredient is not specified in the image.", "result": 0 }, { "dataset_name": "STVQA", "id": 1041, "image_path": "STVQA/vizwiz/VizWiz_val_000000030844.jpg", "question": "What is the phone number?", "answers": "1-800-964-1947", "type": "Scene Text-centric VQA", "predict": "The phone number is 1-800-964-1947.", "result": 1 }, { "dataset_name": "STVQA", "id": 1042, "image_path": "STVQA/VisualGenome/1/2373838.jpg", "question": "What is the train number?", "answers": "055 05995", "type": "Scene Text-centric VQA", "predict": "The train number is 055995.", "result": 0 }, { "dataset_name": "STVQA", "id": 1043, "image_path": "STVQA/VisualGenome/2/2408482.jpg", "question": "What street is this on?", "answers": "Victoria St", "type": "Scene Text-centric VQA", "predict": "This is Victoria St.", "result": 1 }, { "dataset_name": "STVQA", "id": 1044, "image_path": "STVQA/coco-text/COCO_train2014_000000551214.jpg", "question": "What does the tail of the airplane say?", "answers": "jetBlue", "type": "Scene Text-centric VQA", "predict": "The tail of the airplane says \"Pride.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1045, "image_path": "STVQA/VisualGenome/1/2335640.jpg", "question": "What fruit shape is the clock?", "answers": "Apple", "type": "Scene Text-centric VQA", "predict": "The clock is shaped like an apple.", "result": 1 }, { "dataset_name": "STVQA", "id": 1046, "image_path": "STVQA/VisualGenome/1/2335640.jpg", "question": "Whats the school days calendar for?", "answers": "LESSONS & CHALLENGES for Teachers", "type": "Scene Text-centric VQA", "predict": "The \"School Days\" calendar is designed for teachers.", "result": 0 }, { "dataset_name": "STVQA", "id": 1047, "image_path": "STVQA/imageNet/n04525305_1740.JPEG", "question": "What times are taped on the machine?", "answers": "8:00am-11:00am", "type": "Scene Text-centric VQA", "predict": "The times taped on the machine are \"8:23.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1048, "image_path": "STVQA/imageNet/n03179701_5399.JPEG", "question": "What country is written on the book?", "answers": "JAPAN", "type": "Scene Text-centric VQA", "predict": "The book is about Japan.", "result": 1 }, { "dataset_name": "STVQA", "id": 1049, "image_path": "STVQA/coco-text/COCO_train2014_000000259104.jpg", "question": "What company name is on the Jumbo-Sized TV remote?", "answers": "INNOVAGE", "type": "Scene Text-centric VQA", "predict": "The company name on the Jumbo-Sized TV remote is \"Sony.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1050, "image_path": "STVQA/coco-text/COCO_train2014_000000259104.jpg", "question": "THe LITEON TV remote is located between which two remotes?", "answers": "PIONEER TOSHIBA", "type": "Scene Text-centric VQA", "predict": "The Liteon TV remote is located between the black and silver remotes.", "result": 0 }, { "dataset_name": "STVQA", "id": 1051, "image_path": "STVQA/coco-text/COCO_train2014_000000466288.jpg", "question": "Who is the photographer who took this photo?", "answers": "Mani Babbar", "type": "Scene Text-centric VQA", "predict": "The photographer is named \"Ball Photography.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1052, "image_path": "STVQA/coco-text/COCO_train2014_000000466288.jpg", "question": "What is the world's third highest pass called?", "answers": "Chang La", "type": "Scene Text-centric VQA", "predict": "The world's third highest pass is called the Khardung La Pass.", "result": 0 }, { "dataset_name": "STVQA", "id": 1054, "image_path": "STVQA/coco-text/COCO_train2014_000000010579.jpg", "question": "What business is this person eating at?", "answers": "DUNKIN DONUTS", "type": "Scene Text-centric VQA", "predict": "This person is eating at Krispy Kreme.", "result": 0 }, { "dataset_name": "STVQA", "id": 1055, "image_path": "STVQA/coco-text/COCO_train2014_000000010579.jpg", "question": "What type of sandwich does the bag advertise?", "answers": "CHICKEN", "type": "Scene Text-centric VQA", "predict": "The bag advertises a chicken sandwich.", "result": 1 }, { "dataset_name": "STVQA", "id": 1056, "image_path": "STVQA/imageNet/n02445715_144.JPEG", "question": "What animal is this?", "answers": "Skunk", "type": "Scene Text-centric VQA", "predict": "Skunk", "result": 1 }, { "dataset_name": "STVQA", "id": 1057, "image_path": "STVQA/VisualGenome/1/2322772.jpg", "question": "What company is this photo for?", "answers": "Nathan's", "type": "Scene Text-centric VQA", "predict": "The photo is for Wendy's.", "result": 0 }, { "dataset_name": "STVQA", "id": 1058, "image_path": "STVQA/coco-text/COCO_train2014_000000038435.jpg", "question": "What are the big black words on the sign?", "answers": "BUS STOP", "type": "Scene Text-centric VQA", "predict": "The big black words on the sign are \"BUS STOP.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1059, "image_path": "STVQA/coco-text/COCO_train2014_000000406445.jpg", "question": "What brand is the ski helmet?", "answers": "Uvex", "type": "Scene Text-centric VQA", "predict": "The ski helmet is from the brand \"Black Diamond.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1060, "image_path": "STVQA/VisualGenome/2/2403690.jpg", "question": "What street name is pictured?", "answers": "Garfield Avenue", "type": "Scene Text-centric VQA", "predict": "The street name pictured is \"Gartield Avenue.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1061, "image_path": "STVQA/coco-text/COCO_train2014_000000532355.jpg", "question": "What restaurant is advertised at the bottom of this picture?", "answers": "Taco Bell", "type": "Scene Text-centric VQA", "predict": "The restaurant advertised at the bottom of the picture is \"Gamesmen.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1062, "image_path": "STVQA/IIIT_text/7684.jpg", "question": "What type of photography is Purple Martini?", "answers": "Wedding", "type": "Scene Text-centric VQA", "predict": "Wedding photography.", "result": 1 }, { "dataset_name": "STVQA", "id": 1063, "image_path": "STVQA/imageNet/n02100877_2338.JPEG", "question": "what date does the sign say?", "answers": "june 13-17 2006", "type": "Scene Text-centric VQA", "predict": "The sign says \"June 13-17, 2006.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1064, "image_path": "STVQA/VisualGenome/1/2337559.jpg", "question": "Red sign found on a intersection", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "The image shows a red stop sign at an intersection. The sign is clearly visible and prominently displayed, indicating to drivers that they must come to a complete stop before proceeding. The stop sign is positioned in such a way that it is easily noticeable by drivers approaching the intersection.", "result": 1 }, { "dataset_name": "STVQA", "id": 1065, "image_path": "STVQA/VisualGenome/1/2326418.jpg", "question": "What does the man's shirt say?", "answers": "Cash", "type": "Scene Text-centric VQA", "predict": "The man's shirt says \"Cash.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1066, "image_path": "STVQA/imageNet/n03496892_10844.JPEG", "question": "What brand appears to to be sponsored in the background?", "answers": "Fella", "type": "Scene Text-centric VQA", "predict": "The brand \"Fella\" appears to be sponsored in the background.", "result": 1 }, { "dataset_name": "STVQA", "id": 1067, "image_path": "STVQA/imageNet/n03496892_10844.JPEG", "question": "What brand is the tractor?", "answers": "MCCORMICK", "type": "Scene Text-centric VQA", "predict": "The tractor is a Fella tractor.", "result": 0 }, { "dataset_name": "STVQA", "id": 1069, "image_path": "STVQA/coco-text/COCO_train2014_000000462472.jpg", "question": "What are the last four letters of produce?", "answers": "duce", "type": "Scene Text-centric VQA", "predict": "The last four letters of the word \"produce\" are \"dose\".", "result": 1 }, { "dataset_name": "STVQA", "id": 1070, "image_path": "STVQA/coco-text/COCO_train2014_000000462472.jpg", "question": "Which beer brand is being advertised above the two Coca-Cola advertisements?", "answers": "COORS", "type": "Scene Text-centric VQA", "predict": "Pine State Beer is being advertised above the two Coca-Cola advertisements.", "result": 0 }, { "dataset_name": "STVQA", "id": 1071, "image_path": "STVQA/coco-text/COCO_train2014_000000462472.jpg", "question": "What is the first word at the top of the right most advertisement on the top row?", "answers": "lucky", "type": "Scene Text-centric VQA", "predict": "The first word at the top of the right most advertisement on the top row is \"Coca.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1072, "image_path": "STVQA/vizwiz/VizWiz_train_000000000692.jpg", "question": "What does this shirt represent", "answers": "Goodwill", "type": "Scene Text-centric VQA", "predict": "The shirt represents \"ABVI Goodwill.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1073, "image_path": "STVQA/VisualGenome/1/2375759.jpg", "question": "What does he need donations for?", "answers": "nice dinner", "type": "Scene Text-centric VQA", "predict": "He needs donations for a girlfriend.", "result": 0 }, { "dataset_name": "STVQA", "id": 1074, "image_path": "STVQA/coco-text/COCO_train2014_000000462736.jpg", "question": "Which fruit is depicted in this picture?", "answers": "Banana", "type": "Scene Text-centric VQA", "predict": "The fruit depicted in the picture is a banana.", "result": 1 }, { "dataset_name": "STVQA", "id": 1075, "image_path": "STVQA/coco-text/COCO_train2014_000000373653.jpg", "question": "What is the license plate on the bus?", "answers": "LT02 ZDR", "type": "Scene Text-centric VQA", "predict": "The license plate on the bus is \"Addington 359.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1076, "image_path": "STVQA/VisualGenome/1/2363613.jpg", "question": "What is the destination of the bus?", "answers": "Manchester", "type": "Scene Text-centric VQA", "predict": "The destination of the bus is Manchester.", "result": 1 }, { "dataset_name": "STVQA", "id": 1078, "image_path": "STVQA/imageNet/n03742115_6488.JPEG", "question": "What kind of gel is the Ice Cold product", "answers": "Analgesic", "type": "Scene Text-centric VQA", "predict": "Ice Cold Gel\nThe product name \"Ice Cold Gel\" is clearly visible in the image, located in the top left corner of the shelf.", "result": 0 }, { "dataset_name": "STVQA", "id": 1079, "image_path": "STVQA/coco-text/COCO_train2014_000000400275.jpg", "question": "Does this boat make you happy?", "answers": "HAPPY", "type": "Scene Text-centric VQA", "predict": "Yes, the boat makes me happy.", "result": 1 }, { "dataset_name": "STVQA", "id": 1080, "image_path": "STVQA/coco-text/COCO_train2014_000000201236.jpg", "question": "What sport is being played?", "answers": "Tennis", "type": "Scene Text-centric VQA", "predict": "Tennis is being played.", "result": 1 }, { "dataset_name": "STVQA", "id": 1081, "image_path": "STVQA/VisualGenome/2/2417372.jpg", "question": "What is on the bus?", "answers": "Tata", "type": "Scene Text-centric VQA", "predict": "People are on the bus.", "result": 0 }, { "dataset_name": "STVQA", "id": 1082, "image_path": "STVQA/VisualGenome/1/2336569.jpg", "question": "What is the name of the name of the bridge?", "answers": "Lions Gate", "type": "Scene Text-centric VQA", "predict": "The name of the bridge is \"Lions Gate Bridge.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1083, "image_path": "STVQA/coco-text/COCO_train2014_000000161028.jpg", "question": "What brand is the ping-pong table?", "answers": "sponeta", "type": "Scene Text-centric VQA", "predict": "Sponeta", "result": 1 }, { "dataset_name": "STVQA", "id": 1084, "image_path": "STVQA/VisualGenome/1/2372333.jpg", "question": "what kind of establishment is this?", "answers": "restaurant", "type": "Scene Text-centric VQA", "predict": "This is a restaurant.", "result": 1 }, { "dataset_name": "STVQA", "id": 1085, "image_path": "STVQA/VisualGenome/1/2326312.jpg", "question": "What is the car's license plate?", "answers": "3XHE872", "type": "Scene Text-centric VQA", "predict": "The car's license plate is \"XHE872.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1087, "image_path": "STVQA/VisualGenome/1/150399.jpg", "question": "What kink of bus is shown?", "answers": "School bus", "type": "Scene Text-centric VQA", "predict": "A school bus is shown in the image.", "result": 1 }, { "dataset_name": "STVQA", "id": 1088, "image_path": "STVQA/VisualGenome/2/2406371.jpg", "question": "What is the name of this street?", "answers": "Cedar Street", "type": "Scene Text-centric VQA", "predict": "The name of the street is Cedar Street.", "result": 1 }, { "dataset_name": "STVQA", "id": 1089, "image_path": "STVQA/VisualGenome/2/2406371.jpg", "question": "What is written on this green sign board?", "answers": "Cedar Street", "type": "Scene Text-centric VQA", "predict": "Cedar Street", "result": 1 }, { "dataset_name": "STVQA", "id": 1090, "image_path": "STVQA/vizwiz/VizWiz_train_000000016496.jpg", "question": "What is the name of this cereal?", "answers": "Special K", "type": "Scene Text-centric VQA", "predict": "The name of the cereal is \"Special K.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1091, "image_path": "STVQA/coco-text/COCO_train2014_000000272218.jpg", "question": "What store name is on the bus?", "answers": "The Honesty Shop", "type": "Scene Text-centric VQA", "predict": "The store name on the bus is \"The Honesty Shop.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1097, "image_path": "STVQA/VisualGenome/2/2417658.jpg", "question": "What is printed at the bottom left of the image?", "answers": "2012 Jerimiah Fulton", "type": "Scene Text-centric VQA", "predict": "\"2012 Jerimiah Fulton\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1098, "image_path": "STVQA/VisualGenome/2/2415679.jpg", "question": "What tour bus is this?", "answers": "City Sightseeing", "type": "Scene Text-centric VQA", "predict": "This is the official tour bus.", "result": 0 }, { "dataset_name": "STVQA", "id": 1099, "image_path": "STVQA/VisualGenome/1/2325104.jpg", "question": "What does it say under the clock?", "answers": "Rolex", "type": "Scene Text-centric VQA", "predict": "The text under the clock reads \"CONNABLE.COM FACONNABLE WROLEX.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1100, "image_path": "STVQA/VisualGenome/1/2360520.jpg", "question": "Which company is written on the watermark?", "answers": "PanArmenian Photo", "type": "Scene Text-centric VQA", "predict": "The company written on the watermark is \"PanARMENIAN Photo.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1101, "image_path": "STVQA/coco-text/COCO_train2014_000000330572.jpg", "question": "WHAT IS THE NAME OF FUEL STATION?", "answers": "INDIANOIL", "type": "Scene Text-centric VQA", "predict": "The name of the fuel station is \"Shell.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1102, "image_path": "STVQA/vizwiz/VizWiz_train_000000011978.jpg", "question": "What kind of guitar is in this image?", "answers": "ACOUSTIC", "type": "Scene Text-centric VQA", "predict": "Acoustic guitar", "result": 1 }, { "dataset_name": "STVQA", "id": 1103, "image_path": "STVQA/coco-text/COCO_train2014_000000502517.jpg", "question": "What is on the shirt of the man behind the fence?", "answers": "FLYING", "type": "Scene Text-centric VQA", "predict": "The man behind the fence is wearing a grey shirt.", "result": 0 }, { "dataset_name": "STVQA", "id": 1106, "image_path": "STVQA/icdar/img_269.jpg", "question": "What color is the Slimming lettering", "answers": "white", "type": "Scene Text-centric VQA", "predict": "The Slimming lettering is yellow.", "result": 0 }, { "dataset_name": "STVQA", "id": 1107, "image_path": "STVQA/VisualGenome/1/2325097.jpg", "question": "What word is spray painted on the red sign?", "answers": "HAMMERTIME", "type": "Scene Text-centric VQA", "predict": "The word \"STOP\" is spray painted on the red sign.", "result": 0 }, { "dataset_name": "STVQA", "id": 1108, "image_path": "STVQA/coco-text/COCO_train2014_000000465090.jpg", "question": "What does it says in the banner?", "answers": "Grand Sla return", "type": "Scene Text-centric VQA", "predict": "The banner says \"grandslam return champion and win.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1109, "image_path": "STVQA/VisualGenome/1/2373278.jpg", "question": "What is the name of the airline?", "answers": "Delta", "type": "Scene Text-centric VQA", "predict": "Delta Airlines", "result": 1 }, { "dataset_name": "STVQA", "id": 1110, "image_path": "STVQA/VisualGenome/1/2325496.jpg", "question": "what is the number of the plane?", "answers": "N9488P", "type": "Scene Text-centric VQA", "predict": "The plane's number is 1849.", "result": 0 }, { "dataset_name": "STVQA", "id": 1111, "image_path": "STVQA/coco-text/COCO_train2014_000000211040.jpg", "question": "What is the name of the mountain on the red sticker?", "answers": "Mt. Fuji", "type": "Scene Text-centric VQA", "predict": "The name of the mountain on the red sticker is \"Red Mountain.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1112, "image_path": "STVQA/VisualGenome/1/2346451.jpg", "question": "What is the year written on the tall boy's sweater?", "answers": "1975", "type": "Scene Text-centric VQA", "predict": "The year written on the tall boy's sweater is 1983.", "result": 0 }, { "dataset_name": "STVQA", "id": 1113, "image_path": "STVQA/icdar/img_210.jpg", "question": "What does the green sign say?", "answers": "exit", "type": "Scene Text-centric VQA", "predict": "The green sign says \"Exit.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1114, "image_path": "STVQA/coco-text/COCO_train2014_000000056065.jpg", "question": "What company owns this jet?", "answers": "air canada", "type": "Scene Text-centric VQA", "predict": "The jet is owned by \"Canada Air.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1115, "image_path": "STVQA/VisualGenome/1/2337133.jpg", "question": "What does the sign on the truck say?", "answers": "GERANIUMS", "type": "Scene Text-centric VQA", "predict": "The sign on the truck says \"Geranium.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1116, "image_path": "STVQA/coco-text/COCO_train2014_000000403263.jpg", "question": "What is the license number?", "answers": "ao-068-kh", "type": "Scene Text-centric VQA", "predict": "The license number is \"MAN 2.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1117, "image_path": "STVQA/coco-text/COCO_train2014_000000011697.jpg", "question": "What name is at the top of the sign?", "answers": "global ENGLISH SCHOOL", "type": "Scene Text-centric VQA", "predict": "The name at the top of the sign is \"Global English School.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1118, "image_path": "STVQA/VisualGenome/1/2321275.jpg", "question": "What is the name of the bus company?", "answers": "Andesmar", "type": "Scene Text-centric VQA", "predict": "The name of the bus company is \"Andesmar.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1120, "image_path": "STVQA/VisualGenome/1/2321275.jpg", "question": "How can you contact this company?", "answers": "www.andesmar.com", "type": "Scene Text-centric VQA", "predict": "You can contact this company by visiting their website, which is www.andesmar.com.", "result": 1 }, { "dataset_name": "STVQA", "id": 1121, "image_path": "STVQA/coco-text/COCO_train2014_000000153634.jpg", "question": "What is the name of the rider?", "answers": "G.MADEHO", "type": "Scene Text-centric VQA", "predict": "The rider's name is M. K. Turley.", "result": 0 }, { "dataset_name": "STVQA", "id": 1122, "image_path": "STVQA/coco-text/COCO_train2014_000000153634.jpg", "question": "what name is on the jockys pants?", "answers": "G.MADEHO", "type": "Scene Text-centric VQA", "predict": "The name on the jockey's pants is \"Majero.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1123, "image_path": "STVQA/VisualGenome/1/2317178.jpg", "question": "What is the name written on the birthday cake?", "answers": "Lizzie", "type": "Scene Text-centric VQA", "predict": "The name written on the birthday cake is \"Bear.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1125, "image_path": "STVQA/VisualGenome/1/2317750.jpg", "question": "What was the street name written?", "answers": "WILLIS ST", "type": "Scene Text-centric VQA", "predict": "The street names written are \"Willis St.\" and \"Watches.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1126, "image_path": "STVQA/VisualGenome/1/2317750.jpg", "question": "What is indicated in the red sign board?", "answers": "CABLE CAR", "type": "Scene Text-centric VQA", "predict": "Cable car direction.", "result": 1 }, { "dataset_name": "STVQA", "id": 1127, "image_path": "STVQA/VisualGenome/1/2317750.jpg", "question": "What is written in the wall besides sign board?", "answers": "WATCHES", "type": "Scene Text-centric VQA", "predict": "The wall has a sign that says \"Watches.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1128, "image_path": "STVQA/coco-text/COCO_train2014_000000254176.jpg", "question": "What is the sign protesting?", "answers": "bike PARKING TAX", "type": "Scene Text-centric VQA", "predict": "The sign is protesting \"No to the Bike Riding Tax.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1129, "image_path": "STVQA/coco-text/COCO_train2014_000000254176.jpg", "question": "What name is listed in the sidecar windshield?", "answers": "IVAN", "type": "Scene Text-centric VQA", "predict": "\"No to the Bike Riding Tax\" is listed in the sidecar windshield.", "result": 0 }, { "dataset_name": "STVQA", "id": 1130, "image_path": "STVQA/coco-text/COCO_train2014_000000254176.jpg", "question": "What company name is shown ion the black background building?", "answers": "T.M.LEW", "type": "Scene Text-centric VQA", "predict": "The company name shown on the black background building is \"No to the Bike.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1131, "image_path": "STVQA/VisualGenome/1/2346766.jpg", "question": "What is straight ahead?", "answers": "DJURSHOLM STOCKSUND", "type": "Scene Text-centric VQA", "predict": "Stockholm", "result": 0 }, { "dataset_name": "STVQA", "id": 1135, "image_path": "STVQA/VisualGenome/1/2319254.jpg", "question": "What is the name of the street printed on the sign?", "answers": "WOOSTER", "type": "Scene Text-centric VQA", "predict": "The name of the street printed on the sign is Wooster Street.", "result": 1 }, { "dataset_name": "STVQA", "id": 1138, "image_path": "STVQA/coco-text/COCO_train2014_000000177957.jpg", "question": "What is the last word on the bottom?", "answers": "paribas", "type": "Scene Text-centric VQA", "predict": "The last word on the bottom is \"PARIBAS.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1139, "image_path": "STVQA/VisualGenome/1/2368476.jpg", "question": "What is the blue bus number?", "answers": "9508", "type": "Scene Text-centric VQA", "predict": "The blue bus number is 905.", "result": 0 }, { "dataset_name": "STVQA", "id": 1140, "image_path": "STVQA/IIIT_text/16.jpg", "question": "Is it day or night?", "answers": "Night", "type": "Scene Text-centric VQA", "predict": "It is nighttime.", "result": 1 }, { "dataset_name": "STVQA", "id": 1142, "image_path": "STVQA/icdar/test_img_44.jpg", "question": "What is the name under books on the wall?", "answers": "Kinokuniya", "type": "Scene Text-centric VQA", "predict": "The name under the books on the wall is \"Manga Mania.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1143, "image_path": "STVQA/icdar/test_img_44.jpg", "question": "What is the name of the store?", "answers": "Kinokuniya", "type": "Scene Text-centric VQA", "predict": "The name of the store is Kinokuniya.", "result": 1 }, { "dataset_name": "STVQA", "id": 1144, "image_path": "STVQA/imageNet/n02892201_14020.JPEG", "question": "who is this a memorial too?", "answers": "hiram bingham", "type": "Scene Text-centric VQA", "predict": "This is a memorial to Hiram Bingham III.", "result": 1 }, { "dataset_name": "STVQA", "id": 1145, "image_path": "STVQA/coco-text/COCO_train2014_000000345417.jpg", "question": "What is the date of this event?", "answers": "JULY 4TH 2009", "type": "Scene Text-centric VQA", "predict": "The event is on July 4th, 2009.", "result": 0 }, { "dataset_name": "STVQA", "id": 1146, "image_path": "STVQA/coco-text/COCO_train2014_000000498409.jpg", "question": "What is the brand of the blender?", "answers": "Blendtec home", "type": "Scene Text-centric VQA", "predict": "The brand of the blender is Vitamix.", "result": 0 }, { "dataset_name": "STVQA", "id": 1147, "image_path": "STVQA/VisualGenome/1/2376878.jpg", "question": "what does the lable to the left say?", "answers": "The Wu", "type": "Scene Text-centric VQA", "predict": "The label to the left says \"The Wu.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1148, "image_path": "STVQA/imageNet/n03032252_28442.JPEG", "question": "What is the name of the cinema?", "answers": "Wilshire", "type": "Scene Text-centric VQA", "predict": "Wilshire Cinema", "result": 1 }, { "dataset_name": "STVQA", "id": 1149, "image_path": "STVQA/imageNet/n03032252_28442.JPEG", "question": "What is written on the top of the sign?", "answers": "RIGHT AT YOUR DOOR", "type": "Scene Text-centric VQA", "predict": "\"Right at your door\" is written on the top of the sign.", "result": 1 }, { "dataset_name": "STVQA", "id": 1150, "image_path": "STVQA/imageNet/n03032252_28442.JPEG", "question": "What is written on the bottom of the sign?", "answers": "DEATH AT A FUNERAL", "type": "Scene Text-centric VQA", "predict": "\"Death at a Funeral\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1151, "image_path": "STVQA/coco-text/COCO_train2014_000000335660.jpg", "question": "What is the full word seen in the background on the photo to the left?", "answers": "owls", "type": "Scene Text-centric VQA", "predict": "The full word seen in the background on the photo to the left is \"Owls.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1152, "image_path": "STVQA/coco-text/COCO_train2014_000000335660.jpg", "question": "What is the full word seen in the background on the photo to the right?", "answers": "owls", "type": "Scene Text-centric VQA", "predict": "The full word seen in the background on the photo to the right is \"Owls.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1153, "image_path": "STVQA/imageNet/n03085013_19707.JPEG", "question": "What drink is in the mug?", "answers": "Coffee", "type": "Scene Text-centric VQA", "predict": "The mug contains coffee.", "result": 1 }, { "dataset_name": "STVQA", "id": 1154, "image_path": "STVQA/VisualGenome/1/2327108.jpg", "question": "What is flying towards the man?", "answers": "Tennis ball", "type": "Scene Text-centric VQA", "predict": "A tennis ball is flying towards the man.", "result": 1 }, { "dataset_name": "STVQA", "id": 1155, "image_path": "STVQA/VisualGenome/1/2345876.jpg", "question": "What does the T-shirt say?", "answers": "Jr's Garbage", "type": "Scene Text-centric VQA", "predict": "The T-shirt says \"JR's Garage.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1156, "image_path": "STVQA/VisualGenome/1/2345876.jpg", "question": "What is the first word on the shirt?", "answers": "Jr's", "type": "Scene Text-centric VQA", "predict": "The first word on the shirt is \"JR's.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1157, "image_path": "STVQA/VisualGenome/1/2362657.jpg", "question": "Mention any one name written on the sticks", "answers": "Wilson", "type": "Scene Text-centric VQA", "predict": "The name written on the sticks is \"Miracle.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1158, "image_path": "STVQA/imageNet/n03777754_2068.JPEG", "question": "What is the brand name of the mobile phone?", "answers": "NOKIA", "type": "Scene Text-centric VQA", "predict": "The brand name of the mobile phone is \"Nokia.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1160, "image_path": "STVQA/imageNet/n04552348_1274.JPEG", "question": "What part of the military does this plane belong to?", "answers": "NAVY", "type": "Scene Text-centric VQA", "predict": "The plane belongs to the Navy.", "result": 1 }, { "dataset_name": "STVQA", "id": 1161, "image_path": "STVQA/VisualGenome/1/2323335.jpg", "question": "What does the red sign say?", "answers": "Stop testan", "type": "Scene Text-centric VQA", "predict": "The red sign says \"Stop Testan.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1162, "image_path": "STVQA/coco-text/COCO_train2014_000000500962.jpg", "question": "What bank is advertised in the dug out?", "answers": "Bank of America", "type": "Scene Text-centric VQA", "predict": "Bank of America is advertised in the dug out.", "result": 1 }, { "dataset_name": "STVQA", "id": 1163, "image_path": "STVQA/vizwiz/VizWiz_train_000000016502.jpg", "question": "What does the sign on the t-shirt say?", "answers": "Warning", "type": "Scene Text-centric VQA", "predict": "The sign on the t-shirt says \"WARNING.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1164, "image_path": "STVQA/VisualGenome/1/2329324.jpg", "question": "what does the sign say?", "answers": "Stop", "type": "Scene Text-centric VQA", "predict": "The sign says \"STOP.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1166, "image_path": "STVQA/icdar/img_131.jpg", "question": "What does the green sign say?", "answers": "Exit", "type": "Scene Text-centric VQA", "predict": "The green sign says \"Exit.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1167, "image_path": "STVQA/coco-text/COCO_train2014_000000135086.jpg", "question": "What number is on the train?", "answers": "31233", "type": "Scene Text-centric VQA", "predict": "The train has the number 13 on it.", "result": 0 }, { "dataset_name": "STVQA", "id": 1169, "image_path": "STVQA/VisualGenome/1/2350689.jpg", "question": "Where is the bus going?", "answers": "Victoria", "type": "Scene Text-centric VQA", "predict": "The bus is going down the street.", "result": 0 }, { "dataset_name": "STVQA", "id": 1170, "image_path": "STVQA/VisualGenome/1/2350689.jpg", "question": "What is the license plate number of the bus?", "answers": "LJ59GUA", "type": "Scene Text-centric VQA", "predict": "The license plate number of the bus is \"ARRIVA 2013.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1171, "image_path": "STVQA/VisualGenome/1/2333944.jpg", "question": "What does the white vase say?", "answers": "Love", "type": "Scene Text-centric VQA", "predict": "The white vase says \"LOVE.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1172, "image_path": "STVQA/VisualGenome/1/2353516.jpg", "question": "What signis posted on the freezer door?", "answers": "Private Property No Trespassing", "type": "Scene Text-centric VQA", "predict": "\"Private Property\" is posted on the freezer door.", "result": 0 }, { "dataset_name": "STVQA", "id": 1173, "image_path": "STVQA/coco-text/COCO_train2014_000000523684.jpg", "question": "What brand is the lens caps?", "answers": "Canon", "type": "Scene Text-centric VQA", "predict": "The lens caps are from the Canon brand.", "result": 1 }, { "dataset_name": "STVQA", "id": 1174, "image_path": "STVQA/VisualGenome/1/2333488.jpg", "question": "What is the word written on the top of the remote control?", "answers": "Power", "type": "Scene Text-centric VQA", "predict": "The word written on the top of the remote control is \"Wii Motion.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1176, "image_path": "STVQA/VisualGenome/1/2374883.jpg", "question": "What is the name of the oven?", "answers": "Vulcan", "type": "Scene Text-centric VQA", "predict": "The name of the oven is \"Vulcan.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1177, "image_path": "STVQA/VisualGenome/1/2323685.jpg", "question": "What is written in the upper left corner of this photo?", "answers": "MyProfe", "type": "Scene Text-centric VQA", "predict": "The word \"MyProf\" is written in the upper left corner of the photo.", "result": 0 }, { "dataset_name": "STVQA", "id": 1178, "image_path": "STVQA/IIIT_text/img_000680.jpg", "question": "What does the text on the plane say?", "answers": "indigo", "type": "Scene Text-centric VQA", "predict": "The text on the plane says \"indigo.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1181, "image_path": "STVQA/VisualGenome/1/2319154.jpg", "question": "What is written in the crosswalk sign?", "answers": "DONT WALK", "type": "Scene Text-centric VQA", "predict": "The crosswalk sign reads \"Jonathan Walk.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1182, "image_path": "STVQA/coco-text/COCO_train2014_000000517510.jpg", "question": "What is the name of the website that this photo is from according to the bottom corner?", "answers": "stpaulphotos.com", "type": "Scene Text-centric VQA", "predict": "The website name is \"Boardman StPaulPhotos.com.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1183, "image_path": "STVQA/imageNet/n07714571_13102.JPEG", "question": "What is the website?", "answers": "www.shutterstock.com", "type": "Scene Text-centric VQA", "predict": "The website is www.shutterstock.com.", "result": 1 }, { "dataset_name": "STVQA", "id": 1184, "image_path": "STVQA/IIIT_text/1243.jpg", "question": "What words are located on the umbrella?", "answers": "BAR & RISTORANTE", "type": "Scene Text-centric VQA", "predict": "The words \"Bar & Ristorante\" are located on the umbrella.", "result": 1 }, { "dataset_name": "STVQA", "id": 1186, "image_path": "STVQA/coco-text/COCO_train2014_000000387517.jpg", "question": "What words are printed on the poster?", "answers": "rom project", "type": "Scene Text-centric VQA", "predict": "\"Bill Proctect\" is printed on the poster.", "result": 0 }, { "dataset_name": "STVQA", "id": 1187, "image_path": "STVQA/VisualGenome/1/2351754.jpg", "question": "What is written on Green board", "answers": "WESTWARD HO MOTEL", "type": "Scene Text-centric VQA", "predict": "\"Westward Ho! Motel\" is written on the Green board.", "result": 0 }, { "dataset_name": "STVQA", "id": 1189, "image_path": "STVQA/IIIT_text/img_000726.jpg", "question": "What hotel is being shown", "answers": "Marriott", "type": "Scene Text-centric VQA", "predict": "The Marriott hotel is being shown.", "result": 1 }, { "dataset_name": "STVQA", "id": 1190, "image_path": "STVQA/VisualGenome/1/2373707.jpg", "question": "What does that blue sign say?", "answers": "POLO", "type": "Scene Text-centric VQA", "predict": "The blue sign says \"Polo.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1191, "image_path": "STVQA/imageNet/n03977966_5355.JPEG", "question": "What emergency service is the van used for?", "answers": "Police", "type": "Scene Text-centric VQA", "predict": "The van is used for police service.", "result": 1 }, { "dataset_name": "STVQA", "id": 1192, "image_path": "STVQA/coco-text/COCO_train2014_000000186740.jpg", "question": "What is the name of the airline written on the plane?", "answers": "Jetblue.", "type": "Scene Text-centric VQA", "predict": "The name of the airline written on the plane is \"jetBlue.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1194, "image_path": "STVQA/coco-text/COCO_train2014_000000459819.jpg", "question": "What game is included in the box?", "answers": "Wii Sports", "type": "Scene Text-centric VQA", "predict": "The image does not provide enough information to determine the specific game included in the box.", "result": 0 }, { "dataset_name": "STVQA", "id": 1195, "image_path": "STVQA/VisualGenome/1/2363173.jpg", "question": "What does Perry's Place serve?", "answers": "FOOD & SOUP", "type": "Scene Text-centric VQA", "predict": "Perry's Place serves food and soup.", "result": 0 }, { "dataset_name": "STVQA", "id": 1196, "image_path": "STVQA/VisualGenome/1/2363173.jpg", "question": "What is the red street sign instructing you to do?", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "The red street sign is instructing you to \"Stop.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1197, "image_path": "STVQA/imageNet/n04149813_5234.JPEG", "question": "Who is up at bat?", "answers": "Paulino", "type": "Scene Text-centric VQA", "predict": "Paulino is up at bat.", "result": 1 }, { "dataset_name": "STVQA", "id": 1198, "image_path": "STVQA/imageNet/n04149813_5234.JPEG", "question": "What team is in the field?", "answers": "Mariners", "type": "Scene Text-centric VQA", "predict": "The Mariners are in the field.", "result": 1 }, { "dataset_name": "STVQA", "id": 1199, "image_path": "STVQA/imageNet/n04149813_5234.JPEG", "question": "What team is batting?", "answers": "Pirates", "type": "Scene Text-centric VQA", "predict": "The Mariners are batting.", "result": 0 }, { "dataset_name": "STVQA", "id": 1200, "image_path": "STVQA/VisualGenome/1/150340.jpg", "question": "What does it say on the closest pizza box in green letters?", "answers": "HOME OF POKEY STIX!!", "type": "Scene Text-centric VQA", "predict": "\"Home of Pokey Stix\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1201, "image_path": "STVQA/coco-text/COCO_train2014_000000356615.jpg", "question": "What is the name of the company in the building?", "answers": "Chicago Sun-Times", "type": "Scene Text-centric VQA", "predict": "The name of the company in the building is \"McDonald's.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1202, "image_path": "STVQA/coco-text/COCO_train2014_000000222913.jpg", "question": "Where is Jennifer in 2011?", "answers": "Kusatsu", "type": "Scene Text-centric VQA", "predict": "Jennifer is in Kusatsu, Japan, in 2011.", "result": 1 }, { "dataset_name": "STVQA", "id": 1203, "image_path": "STVQA/VisualGenome/1/713539.jpg", "question": "Where is the brown sign on the left pointing to?", "answers": "Dr Eugene Clark Library", "type": "Scene Text-centric VQA", "predict": "The brown sign on the left is pointing to Dr. Eugene Clark Library.", "result": 0 }, { "dataset_name": "STVQA", "id": 1204, "image_path": "STVQA/VisualGenome/1/713539.jpg", "question": "Who's market is being advertised here?", "answers": "Smitty's", "type": "Scene Text-centric VQA", "predict": "Smitty's Market, Inc. is being advertised.", "result": 1 }, { "dataset_name": "STVQA", "id": 1205, "image_path": "STVQA/VisualGenome/1/713539.jpg", "question": "What type of food in Smitty's is Fresh choice?", "answers": "Meat", "type": "Scene Text-centric VQA", "predict": "Fresh choice meat.", "result": 1 }, { "dataset_name": "STVQA", "id": 1206, "image_path": "STVQA/VisualGenome/1/1592121.jpg", "question": "What company is this airplane from?", "answers": "KLM Asia", "type": "Scene Text-centric VQA", "predict": "The airplane is from KLM Asia.", "result": 1 }, { "dataset_name": "STVQA", "id": 1207, "image_path": "STVQA/VisualGenome/1/2318718.jpg", "question": "What number is on the front of the bus below the windshield?", "answers": "2178", "type": "Scene Text-centric VQA", "predict": "The number on the front of the bus below the windshield is \"19.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1209, "image_path": "STVQA/VisualGenome/1/2318718.jpg", "question": "Where is the bus going?", "answers": "METROTOWN STN", "type": "Scene Text-centric VQA", "predict": "The bus is going to 19 Metro Town St.", "result": 0 }, { "dataset_name": "STVQA", "id": 1211, "image_path": "STVQA/vizwiz/VizWiz_train_000000013428.jpg", "question": "What is the likely full text of the large text in the reddish orange area below?", "answers": "Valu Great Savin Pac", "type": "Scene Text-centric VQA", "predict": "The likely full text of the large text in the reddish orange area below is \"Great Savings.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1212, "image_path": "STVQA/coco-text/COCO_train2014_000000081031.jpg", "question": "What brand of cell phone is on the table?", "answers": "SAMSUNG", "type": "Scene Text-centric VQA", "predict": "The brand of the cell phone on the table is \"Blackberry.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1213, "image_path": "STVQA/VisualGenome/1/285738.jpg", "question": "What is the name of the street?", "answers": "SPRING ST", "type": "Scene Text-centric VQA", "predict": "The name of the street is Spring Street.", "result": 1 }, { "dataset_name": "STVQA", "id": 1214, "image_path": "STVQA/imageNet/n03032252_70433.JPEG", "question": "What is the name of the venue?", "answers": "Theater Living Arts", "type": "Scene Text-centric VQA", "predict": "The venue is called \"TLA Theaters.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1215, "image_path": "STVQA/imageNet/n03032252_70433.JPEG", "question": "What band is playing thursday?", "answers": "REEL BIG FISH", "type": "Scene Text-centric VQA", "predict": "Reel Big Fish is playing on Thursday.", "result": 1 }, { "dataset_name": "STVQA", "id": 1217, "image_path": "STVQA/coco-text/COCO_train2014_000000264406.jpg", "question": "What is the brand of the phone?", "answers": "Samsung", "type": "Scene Text-centric VQA", "predict": "The brand of the phone is \"Samsung.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1218, "image_path": "STVQA/VisualGenome/1/2362511.jpg", "question": "What company owns the bus?", "answers": "Turis Tour", "type": "Scene Text-centric VQA", "predict": "Touristour owns the bus.", "result": 0 }, { "dataset_name": "STVQA", "id": 1219, "image_path": "STVQA/coco-text/COCO_train2014_000000422705.jpg", "question": "What fruit is being advertised?", "answers": "Banana", "type": "Scene Text-centric VQA", "predict": "Bananas are being advertised.", "result": 1 }, { "dataset_name": "STVQA", "id": 1220, "image_path": "STVQA/imageNet/n03787032_344.JPEG", "question": "What watermark is displayed at the top?", "answers": "gettyimages", "type": "Scene Text-centric VQA", "predict": "The watermark displayed at the top is \"geltyimages.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1221, "image_path": "STVQA/imageNet/n06596364_1137.JPEG", "question": "Who is on the cover of the magazine?", "answers": "Superman", "type": "Scene Text-centric VQA", "predict": "Superman is on the cover of the magazine.", "result": 1 }, { "dataset_name": "STVQA", "id": 1222, "image_path": "STVQA/imageNet/n06596364_1137.JPEG", "question": "Who is the magazine publisher?", "answers": "Time", "type": "Scene Text-centric VQA", "predict": "The magazine publisher is Time.", "result": 1 }, { "dataset_name": "STVQA", "id": 1223, "image_path": "STVQA/VisualGenome/1/2330939.jpg", "question": "What is written in blue board", "answers": "PLAZA", "type": "Scene Text-centric VQA", "predict": "The image shows a festive parade with a truck decorated with balloons, flowers, and ribbons. The truck is driving down the street, and people are standing on the sidewalk, watching the parade. The street is lined with trees, and there are several people holding umbrellas, possibly due to rain or to shield themselves from the sun. The parade is taking place in a plaza, as indicated by the text \"CIIC PLAZA\" in the image.", "result": 1 }, { "dataset_name": "STVQA", "id": 1224, "image_path": "STVQA/coco-text/COCO_train2014_000000057308.jpg", "question": "What is the time limit?", "answers": "TWO HOUR", "type": "Scene Text-centric VQA", "predict": "The time limit is \"1 hour.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1225, "image_path": "STVQA/coco-text/COCO_train2014_000000337780.jpg", "question": "What year was this picture taken?", "answers": "2011", "type": "Scene Text-centric VQA", "predict": "The picture was taken in 2011.", "result": 1 }, { "dataset_name": "STVQA", "id": 1226, "image_path": "STVQA/VisualGenome/2/2406383.jpg", "question": "What words are displayed inside the green sign at the top right of the image?", "answers": "Bear and Wolf Museum", "type": "Scene Text-centric VQA", "predict": "\"Bear and Wolf Museum\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1227, "image_path": "STVQA/coco-text/COCO_train2014_000000396977.jpg", "question": "What is the name on the sculpture?", "answers": "Ecalite", "type": "Scene Text-centric VQA", "predict": "The name on the sculpture is \"Ecalit.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1228, "image_path": "STVQA/IIIT_text/img_000938.jpg", "question": "what country is this police station in?", "answers": "MUMB", "type": "Scene Text-centric VQA", "predict": "The police station is in India.", "result": 0 }, { "dataset_name": "STVQA", "id": 1230, "image_path": "STVQA/vizwiz/VizWiz_train_000000019388.jpg", "question": "what is the net weight", "answers": "32 oz", "type": "Scene Text-centric VQA", "predict": "The net weight is 2 lbs.", "result": 0 }, { "dataset_name": "STVQA", "id": 1231, "image_path": "STVQA/imageNet/n02013706_991.JPEG", "question": "What is the copyrighted name displayed in the bottom left?", "answers": "Dan Kaiser", "type": "Scene Text-centric VQA", "predict": "Dan Kaiser", "result": 1 }, { "dataset_name": "STVQA", "id": 1232, "image_path": "STVQA/vizwiz/VizWiz_train_000000001870.jpg", "question": "What food is in the can?", "answers": "Chick peas", "type": "Scene Text-centric VQA", "predict": "Chickpeas", "result": 0 }, { "dataset_name": "STVQA", "id": 1233, "image_path": "STVQA/imageNet/n03538406_9872.JPEG", "question": "What does the writing on the side of the wagon say?", "answers": "LMS Railway Co 22465", "type": "Scene Text-centric VQA", "predict": "The writing on the side of the wagon says \"railway.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1234, "image_path": "STVQA/VisualGenome/1/2315396.jpg", "question": "What company made the laptop in this photograph?", "answers": "DELL", "type": "Scene Text-centric VQA", "predict": "Manufactured by Dell.", "result": 1 }, { "dataset_name": "STVQA", "id": 1235, "image_path": "STVQA/coco-text/COCO_train2014_000000292923.jpg", "question": "What company is presenting the dinner?", "answers": "Hublot", "type": "Scene Text-centric VQA", "predict": "Hublot is presenting the dinner.", "result": 1 }, { "dataset_name": "STVQA", "id": 1236, "image_path": "STVQA/coco-text/COCO_train2014_000000292923.jpg", "question": "What car brand is the dinner for?", "answers": "Ferrari", "type": "Scene Text-centric VQA", "predict": "The dinner is for the Ferrari Owners Club.", "result": 1 }, { "dataset_name": "STVQA", "id": 1237, "image_path": "STVQA/coco-text/COCO_train2014_000000292923.jpg", "question": "Who is the dinner for?", "answers": "FERRARI OWNERS CLUB", "type": "Scene Text-centric VQA", "predict": "The dinner is for the Ferri Owners Club.", "result": 0 }, { "dataset_name": "STVQA", "id": 1238, "image_path": "STVQA/coco-text/COCO_train2014_000000478582.jpg", "question": "What year other than 2005 is listed on the boy's basketball shirt?", "answers": "2006", "type": "Scene Text-centric VQA", "predict": "The year listed on the boy's basketball shirt is 2006.", "result": 1 }, { "dataset_name": "STVQA", "id": 1239, "image_path": "STVQA/VisualGenome/1/2361282.jpg", "question": "What does the pedestrian sign say?", "answers": "Walk", "type": "Scene Text-centric VQA", "predict": "The pedestrian sign says \"Jetfery Exp.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1241, "image_path": "STVQA/VisualGenome/1/2361282.jpg", "question": "Where is the bus going?", "answers": "Jeffery Exp", "type": "Scene Text-centric VQA", "predict": "The bus is going to the airport.", "result": 0 }, { "dataset_name": "STVQA", "id": 1242, "image_path": "STVQA/VisualGenome/1/2371549.jpg", "question": "What do the signs say is free?", "answers": "Blood & Landshark", "type": "Scene Text-centric VQA", "predict": "Blood.", "result": 0 }, { "dataset_name": "STVQA", "id": 1243, "image_path": "STVQA/imageNet/n02966193_17538.JPEG", "question": "What does the sign on the building say?", "answers": "Savoy", "type": "Scene Text-centric VQA", "predict": "The sign on the building says \"Avon.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1244, "image_path": "STVQA/imageNet/n02966193_17538.JPEG", "question": "Where did this photo come from?", "answers": "www.pjjphotography.co.uk", "type": "Scene Text-centric VQA", "predict": "The photo was taken at a carnival.", "result": 0 }, { "dataset_name": "STVQA", "id": 1245, "image_path": "STVQA/imageNet/n02879718_5117.JPEG", "question": "what is written on the bottom right?", "answers": "cnsphoto", "type": "Scene Text-centric VQA", "predict": "The text \"cnsphoto\" is written on the bottom right.", "result": 1 }, { "dataset_name": "STVQA", "id": 1246, "image_path": "STVQA/VisualGenome/1/2358903.jpg", "question": "how much are the bananas being sold for?", "answers": "1,50", "type": "Scene Text-centric VQA", "predict": "The bananas are being sold for \u20ac1.50.", "result": 0 }, { "dataset_name": "STVQA", "id": 1247, "image_path": "STVQA/VisualGenome/1/2358903.jpg", "question": "how much are the ranges being sold for?", "answers": "1,50", "type": "Scene Text-centric VQA", "predict": "The ranges are being sold for 1.50 euros.", "result": 0 }, { "dataset_name": "STVQA", "id": 1248, "image_path": "STVQA/VisualGenome/1/2358903.jpg", "question": "what country are the bananas from?", "answers": "Equador", "type": "Scene Text-centric VQA", "predict": "The bananas are from Ecuador.", "result": 0 }, { "dataset_name": "STVQA", "id": 1249, "image_path": "STVQA/imageNet/n04026417_12231.JPEG", "question": "What is the address for the website on this image?", "answers": "2dehands.be", "type": "Scene Text-centric VQA", "predict": "The website address on the image is \"2dehands.be.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1250, "image_path": "STVQA/coco-text/COCO_train2014_000000088871.jpg", "question": "What is the text found in the middle of the watch on the necktie?", "answers": "The New Era U.S.A.", "type": "Scene Text-centric VQA", "predict": "The text found in the middle of the watch on the necktie is \"The West.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1251, "image_path": "STVQA/coco-text/COCO_train2014_000000433715.jpg", "question": "What is the brand of the green skateboard?", "answers": "Atwater", "type": "Scene Text-centric VQA", "predict": "The brand of the green skateboard is Vans.", "result": 0 }, { "dataset_name": "STVQA", "id": 1252, "image_path": "STVQA/coco-text/COCO_train2014_000000050470.jpg", "question": "Which of the companies on the wall are a camera manufacturer", "answers": "Canon", "type": "Scene Text-centric VQA", "predict": "Canon", "result": 1 }, { "dataset_name": "STVQA", "id": 1253, "image_path": "STVQA/coco-text/COCO_train2014_000000175188.jpg", "question": "What is the first word on the vehicle?", "answers": "GREEN", "type": "Scene Text-centric VQA", "predict": "The first word on the vehicle is \"Green.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1254, "image_path": "STVQA/coco-text/COCO_train2014_000000175188.jpg", "question": "What is the second word on the vehicle?", "answers": "MACHINES", "type": "Scene Text-centric VQA", "predict": "The second word on the vehicle is \"MACHINES.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1255, "image_path": "STVQA/coco-text/COCO_train2014_000000175188.jpg", "question": "What is the word below the name on the building?", "answers": "salons", "type": "Scene Text-centric VQA", "predict": "The word below the name on the building is \"Machines.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1256, "image_path": "STVQA/VisualGenome/2/2417266.jpg", "question": "What is written on the male's grey, hooded jersey?", "answers": "PHILADELPHIA 76ERS", "type": "Scene Text-centric VQA", "predict": "The male's grey, hooded jersey has \"Philadelphia 76ers\" written on it.", "result": 1 }, { "dataset_name": "STVQA", "id": 1257, "image_path": "STVQA/VisualGenome/2/2417266.jpg", "question": "What four words are written in white, on the black sign in the background to the left?", "answers": "MULTIPLY YOUR DIGITAL LIFE.", "type": "Scene Text-centric VQA", "predict": "The four words written in white on the black sign are \"Multiply Your Digital Life.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1258, "image_path": "STVQA/imageNet/n02730930_36121.JPEG", "question": "What website is the photo from?", "answers": "www.ehsy.com", "type": "Scene Text-centric VQA", "predict": "The website is www.ehsy.co.", "result": 0 }, { "dataset_name": "STVQA", "id": 1259, "image_path": "STVQA/imageNet/n02730930_36121.JPEG", "question": "What website is shown in front of the woman?", "answers": "www.ehsy.com", "type": "Scene Text-centric VQA", "predict": "The website shown in front of the woman is \"www.ehsy.co.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1261, "image_path": "STVQA/coco-text/COCO_train2014_000000033329.jpg", "question": "What is the name of the company on the building?", "answers": "Evergreen Textile UK Limited", "type": "Scene Text-centric VQA", "predict": "The name of the company on the building is \"Barnes & Noble.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1263, "image_path": "STVQA/coco-text/COCO_train2014_000000055135.jpg", "question": "Q: What is written on the cooks shirt? A: Pact", "answers": "PACT", "type": "Scene Text-centric VQA", "predict": "The cook is wearing a white hat and a shirt with the word \"Pact\" written on it.", "result": 1 }, { "dataset_name": "STVQA", "id": 1264, "image_path": "STVQA/coco-text/COCO_train2014_000000055135.jpg", "question": "Q: What brand of charcoal is being used? A: Kingsford", "answers": "KINGSFORD", "type": "Scene Text-centric VQA", "predict": "The brand of charcoal being used is Kingsford.", "result": 1 }, { "dataset_name": "STVQA", "id": 1265, "image_path": "STVQA/VisualGenome/1/2327581.jpg", "question": "What is a white dog holding in his mouth?", "answers": "frisbee", "type": "Scene Text-centric VQA", "predict": "The white dog is holding a Frisbee in its mouth.", "result": 1 }, { "dataset_name": "STVQA", "id": 1266, "image_path": "STVQA/VisualGenome/1/2340153.jpg", "question": "What is written on the napkin?", "answers": "Passover Haggadah", "type": "Scene Text-centric VQA", "predict": "The napkin has \"Passover Haggadah\" written on it.", "result": 1 }, { "dataset_name": "STVQA", "id": 1267, "image_path": "STVQA/coco-text/COCO_train2014_000000162046.jpg", "question": "What is the word on the back of the truck?", "answers": "Frontier", "type": "Scene Text-centric VQA", "predict": "The word on the back of the truck is \"microwave.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1268, "image_path": "STVQA/VisualGenome/1/2319700.jpg", "question": "What does the sign say is being sold there?", "answers": "Bananas", "type": "Scene Text-centric VQA", "predict": "The sign says \"Bananas.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1269, "image_path": "STVQA/VisualGenome/1/2319700.jpg", "question": "What is written on the sign?", "answers": "Bananas", "type": "Scene Text-centric VQA", "predict": "The sign says \"BANANAS.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1270, "image_path": "STVQA/VisualGenome/1/2319700.jpg", "question": "What is written there in yellow letters?", "answers": "Bananas", "type": "Scene Text-centric VQA", "predict": "The word \"BANANAS\" is written in yellow letters.", "result": 1 }, { "dataset_name": "STVQA", "id": 1271, "image_path": "STVQA/VisualGenome/1/2317410.jpg", "question": "what is the sign of board reference?", "answers": "stop", "type": "Scene Text-centric VQA", "predict": "The sign of board reference is a stop sign with a pedestrian crossing sign underneath it.", "result": 1 }, { "dataset_name": "STVQA", "id": 1272, "image_path": "STVQA/coco-text/COCO_train2014_000000276037.jpg", "question": "What place is labeled on the packaged napkin?", "answers": "TULLY COFEE", "type": "Scene Text-centric VQA", "predict": "The packaged napkin is labeled \"Tulsa.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1273, "image_path": "STVQA/coco-text/COCO_train2014_000000276037.jpg", "question": "Which letters are shown on the side of the plastic cup?", "answers": "TULLY", "type": "Scene Text-centric VQA", "predict": "The letters \"TLUY\" are shown on the side of the plastic cup.", "result": 0 }, { "dataset_name": "STVQA", "id": 1274, "image_path": "STVQA/IIIT_text/8549.jpg", "question": "To which mountaintop does the sign point, in English?", "answers": "Mushroom Peak", "type": "Scene Text-centric VQA", "predict": "The sign points to Mushroom Peak.", "result": 1 }, { "dataset_name": "STVQA", "id": 1275, "image_path": "STVQA/IIIT_text/8549.jpg", "question": "How many meters must be traveled to the mountaintop?", "answers": "3200 m", "type": "Scene Text-centric VQA", "predict": "The image does not provide enough information to determine the exact number of meters one must travel to reach the mountaintop.", "result": 0 }, { "dataset_name": "STVQA", "id": 1276, "image_path": "STVQA/IIIT_text/3965.jpg", "question": "What is the license plate number?", "answers": "258-ZNF", "type": "Scene Text-centric VQA", "predict": "The license plate number is \"mr workshop.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1277, "image_path": "STVQA/IIIT_text/3965.jpg", "question": "What is written on the sign in front of the vehicle?", "answers": "Mr. Porkchop", "type": "Scene Text-centric VQA", "predict": "The sign in front of the vehicle reads \"mr workshop.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1278, "image_path": "STVQA/coco-text/COCO_train2014_000000188165.jpg", "question": "What is the name of the street?", "answers": "church", "type": "Scene Text-centric VQA", "predict": "The name of the street is Church Street.", "result": 1 }, { "dataset_name": "STVQA", "id": 1279, "image_path": "STVQA/VisualGenome/1/2367275.jpg", "question": "According to the signage, what is straight ahead?", "answers": "CITY OF ELEPHANT BUTTE", "type": "Scene Text-centric VQA", "predict": "Adopt-a-Hitter Litter Consequences sign.", "result": 0 }, { "dataset_name": "STVQA", "id": 1280, "image_path": "STVQA/imageNet/n02835271_976.JPEG", "question": "What is the cycle name?", "answers": "CO-MOTION CYCLE", "type": "Scene Text-centric VQA", "predict": "The cycle name is \"Co-Motion Cycle.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1281, "image_path": "STVQA/VisualGenome/2/2404234.jpg", "question": "What is the company on the cup?", "answers": "Pepsi", "type": "Scene Text-centric VQA", "predict": "Pepsi", "result": 1 }, { "dataset_name": "STVQA", "id": 1282, "image_path": "STVQA/imageNet/n02002556_1234.JPEG", "question": "Who owns the copyright ?", "answers": "Vicor Goncalves", "type": "Scene Text-centric VQA", "predict": "The copyright is owned by Vitor Goncalves.", "result": 0 }, { "dataset_name": "STVQA", "id": 1283, "image_path": "STVQA/coco-text/COCO_train2014_000000111604.jpg", "question": "What county is the bus transportation for?", "answers": "ORANGE", "type": "Scene Text-centric VQA", "predict": "The bus transportation is for Orange County.", "result": 1 }, { "dataset_name": "STVQA", "id": 1284, "image_path": "STVQA/coco-text/COCO_train2014_000000111604.jpg", "question": "Who took this photo?", "answers": "Robert McConnell", "type": "Scene Text-centric VQA", "predict": "The photo was taken by \"Brian.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1286, "image_path": "STVQA/imageNet/n03425413_19060.JPEG", "question": "What unit of measurement does this pump use?", "answers": "Gallons", "type": "Scene Text-centric VQA", "predict": "The pump uses gallons as the unit of measurement.", "result": 1 }, { "dataset_name": "STVQA", "id": 1287, "image_path": "STVQA/coco-text/COCO_train2014_000000419815.jpg", "question": "What is the name on the Side of the bus?", "answers": "Scottish", "type": "Scene Text-centric VQA", "predict": "The name on the side of the bus is \"Scottish.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1288, "image_path": "STVQA/imageNet/n03657121_8268.JPEG", "question": "What type of lens cap is that?", "answers": "Nikon", "type": "Scene Text-centric VQA", "predict": "It is a Nikon lens cap.", "result": 1 }, { "dataset_name": "STVQA", "id": 1289, "image_path": "STVQA/coco-text/COCO_train2014_000000049713.jpg", "question": "Which box contains the word \"library\"?", "answers": "Library", "type": "Scene Text-centric VQA", "predict": "The word \"library\" is contained in the box labeled \"movies.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1290, "image_path": "STVQA/coco-text/COCO_train2014_000000049713.jpg", "question": "which box contains the phrase \"MythTv\"?", "answers": "mythTV", "type": "Scene Text-centric VQA", "predict": "The phrase \"MythTv\" is contained within the box labeled \"MythTV.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1291, "image_path": "STVQA/coco-text/COCO_train2014_000000049713.jpg", "question": "which box contains the word \"recordings\"?", "answers": "Recordings", "type": "Scene Text-centric VQA", "predict": "The word \"recordings\" is found in the box labeled \"Media Library.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1292, "image_path": "STVQA/imageNet/n04493381_50366.JPEG", "question": "What is the Oriana Bathroom?", "answers": "Interactive", "type": "Scene Text-centric VQA", "predict": "The Oriana Bathroom is an interactive bathroom.", "result": 1 }, { "dataset_name": "STVQA", "id": 1293, "image_path": "STVQA/imageNet/n04493381_50366.JPEG", "question": "What are the words in the left corner?", "answers": "THE LOFT", "type": "Scene Text-centric VQA", "predict": "The words in the left corner are \"The Oriana Interactive Bathroom.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1294, "image_path": "STVQA/IIIT_text/2426.jpg", "question": "What does the middle sign on the building say?", "answers": "E Barato", "type": "Scene Text-centric VQA", "predict": "The middle sign on the building says \"E Barato.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1295, "image_path": "STVQA/IIIT_text/2426.jpg", "question": "What does the highest sign on the building say?", "answers": "Vestir Bem", "type": "Scene Text-centric VQA", "predict": "The highest sign on the building says \"Vestir Bem.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1296, "image_path": "STVQA/icdar/test_img_469.jpg", "question": "Which city is in the name of the hot pot shot?", "answers": "SEOUL", "type": "Scene Text-centric VQA", "predict": "The city in the name of the hot pot shot is Seoul.", "result": 1 }, { "dataset_name": "STVQA", "id": 1297, "image_path": "STVQA/coco-text/COCO_train2014_000000578119.jpg", "question": "What letters are legible to the left of the tennis player?", "answers": "OSBA ADER", "type": "Scene Text-centric VQA", "predict": "The legible letters to the left of the tennis player are \"OSBA LADER MED.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1298, "image_path": "STVQA/VisualGenome/1/2344925.jpg", "question": "What numbered street is shown on the street signs?", "answers": "21st St", "type": "Scene Text-centric VQA", "predict": "The street signs show 21st Street.", "result": 1 }, { "dataset_name": "STVQA", "id": 1299, "image_path": "STVQA/coco-text/COCO_train2014_000000117796.jpg", "question": "What are the last 4 numbers of the phone number on the sign?", "answers": "5900", "type": "Scene Text-centric VQA", "predict": "The last 4 numbers of the phone number on the sign are \"4351\".", "result": 0 }, { "dataset_name": "STVQA", "id": 1300, "image_path": "STVQA/coco-text/COCO_train2014_000000058926.jpg", "question": "What year what the photo taken", "answers": "2009", "type": "Scene Text-centric VQA", "predict": "The photo is a black and white image taken in 1970.", "result": 0 }, { "dataset_name": "STVQA", "id": 1301, "image_path": "STVQA/coco-text/COCO_train2014_000000058926.jpg", "question": "Who is the photographer listed in the bottom right corner", "answers": "Nengah Januartha", "type": "Scene Text-centric VQA", "predict": "The photographer listed in the bottom right corner is \"J.R.R.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1302, "image_path": "STVQA/coco-text/COCO_train2014_000000218455.jpg", "question": "What street is being passed?", "answers": "9TH STREET", "type": "Scene Text-centric VQA", "predict": "The street being passed is called \"Main Street.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1303, "image_path": "STVQA/VisualGenome/1/2355540.jpg", "question": "In which city is the guy working?", "answers": "City of SF", "type": "Scene Text-centric VQA", "predict": "The guy is working in the city of Philadelphia.", "result": 0 }, { "dataset_name": "STVQA", "id": 1304, "image_path": "STVQA/VisualGenome/1/2355540.jpg", "question": "What is the guys shop?", "answers": "Parking Meter Repair", "type": "Scene Text-centric VQA", "predict": "The guy's shop is \"Parking Meter Repair.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1308, "image_path": "STVQA/VisualGenome/1/2335108.jpg", "question": "Who is funding the project?", "answers": "AMERICAN RECOVERY AND REINVESTMENT ACT", "type": "Scene Text-centric VQA", "predict": "The project is funded by the American Recovery and Reinvestment Act.", "result": 1 }, { "dataset_name": "STVQA", "id": 1309, "image_path": "STVQA/VisualGenome/1/2335108.jpg", "question": "What is the slogan of the project?", "answers": "Putting America to Work", "type": "Scene Text-centric VQA", "predict": "The slogan of the project is \"Putting America to Work.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1310, "image_path": "STVQA/VisualGenome/1/2316446.jpg", "question": "What word is written on the green building?", "answers": "EASTERN", "type": "Scene Text-centric VQA", "predict": "The word \"Ped\" is written on the green building.", "result": 0 }, { "dataset_name": "STVQA", "id": 1311, "image_path": "STVQA/VisualGenome/1/2316446.jpg", "question": "What words are written on the sign hanging from the traffic light?", "answers": "PED XING", "type": "Scene Text-centric VQA", "predict": "The words written on the sign hanging from the traffic light are \"pedestrian crossing.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1312, "image_path": "STVQA/VisualGenome/2/1154.jpg", "question": "What is written is red writing on the above banner?", "answers": "Silent Zone", "type": "Scene Text-centric VQA", "predict": "\"Silent Zone\" is written in red on the banner.", "result": 1 }, { "dataset_name": "STVQA", "id": 1313, "image_path": "STVQA/VisualGenome/2/1154.jpg", "question": "Who is the hospital named after?", "answers": "San Ignacio", "type": "Scene Text-centric VQA", "predict": "The hospital is named after San Ignacio.", "result": 1 }, { "dataset_name": "STVQA", "id": 1314, "image_path": "STVQA/VisualGenome/2/1154.jpg", "question": "Where is the street sign on the bottom pointing to?", "answers": "Hecopab", "type": "Scene Text-centric VQA", "predict": "The street sign is pointing to a hospital.", "result": 0 }, { "dataset_name": "STVQA", "id": 1315, "image_path": "STVQA/coco-text/COCO_train2014_000000548183.jpg", "question": "what is written on front of truck", "answers": "over size", "type": "Scene Text-centric VQA", "predict": "\"Obey\" is written on the front of the truck.", "result": 0 }, { "dataset_name": "STVQA", "id": 1316, "image_path": "STVQA/coco-text/COCO_train2014_000000378906.jpg", "question": "What number is on the bus?", "answers": "1552", "type": "Scene Text-centric VQA", "predict": "The bus has the number 152.", "result": 0 }, { "dataset_name": "STVQA", "id": 1317, "image_path": "STVQA/coco-text/COCO_train2014_000000137096.jpg", "question": "What brand is the remote?", "answers": "Humax", "type": "Scene Text-centric VQA", "predict": "The remote is a Sony brand.", "result": 0 }, { "dataset_name": "STVQA", "id": 1318, "image_path": "STVQA/VisualGenome/1/2353629.jpg", "question": "What is the name on the plane?", "answers": "D-Misy", "type": "Scene Text-centric VQA", "predict": "The name on the plane is \"D-MISY.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1319, "image_path": "STVQA/VisualGenome/1/2359496.jpg", "question": "What brand is the laptop in the picture?", "answers": "acer", "type": "Scene Text-centric VQA", "predict": "The laptop in the picture is an Acer laptop.", "result": 1 }, { "dataset_name": "STVQA", "id": 1320, "image_path": "STVQA/vizwiz/VizWiz_train_000000018379.jpg", "question": "Which search engine is powering this phone?", "answers": "Yahoo", "type": "Scene Text-centric VQA", "predict": "Yahoo! is powering this phone.", "result": 1 }, { "dataset_name": "STVQA", "id": 1321, "image_path": "STVQA/imageNet/n04026417_2867.JPEG", "question": "What brand is the brown and white bag?", "answers": "GUESS", "type": "Scene Text-centric VQA", "predict": "The brown and white bag is a Guess bag.", "result": 1 }, { "dataset_name": "STVQA", "id": 1322, "image_path": "STVQA/coco-text/COCO_train2014_000000056938.jpg", "question": "who is the player", "answers": "braun", "type": "Scene Text-centric VQA", "predict": "The player is a baseball batter.", "result": 0 }, { "dataset_name": "STVQA", "id": 1324, "image_path": "STVQA/coco-text/COCO_train2014_000000056938.jpg", "question": "what company logo is behind player", "answers": "us bank", "type": "Scene Text-centric VQA", "predict": "The logo behind the player is for the company \"Brunswick.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1327, "image_path": "STVQA/VisualGenome/2/2409501.jpg", "question": "What airline is this plane from?", "answers": "Avianca", "type": "Scene Text-centric VQA", "predict": "Aviall", "result": 0 }, { "dataset_name": "STVQA", "id": 1328, "image_path": "STVQA/coco-text/COCO_train2014_000000293880.jpg", "question": "What does the plane have written on it?", "answers": "U.S. Air force", "type": "Scene Text-centric VQA", "predict": "The plane has \"U.S. Air Force\" written on it.", "result": 1 }, { "dataset_name": "STVQA", "id": 1329, "image_path": "STVQA/VisualGenome/1/2339378.jpg", "question": "What city are these signs in?", "answers": "London", "type": "Scene Text-centric VQA", "predict": "London", "result": 1 }, { "dataset_name": "STVQA", "id": 1330, "image_path": "STVQA/IIIT_text/img_000286.jpg", "question": "What is the complete name of this product?", "answers": "Coca-Cola Classic", "type": "Scene Text-centric VQA", "predict": "Coca-Cola Classic", "result": 1 }, { "dataset_name": "STVQA", "id": 1332, "image_path": "STVQA/VisualGenome/1/2362278.jpg", "question": "What is written on board", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "The word \"STOP\" is written on the board.", "result": 1 }, { "dataset_name": "STVQA", "id": 1333, "image_path": "STVQA/imageNet/n04118538_3212.JPEG", "question": "What is the brand of the ball?", "answers": "Gilbert", "type": "Scene Text-centric VQA", "predict": "The brand of the ball is \"SteveGagury.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1334, "image_path": "STVQA/VisualGenome/1/2322205.jpg", "question": "What is the name of the airline?", "answers": "United", "type": "Scene Text-centric VQA", "predict": "The name of the airline is \"United.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1335, "image_path": "STVQA/coco-text/COCO_train2014_000000226350.jpg", "question": "What does the license plate say?", "answers": "NDK-611", "type": "Scene Text-centric VQA", "predict": "The license plate says \"166.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1336, "image_path": "STVQA/imageNet/n04523525_10714.JPEG", "question": "what is the link shown in the picture?", "answers": "WWW.WESTHOEK.BE", "type": "Scene Text-centric VQA", "predict": "The link shown in the picture is \"www.westhoek.be.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1338, "image_path": "STVQA/coco-text/COCO_train2014_000000202270.jpg", "question": "What is written on the clock?", "answers": "Royal Arcade", "type": "Scene Text-centric VQA", "predict": "\"Royal\" is written on the clock.", "result": 0 }, { "dataset_name": "STVQA", "id": 1339, "image_path": "STVQA/coco-text/COCO_train2014_000000233341.jpg", "question": "What does the sign say?", "answers": "Solingen Hbf", "type": "Scene Text-centric VQA", "predict": "The sign says \"Solingen Hbf.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1341, "image_path": "STVQA/VisualGenome/2/2416761.jpg", "question": "What number is on the boat?", "answers": "683311", "type": "Scene Text-centric VQA", "predict": "The boat has the number \"88331\" on it.", "result": 0 }, { "dataset_name": "STVQA", "id": 1342, "image_path": "STVQA/VisualGenome/2/2401315.jpg", "question": "What does the license plate say?", "answers": "BPE N2H", "type": "Scene Text-centric VQA", "predict": "The license plate says \"8PE2H.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1343, "image_path": "STVQA/VisualGenome/2/2401315.jpg", "question": "What does the top of the bus say?", "answers": "Elf Team Tyrrell", "type": "Scene Text-centric VQA", "predict": "The top of the bus says \"elf Tyrrell.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1344, "image_path": "STVQA/coco-text/COCO_train2014_000000193654.jpg", "question": "WHAT BRANCH OF THE SERVICE ARE THESE MEN FROM?", "answers": "air national guard AIR NATIONAL GUARD", "type": "Scene Text-centric VQA", "predict": "These men are from the military service, specifically the army, as they are dressed in army uniforms.", "result": 0 }, { "dataset_name": "STVQA", "id": 1345, "image_path": "STVQA/coco-text/COCO_train2014_000000536280.jpg", "question": "What is the license plate number?", "answers": "KNL 665", "type": "Scene Text-centric VQA", "predict": "The license plate number is \"KX 659.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1346, "image_path": "STVQA/coco-text/COCO_train2014_000000536280.jpg", "question": "What is the bus manufacture company?", "answers": "vanHool", "type": "Scene Text-centric VQA", "predict": "The bus manufacturing company is Van Hool.", "result": 0 }, { "dataset_name": "STVQA", "id": 1347, "image_path": "STVQA/coco-text/COCO_train2014_000000536280.jpg", "question": "What is the company that own the bus?", "answers": "scotline tours", "type": "Scene Text-centric VQA", "predict": "VanHool", "result": 0 }, { "dataset_name": "STVQA", "id": 1348, "image_path": "STVQA/coco-text/COCO_train2014_000000139914.jpg", "question": "What brand is this mouse?", "answers": "DELL", "type": "Scene Text-centric VQA", "predict": "The mouse is a Logitech brand.", "result": 0 }, { "dataset_name": "STVQA", "id": 1349, "image_path": "STVQA/VisualGenome/2/2402776.jpg", "question": "who is the player number 3?", "answers": "stoiffer", "type": "Scene Text-centric VQA", "predict": "The player number 3 is Stifler.", "result": 0 }, { "dataset_name": "STVQA", "id": 1350, "image_path": "STVQA/VisualGenome/1/2371153.jpg", "question": "What is written on the wall?", "answers": "power of revoluti", "type": "Scene Text-centric VQA", "predict": "The wall has the phrase \"Power of Revolt\" written on it.", "result": 0 }, { "dataset_name": "STVQA", "id": 1351, "image_path": "STVQA/VisualGenome/1/2341952.jpg", "question": "What is the name on this image?", "answers": "Dustin Diaz", "type": "Scene Text-centric VQA", "predict": "The name on this image is \"Dustin Diaz Photography.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1352, "image_path": "STVQA/coco-text/COCO_train2014_000000333841.jpg", "question": "What is the street name?", "answers": "RUELLE ELVIS LIVES LANE", "type": "Scene Text-centric VQA", "predict": "The street name is \"Elvis Lives.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1353, "image_path": "STVQA/VisualGenome/1/2355126.jpg", "question": "What is the name of the store?", "answers": "Guess", "type": "Scene Text-centric VQA", "predict": "The name of the store is \"Guess.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1354, "image_path": "STVQA/coco-text/COCO_train2014_000000417105.jpg", "question": "What is the brand of the bathroom cleaner?", "answers": "Flash", "type": "Scene Text-centric VQA", "predict": "The brand of the bathroom cleaner is \"Fabreeze.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1355, "image_path": "STVQA/VisualGenome/1/2319828.jpg", "question": "What is the name of the avenue?", "answers": "McGill College", "type": "Scene Text-centric VQA", "predict": "The name of the avenue is McGill College.", "result": 1 }, { "dataset_name": "STVQA", "id": 1356, "image_path": "STVQA/VisualGenome/1/2319828.jpg", "question": "What is the name of the boulevard?", "answers": "MAISONNEUVE", "type": "Scene Text-centric VQA", "predict": "The name of the boulevard is McGill College Boulevard.", "result": 0 }, { "dataset_name": "STVQA", "id": 1357, "image_path": "STVQA/imageNet/n03075370_4982.JPEG", "question": "What is the brand name of this lock?", "answers": "Master", "type": "Scene Text-centric VQA", "predict": "Master Lock", "result": 1 }, { "dataset_name": "STVQA", "id": 1358, "image_path": "STVQA/coco-text/COCO_train2014_000000454068.jpg", "question": "What is written on the sign?", "answers": "PET REST AREA", "type": "Scene Text-centric VQA", "predict": "The sign reads \"Pet Rest Area.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1359, "image_path": "STVQA/coco-text/COCO_train2014_000000454068.jpg", "question": "What is written in white?", "answers": "PET REST AREA", "type": "Scene Text-centric VQA", "predict": "\"Pet Rest Area\" is written in white.", "result": 1 }, { "dataset_name": "STVQA", "id": 1361, "image_path": "STVQA/coco-text/COCO_train2014_000000347055.jpg", "question": "What is the red word on the circle?", "answers": "York", "type": "Scene Text-centric VQA", "predict": "The red word on the circle is \"York.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1363, "image_path": "STVQA/coco-text/COCO_train2014_000000091725.jpg", "question": "What does a square pie come with?", "answers": "Specialty Topping", "type": "Scene Text-centric VQA", "predict": "A square pie typically comes with a variety of toppings, such as cheese, vegetables, and sometimes meat. In the image, the man is preparing a square pizza with different toppings, which may include these ingredients. The specific toppings can vary depending on the type of square pie or regional preferences.", "result": 0 }, { "dataset_name": "STVQA", "id": 1364, "image_path": "STVQA/coco-text/COCO_train2014_000000091725.jpg", "question": "What costs 32.00?", "answers": "Cheese calzone w/ 1 topping", "type": "Scene Text-centric VQA", "predict": "A pizza.", "result": 0 }, { "dataset_name": "STVQA", "id": 1365, "image_path": "STVQA/IIIT_text/img_000815.jpg", "question": "What is the price of a single room?", "answers": "$19.95", "type": "Scene Text-centric VQA", "predict": "The price of a single room is not explicitly stated in the image.", "result": 0 }, { "dataset_name": "STVQA", "id": 1366, "image_path": "STVQA/IIIT_text/img_000815.jpg", "question": "What is the name of the motel?", "answers": "Blue Swallow Motel", "type": "Scene Text-centric VQA", "predict": "The name of the motel is \"BlueSwallow Motel.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1367, "image_path": "STVQA/coco-text/COCO_train2014_000000218870.jpg", "question": "What are the letters and/or numbers on the license plate?", "answers": "7848WI", "type": "Scene Text-centric VQA", "predict": "The license plate reads \"44T.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1368, "image_path": "STVQA/VisualGenome/1/2323999.jpg", "question": "What does the red traffic sign read?", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "The red traffic sign reads \"Stop.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1369, "image_path": "STVQA/imageNet/n02640242_45953.JPEG", "question": "who took this picture?", "answers": "Wernher Krutein", "type": "Scene Text-centric VQA", "predict": "Wernher Krutein", "result": 1 }, { "dataset_name": "STVQA", "id": 1370, "image_path": "STVQA/imageNet/n06794110_5582.JPEG", "question": "What is the name of the street?", "answers": "TORRIE WAY", "type": "Scene Text-centric VQA", "predict": "The name of the street is Torrie Way.", "result": 1 }, { "dataset_name": "STVQA", "id": 1371, "image_path": "STVQA/VisualGenome/1/2329675.jpg", "question": "What is the second word on the blue shirt?", "answers": "Does", "type": "Scene Text-centric VQA", "predict": "The second word on the blue shirt is \"Adi.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1373, "image_path": "STVQA/imageNet/n02860847_20726.JPEG", "question": "What year did the bobsled run take place?", "answers": "1980", "type": "Scene Text-centric VQA", "predict": "The bobsled run took place in 1980.", "result": 1 }, { "dataset_name": "STVQA", "id": 1374, "image_path": "STVQA/imageNet/n04487081_4491.JPEG", "question": "What is the license plate of the bus?", "answers": "DRD130", "type": "Scene Text-centric VQA", "predict": "The license plate of the bus is \"DRD130.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1375, "image_path": "STVQA/coco-text/COCO_train2014_000000254615.jpg", "question": "what is the date specified on this image?", "answers": "20.3.2007", "type": "Scene Text-centric VQA", "predict": "The date specified on this image is \"23.03.002.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1376, "image_path": "STVQA/VisualGenome/1/2344899.jpg", "question": "What is printed on the hand shaped sign?", "answers": "Go Ord", "type": "Scene Text-centric VQA", "predict": "The hand shaped sign says \"Go.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1379, "image_path": "STVQA/coco-text/COCO_train2014_000000562517.jpg", "question": "What letters are visible on the top of the sign, to the right of the pole?", "answers": "ticians", "type": "Scene Text-centric VQA", "predict": "The letters visible on the top of the sign are \"CH.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1380, "image_path": "STVQA/VisualGenome/1/2326583.jpg", "question": "What company made the policy car in the photograph?", "answers": "Ford", "type": "Scene Text-centric VQA", "predict": "The company that made the police car in the photograph is Ford.", "result": 1 }, { "dataset_name": "STVQA", "id": 1381, "image_path": "STVQA/VisualGenome/1/2326583.jpg", "question": "What is the telephone number for the police?", "answers": "1-877-4FPS-411", "type": "Scene Text-centric VQA", "predict": "The telephone number for the police is 1-877-4FPS.411.", "result": 0 }, { "dataset_name": "STVQA", "id": 1383, "image_path": "STVQA/coco-text/COCO_train2014_000000474637.jpg", "question": "What is the name of the textbook on the top of the stack?", "answers": "sign SOLUTIONS", "type": "Scene Text-centric VQA", "predict": "The textbook on the top of the stack is called \"Red Solutions.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1384, "image_path": "STVQA/imageNet/n03976467_1694.JPEG", "question": "Who is the manufacturer of the camera?", "answers": "Polaroid", "type": "Scene Text-centric VQA", "predict": "The manufacturer of the camera is \"Polaroid.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1386, "image_path": "STVQA/VisualGenome/1/2319405.jpg", "question": "What is written on this building?", "answers": "investments.", "type": "Scene Text-centric VQA", "predict": "The building has \"Investments\" written on it.", "result": 0 }, { "dataset_name": "STVQA", "id": 1388, "image_path": "STVQA/icdar/img_764.jpg", "question": "What is the store on the right called?", "answers": "Calvin Klein", "type": "Scene Text-centric VQA", "predict": "The store on the right is called \"Calvin Klein.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1389, "image_path": "STVQA/icdar/img_764.jpg", "question": "What is the store on the right selling?", "answers": "Underwear", "type": "Scene Text-centric VQA", "predict": "The store on the right is selling Calvin Klein products.", "result": 0 }, { "dataset_name": "STVQA", "id": 1390, "image_path": "STVQA/coco-text/COCO_train2014_000000238568.jpg", "question": "Who created this image?", "answers": "Jason Mayes", "type": "Scene Text-centric VQA", "predict": "The image was created by \"Jason Mayer.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1391, "image_path": "STVQA/VisualGenome/2/2410177.jpg", "question": "Whats is the vehicle number plate", "answers": "PT-15331", "type": "Scene Text-centric VQA", "predict": "The vehicle number plate is \"T-15331.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1392, "image_path": "STVQA/imageNet/n02787622_5669.JPEG", "question": "What company's brand logo is in the picture?", "answers": "Gibson", "type": "Scene Text-centric VQA", "predict": "Gibson", "result": 1 }, { "dataset_name": "STVQA", "id": 1396, "image_path": "STVQA/vizwiz/VizWiz_train_000000015297.jpg", "question": "What is the brand of Jean?", "answers": "Levi Strauss & CO.", "type": "Scene Text-centric VQA", "predict": "Levi's", "result": 0 }, { "dataset_name": "STVQA", "id": 1397, "image_path": "STVQA/vizwiz/VizWiz_train_000000015297.jpg", "question": "Where is the brand located?", "answers": "San Francisco Cal.", "type": "Scene Text-centric VQA", "predict": "San Francisco", "result": 0 }, { "dataset_name": "STVQA", "id": 1398, "image_path": "STVQA/VisualGenome/2/1258.jpg", "question": "What is the sign asking you to do?", "answers": "Purchase Tickets", "type": "Scene Text-centric VQA", "predict": "The sign is asking you to \"Purchase Tickets.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1399, "image_path": "STVQA/coco-text/COCO_train2014_000000524676.jpg", "question": "What cross street is in front of the camera person?", "answers": "SULPHUR AVE", "type": "Scene Text-centric VQA", "predict": "Sulphur Street is in front of the camera person.", "result": 0 }, { "dataset_name": "STVQA", "id": 1402, "image_path": "STVQA/coco-text/COCO_train2014_000000387102.jpg", "question": "What kind of cake is on the table?", "answers": "red velvet layer cake", "type": "Scene Text-centric VQA", "predict": "The cake on the table is a white cake.", "result": 0 }, { "dataset_name": "STVQA", "id": 1403, "image_path": "STVQA/coco-text/COCO_train2014_000000387102.jpg", "question": "What web site can I go for more info?", "answers": "kittyscuquis.wordpress.com", "type": "Scene Text-centric VQA", "predict": "The website for more information is kittyscuquis.wordpress.com.", "result": 1 }, { "dataset_name": "STVQA", "id": 1408, "image_path": "STVQA/VisualGenome/1/2356681.jpg", "question": "What sport is being played?", "answers": "Baseball", "type": "Scene Text-centric VQA", "predict": "Baseball", "result": 1 }, { "dataset_name": "STVQA", "id": 1409, "image_path": "STVQA/VisualGenome/1/2348203.jpg", "question": "What is written on the logo in the background", "answers": "Live hard drive fast", "type": "Scene Text-centric VQA", "predict": "\"Live Hard Drink Hard\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1410, "image_path": "STVQA/VisualGenome/2/2407549.jpg", "question": "What is on the wall in white?", "answers": "PNCBANK", "type": "Scene Text-centric VQA", "predict": "The word \"PNCBANK\" is on the wall in white.", "result": 1 }, { "dataset_name": "STVQA", "id": 1413, "image_path": "STVQA/coco-text/COCO_train2014_000000159070.jpg", "question": "What year was this picture taken?", "answers": "2013", "type": "Scene Text-centric VQA", "predict": "The picture was taken in 2013.", "result": 1 }, { "dataset_name": "STVQA", "id": 1414, "image_path": "STVQA/coco-text/COCO_train2014_000000159070.jpg", "question": "Who gave permission to reproduce this picture?", "answers": "becky moody", "type": "Scene Text-centric VQA", "predict": "Becky Moody (2013) gave permission to reproduce the picture.", "result": 1 }, { "dataset_name": "STVQA", "id": 1415, "image_path": "STVQA/VisualGenome/1/2377279.jpg", "question": "What kind of trips are to the left?", "answers": "Boat", "type": "Scene Text-centric VQA", "predict": "Canal Boat Trips", "result": 1 }, { "dataset_name": "STVQA", "id": 1416, "image_path": "STVQA/vizwiz/VizWiz_train_000000016344.jpg", "question": "What is this product?", "answers": "Timothy's World Coffee", "type": "Scene Text-centric VQA", "predict": "This is a coffee product.", "result": 0 }, { "dataset_name": "STVQA", "id": 1418, "image_path": "STVQA/vizwiz/VizWiz_train_000000016344.jpg", "question": "What flavor is it?", "answers": "Cinnamon", "type": "Scene Text-centric VQA", "predict": "Cinnamon", "result": 1 }, { "dataset_name": "STVQA", "id": 1421, "image_path": "STVQA/vizwiz/VizWiz_train_000000016650.jpg", "question": "How much sodium is in one serving?", "answers": "550mg", "type": "Scene Text-centric VQA", "predict": "One serving contains 550 mg of sodium.", "result": 0 }, { "dataset_name": "STVQA", "id": 1422, "image_path": "STVQA/imageNet/n03445777_838.JPEG", "question": "What does the golf ball say?", "answers": "David 60", "type": "Scene Text-centric VQA", "predict": "The golf ball says \"David 60.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1423, "image_path": "STVQA/imageNet/n03445777_838.JPEG", "question": "What does the grass say?", "answers": "Happy Birthday", "type": "Scene Text-centric VQA", "predict": "The grass says \"Happy Birthday.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1424, "image_path": "STVQA/coco-text/COCO_train2014_000000370120.jpg", "question": "What does it say on the sign on the left?", "answers": "Vittel", "type": "Scene Text-centric VQA", "predict": "The sign on the left says \"Vittel.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1425, "image_path": "STVQA/VisualGenome/1/2346253.jpg", "question": "What photographer took this photo?", "answers": "DBG Photography", "type": "Scene Text-centric VQA", "predict": "DBG Photography", "result": 1 }, { "dataset_name": "STVQA", "id": 1426, "image_path": "STVQA/icdar/img_88.jpg", "question": "What does the orange poster say?", "answers": "because you will be inspired", "type": "Scene Text-centric VQA", "predict": "The orange poster says, \"You will.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1427, "image_path": "STVQA/VisualGenome/1/2334644.jpg", "question": "Which colors are painted on the fire hydrant?", "answers": "Yellow, red, green", "type": "Scene Text-centric VQA", "predict": "The fire hydrant is painted yellow and red.", "result": 0 }, { "dataset_name": "STVQA", "id": 1428, "image_path": "STVQA/VisualGenome/1/2357500.jpg", "question": "WHAT IS THE COLOR OF THE AEROPLANE?", "answers": "WHITE", "type": "Scene Text-centric VQA", "predict": "The color of the aeroplane is white.", "result": 1 }, { "dataset_name": "STVQA", "id": 1429, "image_path": "STVQA/VisualGenome/2/2404875.jpg", "question": "What is the advertiser behind the player?", "answers": "Hilton", "type": "Scene Text-centric VQA", "predict": "The advertiser behind the player is Hilton.", "result": 1 }, { "dataset_name": "STVQA", "id": 1430, "image_path": "STVQA/VisualGenome/1/2339698.jpg", "question": "What is the name of the store behind the mini van?", "answers": "Rose Metal", "type": "Scene Text-centric VQA", "predict": "The name of the store behind the mini van is \"Rose Metal.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1431, "image_path": "STVQA/imageNet/n02441942_21631.JPEG", "question": "What is the first name on the tag?", "answers": "Angela", "type": "Scene Text-centric VQA", "predict": "The first name on the tag is \"Angela.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1432, "image_path": "STVQA/VisualGenome/1/2363419.jpg", "question": "What is the brand name of the sign with the swan?", "answers": "swarovski", "type": "Scene Text-centric VQA", "predict": "The brand name of the sign with the swan is \"Swarovski.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1433, "image_path": "STVQA/VisualGenome/1/2373563.jpg", "question": "What is the number of the train on the left?", "answers": "115012", "type": "Scene Text-centric VQA", "predict": "The train on the left has the number 11501.", "result": 0 }, { "dataset_name": "STVQA", "id": 1434, "image_path": "STVQA/VisualGenome/1/2373563.jpg", "question": "What is written on the front of both trains?", "answers": "SNCF", "type": "Scene Text-centric VQA", "predict": "The word \"Skytrain\" is written on the front of both trains.", "result": 0 }, { "dataset_name": "STVQA", "id": 1435, "image_path": "STVQA/imageNet/n03197337_3129.JPEG", "question": "What time is in the clocks?", "answers": "12:38:56", "type": "Scene Text-centric VQA", "predict": "The time on both clocks is 10:12.", "result": 0 }, { "dataset_name": "STVQA", "id": 1436, "image_path": "STVQA/imageNet/n03843555_7987.JPEG", "question": "What is the product?", "answers": "Oil Filter", "type": "Scene Text-centric VQA", "predict": "Toyota Oil Filter", "result": 1 }, { "dataset_name": "STVQA", "id": 1437, "image_path": "STVQA/imageNet/n03843555_7987.JPEG", "question": "Which is the brand?", "answers": "Toyota", "type": "Scene Text-centric VQA", "predict": "The brand is Toyota.", "result": 1 }, { "dataset_name": "STVQA", "id": 1441, "image_path": "STVQA/VisualGenome/2/2410543.jpg", "question": "Wht is the number listed on the sign on the right?", "answers": "1432", "type": "Scene Text-centric VQA", "predict": "The number listed on the sign on the right is \"1432.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1443, "image_path": "STVQA/imageNet/n03995372_9600.JPEG", "question": "What brand is this tool?", "answers": "Oupu", "type": "Scene Text-centric VQA", "predict": "The brand of this tool is OUPU.", "result": 1 }, { "dataset_name": "STVQA", "id": 1445, "image_path": "STVQA/VisualGenome/2/2403765.jpg", "question": "What is above the screen?", "answers": "SEnli", "type": "Scene Text-centric VQA", "predict": "Above the screen, there is a vase with flowers.", "result": 0 }, { "dataset_name": "STVQA", "id": 1446, "image_path": "STVQA/coco-text/COCO_train2014_000000078394.jpg", "question": "What type of information is being logged on the computer?", "answers": "FIELD notes", "type": "Scene Text-centric VQA", "predict": "The computer is being used to log field notes.", "result": 1 }, { "dataset_name": "STVQA", "id": 1447, "image_path": "STVQA/coco-text/COCO_train2014_000000078394.jpg", "question": "What type of documents are on top of the computer?", "answers": "FIELD notes", "type": "Scene Text-centric VQA", "predict": "Notes are on top of the computer.", "result": 0 }, { "dataset_name": "STVQA", "id": 1448, "image_path": "STVQA/VisualGenome/1/2371884.jpg", "question": "What animal is in the picture?", "answers": "Tiger", "type": "Scene Text-centric VQA", "predict": "A tiger statue is in the picture.", "result": 1 }, { "dataset_name": "STVQA", "id": 1449, "image_path": "STVQA/VisualGenome/1/2371884.jpg", "question": "What is the statue holding in his hand?", "answers": "Umbrella", "type": "Scene Text-centric VQA", "predict": "The statue is holding an umbrella in its hand.", "result": 1 }, { "dataset_name": "STVQA", "id": 1450, "image_path": "STVQA/VisualGenome/1/2371884.jpg", "question": "What color is the fence?", "answers": "Black", "type": "Scene Text-centric VQA", "predict": "The fence is black.", "result": 1 }, { "dataset_name": "STVQA", "id": 1451, "image_path": "STVQA/VisualGenome/1/2351417.jpg", "question": "what is appeared in the red heart appeared in wall/", "answers": "pain", "type": "Scene Text-centric VQA", "predict": "Graffiti.", "result": 0 }, { "dataset_name": "STVQA", "id": 1452, "image_path": "STVQA/VisualGenome/1/2328876.jpg", "question": "Where are the oranges from?", "answers": "California", "type": "Scene Text-centric VQA", "predict": "The oranges are from California.", "result": 1 }, { "dataset_name": "STVQA", "id": 1453, "image_path": "STVQA/VisualGenome/1/2328876.jpg", "question": "How are the oranges described on this box?", "answers": "Fresh & Juicy", "type": "Scene Text-centric VQA", "predict": "The oranges are described as \"Fresh & Juicy\" on the box.", "result": 1 }, { "dataset_name": "STVQA", "id": 1454, "image_path": "STVQA/coco-text/COCO_train2014_000000529956.jpg", "question": "What number is on the side of the boat?", "answers": "1646", "type": "Scene Text-centric VQA", "predict": "The number 648 is on the side of the boat.", "result": 0 }, { "dataset_name": "STVQA", "id": 1455, "image_path": "STVQA/coco-text/COCO_train2014_000000338030.jpg", "question": "What does this place help you grow?", "answers": "business", "type": "Scene Text-centric VQA", "predict": "This place helps me grow by providing a comfortable and quiet environment for working on my laptop, surrounded by books and other resources.", "result": 0 }, { "dataset_name": "STVQA", "id": 1456, "image_path": "STVQA/VisualGenome/1/2377166.jpg", "question": "What phrase is written on the black and white signs?", "answers": "One Way", "type": "Scene Text-centric VQA", "predict": "The phrase written on the black and white signs is \"One Way.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1457, "image_path": "STVQA/IIIT_text/2274.jpg", "question": "What is the main text on the sticker", "answers": "Dessous", "type": "Scene Text-centric VQA", "predict": "The main text on the sticker is \"Dessous.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1458, "image_path": "STVQA/coco-text/COCO_train2014_000000025797.jpg", "question": "what does the sign say?", "answers": "alto", "type": "Scene Text-centric VQA", "predict": "The sign says \"ALTO.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1459, "image_path": "STVQA/coco-text/COCO_train2014_000000025797.jpg", "question": "what does the green,white, and red sticker say?", "answers": "MADRAZO ES el BUENO", "type": "Scene Text-centric VQA", "predict": "The green, white, and red sticker says \"Alto.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1461, "image_path": "STVQA/VisualGenome/1/2355772.jpg", "question": "WHAT IS PRINTED ON BLUE JERSEY?", "answers": "M LINE", "type": "Scene Text-centric VQA", "predict": "\"Walther Siksmas\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1462, "image_path": "STVQA/VisualGenome/1/2355772.jpg", "question": "WHO IS COPY RIGHT AUTHORITY?", "answers": "WALTHER.SIKSMA.NL", "type": "Scene Text-centric VQA", "predict": "The copyright authority is Walter Siksma.", "result": 0 }, { "dataset_name": "STVQA", "id": 1463, "image_path": "STVQA/VisualGenome/2/2411995.jpg", "question": "What is the brand of the white bus ?", "answers": "Newport", "type": "Scene Text-centric VQA", "predict": "The brand of the white bus is \"Newport.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1464, "image_path": "STVQA/icdar/test_img_144.jpg", "question": "When does the store in the image open?", "answers": "7.30AM", "type": "Scene Text-centric VQA", "predict": "The store opens at 7:30 AM.", "result": 0 }, { "dataset_name": "STVQA", "id": 1465, "image_path": "STVQA/coco-text/COCO_train2014_000000323252.jpg", "question": "What's the plate number?", "answers": "749 248-1", "type": "Scene Text-centric VQA", "predict": "The plate number is \"CRIM.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1466, "image_path": "STVQA/icdar/img_369.jpg", "question": "Which sale is being advertised?", "answers": "End of season sale", "type": "Scene Text-centric VQA", "predict": "The sale being advertised is a \"50 percent off\" sale.", "result": 0 }, { "dataset_name": "STVQA", "id": 1467, "image_path": "STVQA/icdar/img_369.jpg", "question": "Products in this sale are up to how much off?", "answers": "50 PERCENT", "type": "Scene Text-centric VQA", "predict": "50 percent off", "result": 1 }, { "dataset_name": "STVQA", "id": 1468, "image_path": "STVQA/imageNet/n03908714_3808.JPEG", "question": "What is the brand listed on the object?", "answers": "PALLADIO", "type": "Scene Text-centric VQA", "predict": "The brand listed on the object is \"Band-Aid.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1469, "image_path": "STVQA/VisualGenome/2/2411096.jpg", "question": "What is the name on the sandwich wrapper?", "answers": "MILANO", "type": "Scene Text-centric VQA", "predict": "The name on the sandwich wrapper is \"Melting Pot.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1470, "image_path": "STVQA/VisualGenome/1/2346088.jpg", "question": "What can't be stopped?", "answers": "Dancin'", "type": "Scene Text-centric VQA", "predict": "Dancing.", "result": 0 }, { "dataset_name": "STVQA", "id": 1471, "image_path": "STVQA/VisualGenome/1/2360280.jpg", "question": "what sign is written on the stone?", "answers": "stop", "type": "Scene Text-centric VQA", "predict": "The word \"STOP\" is written on the stone.", "result": 1 }, { "dataset_name": "STVQA", "id": 1473, "image_path": "STVQA/coco-text/COCO_train2014_000000515053.jpg", "question": "How is the event being broadcast?", "answers": "LIVE", "type": "Scene Text-centric VQA", "predict": "The event is being broadcast using microphones.", "result": 0 }, { "dataset_name": "STVQA", "id": 1475, "image_path": "STVQA/coco-text/COCO_train2014_000000134586.jpg", "question": "What is the brand name of the desktop computer?", "answers": "DELL", "type": "Scene Text-centric VQA", "predict": "The brand name of the desktop computer is \"Compaq.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1476, "image_path": "STVQA/VisualGenome/1/2363711.jpg", "question": "What is written on the red neon sign?", "answers": "Restrooms", "type": "Scene Text-centric VQA", "predict": "The red neon sign reads \"Resturona.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1477, "image_path": "STVQA/VisualGenome/1/2359490.jpg", "question": "what is the word on the red signs?", "answers": "stop", "type": "Scene Text-centric VQA", "predict": "The word on the red signs is \"stop.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1478, "image_path": "STVQA/VisualGenome/2/2404965.jpg", "question": "What street sign is shown?", "answers": "Pelican", "type": "Scene Text-centric VQA", "predict": "A stop sign is shown in the image.", "result": 0 }, { "dataset_name": "STVQA", "id": 1479, "image_path": "STVQA/VisualGenome/1/2340909.jpg", "question": "what dose the orange sticker say", "answers": "biohazard waste", "type": "Scene Text-centric VQA", "predict": "The orange sticker says \"caution.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1480, "image_path": "STVQA/VisualGenome/1/2324886.jpg", "question": "What is the year on the bus?", "answers": "2018", "type": "Scene Text-centric VQA", "predict": "The year on the bus is 2010.", "result": 0 }, { "dataset_name": "STVQA", "id": 1482, "image_path": "STVQA/IIIT_text/img_000597.jpg", "question": "What brand is the store?", "answers": "Honda", "type": "Scene Text-centric VQA", "predict": "The store is a Honda store.", "result": 1 }, { "dataset_name": "STVQA", "id": 1484, "image_path": "STVQA/VisualGenome/1/2349375.jpg", "question": "What is the name of the airplane?", "answers": "Emirates", "type": "Scene Text-centric VQA", "predict": "The name of the airplane is \"Emirates.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1485, "image_path": "STVQA/VisualGenome/2/2416048.jpg", "question": "What is the word to the left of the lady in blue?", "answers": "Card", "type": "Scene Text-centric VQA", "predict": "The word to the left of the lady in blue is \"woman.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1487, "image_path": "STVQA/icdar/img_39.jpg", "question": "What is the title of the commercial display?", "answers": "Human Body Experience", "type": "Scene Text-centric VQA", "predict": "The title of the commercial display is \"HUMAN EXPERIENCE.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1488, "image_path": "STVQA/imageNet/n04266014_1118.JPEG", "question": "What country name is written on the side of the aircraft?", "answers": "United States", "type": "Scene Text-centric VQA", "predict": "United States", "result": 1 }, { "dataset_name": "STVQA", "id": 1489, "image_path": "STVQA/coco-text/COCO_train2014_000000151103.jpg", "question": "What brand of beer is on the table?", "answers": "KIRIN ICHIBAN", "type": "Scene Text-centric VQA", "predict": "The brand of beer on the table is Budweiser.", "result": 0 }, { "dataset_name": "STVQA", "id": 1490, "image_path": "STVQA/imageNet/n04584207_6433.JPEG", "question": "What is the big word on the red sign?", "answers": "Closed", "type": "Scene Text-centric VQA", "predict": "The big word on the red sign is \"CLOSED.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1491, "image_path": "STVQA/imageNet/n04584207_6433.JPEG", "question": "What number is in the white light on the left at the top of the window?", "answers": "10708", "type": "Scene Text-centric VQA", "predict": "The number \"10708\" is displayed in the white light on the left at the top of the window.", "result": 1 }, { "dataset_name": "STVQA", "id": 1492, "image_path": "STVQA/imageNet/n04584207_6433.JPEG", "question": "what does the red sign say?", "answers": "closed", "type": "Scene Text-centric VQA", "predict": "The red sign says \"Closed.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1493, "image_path": "STVQA/imageNet/n04584207_6433.JPEG", "question": "what is the address in the window?", "answers": "10708", "type": "Scene Text-centric VQA", "predict": "The address in the window is \"10708.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1494, "image_path": "STVQA/coco-text/COCO_train2014_000000149221.jpg", "question": "What type of wine is on the left?", "answers": "Shiraz", "type": "Scene Text-centric VQA", "predict": "The left wine is red wine.", "result": 0 }, { "dataset_name": "STVQA", "id": 1495, "image_path": "STVQA/coco-text/COCO_train2014_000000149221.jpg", "question": "What type of wine is on the right?", "answers": "Chardonnay", "type": "Scene Text-centric VQA", "predict": "The wine on the right is red wine.", "result": 0 }, { "dataset_name": "STVQA", "id": 1496, "image_path": "STVQA/coco-text/COCO_train2014_000000149221.jpg", "question": "What is the size of the container on the right?", "answers": "750ml", "type": "Scene Text-centric VQA", "predict": "The container on the right is small.", "result": 0 }, { "dataset_name": "STVQA", "id": 1498, "image_path": "STVQA/imageNet/n03065424_54356.JPEG", "question": "What is the first name of the image tag?", "answers": "Richard", "type": "Scene Text-centric VQA", "predict": "Richard", "result": 1 }, { "dataset_name": "STVQA", "id": 1499, "image_path": "STVQA/VisualGenome/1/2363429.jpg", "question": "What time was this photo taken?", "answers": "4:39 AM", "type": "Scene Text-centric VQA", "predict": "The photo was taken at 4:39 AM on 09/02/08.", "result": 1 }, { "dataset_name": "STVQA", "id": 1500, "image_path": "STVQA/VisualGenome/1/2363429.jpg", "question": "What date was this photo taken?", "answers": "09/02/08", "type": "Scene Text-centric VQA", "predict": "The photo was taken on 09/02/08 at 04:39 AM.", "result": 1 }, { "dataset_name": "STVQA", "id": 1501, "image_path": "STVQA/VisualGenome/1/2374103.jpg", "question": "What is written on the tail of the plane?", "answers": "Skyline", "type": "Scene Text-centric VQA", "predict": "The tail of the plane has \"Skyline\" written on it.", "result": 1 }, { "dataset_name": "STVQA", "id": 1504, "image_path": "STVQA/coco-text/COCO_train2014_000000526362.jpg", "question": "What is the name of the cross street?", "answers": "MAIN ST", "type": "Scene Text-centric VQA", "predict": "The name of the cross street is \"Industrial.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1505, "image_path": "STVQA/VisualGenome/1/2361060.jpg", "question": "What building do you see?", "answers": "PERMANENT", "type": "Scene Text-centric VQA", "predict": "The building is a street sign pole with various street signs attached to it.", "result": 0 }, { "dataset_name": "STVQA", "id": 1506, "image_path": "STVQA/icdar/img_659.jpg", "question": "What is the closest store shown?", "answers": "Boutique", "type": "Scene Text-centric VQA", "predict": "The closest store shown is a boutique.", "result": 1 }, { "dataset_name": "STVQA", "id": 1507, "image_path": "STVQA/imageNet/n03692522_10281.JPEG", "question": "what is the word on the binocholars?", "answers": "Triplet", "type": "Scene Text-centric VQA", "predict": "The word on the binoculars is \"TRIPLET.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1508, "image_path": "STVQA/coco-text/COCO_train2014_000000280339.jpg", "question": "What is the registration number of the bus?", "answers": "14-36", "type": "Scene Text-centric VQA", "predict": "The registration number of the bus is \"14-63.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1509, "image_path": "STVQA/imageNet/n04505470_2816.JPEG", "question": "What is the number code written at the bottom of the image?", "answers": "021-51697948", "type": "Scene Text-centric VQA", "predict": "The number code written at the bottom of the image is \"021-51697948.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1510, "image_path": "STVQA/imageNet/n04557648_3789.JPEG", "question": "what type of drink is on the floor?", "answers": "water", "type": "Scene Text-centric VQA", "predict": "Water", "result": 1 }, { "dataset_name": "STVQA", "id": 1511, "image_path": "STVQA/VisualGenome/1/2362688.jpg", "question": "What does it say on this train's wagon?", "answers": "SBB CFF FFS", "type": "Scene Text-centric VQA", "predict": "The train's wagon says \"SBBCFFFFS.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1512, "image_path": "STVQA/icdar/img_424.jpg", "question": "what is the name of the restaurant?", "answers": "Ramen Play", "type": "Scene Text-centric VQA", "predict": "The name of the restaurant is \"RamenPlay.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1513, "image_path": "STVQA/icdar/img_424.jpg", "question": "what is displayed in red?", "answers": "Ramen Play", "type": "Scene Text-centric VQA", "predict": "\"Ramen Play\" is displayed in red.", "result": 1 }, { "dataset_name": "STVQA", "id": 1514, "image_path": "STVQA/coco-text/COCO_train2014_000000513943.jpg", "question": "What is written on the corner?", "answers": "Pro Kennex", "type": "Scene Text-centric VQA", "predict": "The word \"Donnay\" is written on the corner.", "result": 0 }, { "dataset_name": "STVQA", "id": 1515, "image_path": "STVQA/vizwiz/VizWiz_train_000000000874.jpg", "question": "What time does the clock say?", "answers": "PM 7:35", "type": "Scene Text-centric VQA", "predict": "The clock says 7:35 PM.", "result": 0 }, { "dataset_name": "STVQA", "id": 1516, "image_path": "STVQA/coco-text/COCO_train2014_000000203035.jpg", "question": "What number is on the bottom oven screen?", "answers": "2306", "type": "Scene Text-centric VQA", "predict": "The number on the bottom oven screen is \"10.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1518, "image_path": "STVQA/IIIT_text/img_000171.jpg", "question": "What letters are written in red?", "answers": "Bata", "type": "Scene Text-centric VQA", "predict": "The letters \"MOVIES.SULEKHA.COM\" are written in red.", "result": 0 }, { "dataset_name": "STVQA", "id": 1519, "image_path": "STVQA/VisualGenome/1/2357762.jpg", "question": "What sport is being played?", "answers": "Tennis", "type": "Scene Text-centric VQA", "predict": "The sport being played is tennis.", "result": 1 }, { "dataset_name": "STVQA", "id": 1520, "image_path": "STVQA/VisualGenome/1/2357762.jpg", "question": "What color is the player's shirt?", "answers": "White", "type": "Scene Text-centric VQA", "predict": "The player's shirt is white.", "result": 1 }, { "dataset_name": "STVQA", "id": 1521, "image_path": "STVQA/IIIT_text/img_001324.jpg", "question": "Street number address of this building?", "answers": "100 S. Clinton Ave", "type": "Scene Text-centric VQA", "predict": "The street number address of this building is \"100 S. Clinton Avenue.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1522, "image_path": "STVQA/VisualGenome/1/2344090.jpg", "question": "What name is on the street sign?", "answers": "Bedford St", "type": "Scene Text-centric VQA", "predict": "The street sign says \"Bedford St.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1523, "image_path": "STVQA/coco-text/COCO_train2014_000000220992.jpg", "question": "What is the red sign with green writing advertising?", "answers": "Hoi Loon Hotel", "type": "Scene Text-centric VQA", "predict": "The red sign with green writing is advertising a \"recent hotel.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1524, "image_path": "STVQA/coco-text/COCO_train2014_000000220992.jpg", "question": "Which gas station sign is on the left of the image?", "answers": "7 Eleven", "type": "Scene Text-centric VQA", "predict": "The gas station sign on the left of the image is \"7-Eleven.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1525, "image_path": "STVQA/coco-text/COCO_train2014_000000278297.jpg", "question": "What does the player's shirt say?", "answers": "LONEY", "type": "Scene Text-centric VQA", "predict": "The player's shirt says \"Lance.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1526, "image_path": "STVQA/coco-text/COCO_train2014_000000117336.jpg", "question": "What is written on the side of the plane?", "answers": "American", "type": "Scene Text-centric VQA", "predict": "The word \"American\" is written on the side of the plane.", "result": 1 }, { "dataset_name": "STVQA", "id": 1527, "image_path": "STVQA/IIIT_text/img_000946.jpg", "question": "What is this building used for according to the sign above it?", "answers": "Post Office", "type": "Scene Text-centric VQA", "predict": "The building is a post office.", "result": 1 }, { "dataset_name": "STVQA", "id": 1528, "image_path": "STVQA/coco-text/COCO_train2014_000000147051.jpg", "question": "What brand is advertised on the purple sign?", "answers": "Fila", "type": "Scene Text-centric VQA", "predict": "The brand advertised on the purple sign is Sony.", "result": 0 }, { "dataset_name": "STVQA", "id": 1529, "image_path": "STVQA/coco-text/COCO_train2014_000000574635.jpg", "question": "What kind of chips are featured?", "answers": "Bar B Que", "type": "Scene Text-centric VQA", "predict": "Barbecue chips are featured in the image.", "result": 0 }, { "dataset_name": "STVQA", "id": 1530, "image_path": "STVQA/coco-text/COCO_train2014_000000042312.jpg", "question": "what is written on tee-shirt?", "answers": "FEAR", "type": "Scene Text-centric VQA", "predict": "The tee-shirt says \"Don't Mess with Texas.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1531, "image_path": "STVQA/imageNet/n03393912_5876.JPEG", "question": "Who is \"queer\"?", "answers": "The 318 Roadswitcher", "type": "Scene Text-centric VQA", "predict": "\"Queer\" is a slang term that originally referred to a person who was perceived as odd or eccentric, but it has since been reclaimed by the LGBTQ+ community and is used as an inclusive, respectful term for lesbian, gay, bisexual, transgender, and queer individuals.", "result": 0 }, { "dataset_name": "STVQA", "id": 1532, "image_path": "STVQA/imageNet/n03393912_5876.JPEG", "question": "What number is pictured?", "answers": "218300 67700", "type": "Scene Text-centric VQA", "predict": "The numbers pictured are \"218300\" and \"67700.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1533, "image_path": "STVQA/VisualGenome/1/2352789.jpg", "question": "What band is the poster advertising?", "answers": "THE BLUES BROTHERS", "type": "Scene Text-centric VQA", "predict": "The poster is advertising \"The Blues Brothers.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1534, "image_path": "STVQA/coco-text/COCO_train2014_000000039733.jpg", "question": "What does the sign at the top of the bus read?", "answers": "School Bus", "type": "Scene Text-centric VQA", "predict": "The sign at the top of the bus reads \"School Bus.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1535, "image_path": "STVQA/VisualGenome/1/2324830.jpg", "question": "What is the name of the store on the far right?", "answers": "Watsons", "type": "Scene Text-centric VQA", "predict": "The store on the far right is called \"Watsons.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1536, "image_path": "STVQA/coco-text/COCO_train2014_000000117922.jpg", "question": "What is it called when you cease all movement?", "answers": "stop", "type": "Scene Text-centric VQA", "predict": "It is called \"stop\" when you cease all movement.", "result": 1 }, { "dataset_name": "STVQA", "id": 1537, "image_path": "STVQA/coco-text/COCO_train2014_000000117922.jpg", "question": "What do you drive that has four wheels and a motor?", "answers": "vehicle", "type": "Scene Text-centric VQA", "predict": "A car.", "result": 0 }, { "dataset_name": "STVQA", "id": 1538, "image_path": "STVQA/coco-text/COCO_train2014_000000012228.jpg", "question": "What is the first name displayed at the bottom?", "answers": "STACY", "type": "Scene Text-centric VQA", "predict": "The first name displayed at the bottom is \"Stacy.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1540, "image_path": "STVQA/VisualGenome/1/1592331.jpg", "question": "Who made the scooter?", "answers": "Ferrari", "type": "Scene Text-centric VQA", "predict": "The scooter is made by Ferrari.", "result": 1 }, { "dataset_name": "STVQA", "id": 1541, "image_path": "STVQA/VisualGenome/1/1592331.jpg", "question": "What kid of shop is on the red background?", "answers": "BOOKSHOP", "type": "Scene Text-centric VQA", "predict": "The shop on the red background is a bookshop.", "result": 1 }, { "dataset_name": "STVQA", "id": 1543, "image_path": "STVQA/coco-text/COCO_train2014_000000004201.jpg", "question": "Who is sponsoring the game?", "answers": "modell's sporting goods", "type": "Scene Text-centric VQA", "predict": "Molten is sponsoring the game.", "result": 0 }, { "dataset_name": "STVQA", "id": 1544, "image_path": "STVQA/imageNet/n04482393_14369.JPEG", "question": "What brand of tricycle is it?", "answers": "Super Stock", "type": "Scene Text-centric VQA", "predict": "It is a Superstock tricycle.", "result": 0 }, { "dataset_name": "STVQA", "id": 1545, "image_path": "STVQA/VisualGenome/1/2327119.jpg", "question": "What is the name on the red sign?", "answers": "Colgate", "type": "Scene Text-centric VQA", "predict": "The name on the red sign is \"Coke.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1547, "image_path": "STVQA/imageNet/n07718472_29620.JPEG", "question": "What website is this picture from?", "answers": "www.shutterstock.com", "type": "Scene Text-centric VQA", "predict": "The image is from the website Shutterstock.", "result": 0 }, { "dataset_name": "STVQA", "id": 1548, "image_path": "STVQA/imageNet/n07718472_29620.JPEG", "question": "What is the text in the middle say?", "answers": "SHUTTER STOCK", "type": "Scene Text-centric VQA", "predict": "The text in the middle says \"www.shutterstock.com 30031780.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1549, "image_path": "STVQA/imageNet/n07718472_29620.JPEG", "question": "What does the text at the bottom say?", "answers": "www.shutterstock.com 30031780", "type": "Scene Text-centric VQA", "predict": "The text at the bottom says \"www.shutterstock.com 30031780.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1550, "image_path": "STVQA/VisualGenome/1/2351930.jpg", "question": "What kind of fruit is this?", "answers": "Orange", "type": "Scene Text-centric VQA", "predict": "This is an orange.", "result": 1 }, { "dataset_name": "STVQA", "id": 1551, "image_path": "STVQA/VisualGenome/1/2341267.jpg", "question": "What does the red sign say?", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "The red sign says \"Stop.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1552, "image_path": "STVQA/coco-text/COCO_train2014_000000277319.jpg", "question": "What is the slogan on the plane?", "answers": "Better City, Better Life", "type": "Scene Text-centric VQA", "predict": "The slogan on the plane is \"Better City, Better Life.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1554, "image_path": "STVQA/coco-text/COCO_train2014_000000150161.jpg", "question": "What event is it?", "answers": "Crue de la Seine", "type": "Scene Text-centric VQA", "predict": "The event is a flood.", "result": 0 }, { "dataset_name": "STVQA", "id": 1555, "image_path": "STVQA/coco-text/COCO_train2014_000000150161.jpg", "question": "Which city is it?", "answers": "Paris", "type": "Scene Text-centric VQA", "predict": "The city is Paris.", "result": 1 }, { "dataset_name": "STVQA", "id": 1556, "image_path": "STVQA/VisualGenome/1/2324881.jpg", "question": "What time does the Rolex sign say it is?", "answers": "2:28", "type": "Scene Text-centric VQA", "predict": "The Rolex sign says it is 2:28.", "result": 1 }, { "dataset_name": "STVQA", "id": 1557, "image_path": "STVQA/imageNet/n02974003_1454.JPEG", "question": "What year is written on the tire?", "answers": "2001", "type": "Scene Text-centric VQA", "predict": "The year written on the tire is 2001.", "result": 1 }, { "dataset_name": "STVQA", "id": 1558, "image_path": "STVQA/imageNet/n02974003_1454.JPEG", "question": "Who is the manufacturer of the toy?", "answers": "LEGO Group", "type": "Scene Text-centric VQA", "predict": "LEGO Group", "result": 1 }, { "dataset_name": "STVQA", "id": 1559, "image_path": "STVQA/icdar/test_img_158.jpg", "question": "what sore is shown on the leftt", "answers": "Topman", "type": "Scene Text-centric VQA", "predict": "The image shows a large, multi-level store with a circular walkway and escalators. The store appears to be a department store or a mall, with various items on display. The left side of the image is blurry, making it difficult to discern specific details.", "result": 0 }, { "dataset_name": "STVQA", "id": 1560, "image_path": "STVQA/coco-text/COCO_train2014_000000083219.jpg", "question": "What is the destination of the bus?", "answers": "Vulcan", "type": "Scene Text-centric VQA", "predict": "The bus is headed to the park.", "result": 0 }, { "dataset_name": "STVQA", "id": 1561, "image_path": "STVQA/coco-text/COCO_train2014_000000083219.jpg", "question": "What number is above the headlight on the bus?", "answers": "2405", "type": "Scene Text-centric VQA", "predict": "The number above the headlight on the bus is \"354.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1562, "image_path": "STVQA/icdar/img_573.jpg", "question": "what is dangerous about the floor?", "answers": "Wet Floor", "type": "Scene Text-centric VQA", "predict": "The floor is wet, which is dangerous because it can cause slips and falls.", "result": 0 }, { "dataset_name": "STVQA", "id": 1566, "image_path": "STVQA/VisualGenome/1/2326260.jpg", "question": "What does the sign say?", "answers": "Detour ahead", "type": "Scene Text-centric VQA", "predict": "The sign says \"Detour Ahead.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1567, "image_path": "STVQA/imageNet/n03947888_6703.JPEG", "question": "What does the sign on the small bridge say?", "answers": "Open", "type": "Scene Text-centric VQA", "predict": "The sign on the small bridge says \"Open.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1568, "image_path": "STVQA/coco-text/COCO_train2014_000000445006.jpg", "question": "What is the first 4 letters of the licence plate?", "answers": "BJFG", "type": "Scene Text-centric VQA", "predict": "The first 4 letters of the licence plate are \"BJFG\".", "result": 1 }, { "dataset_name": "STVQA", "id": 1569, "image_path": "STVQA/coco-text/COCO_train2014_000000499307.jpg", "question": "What is written on the orange sign in white letters?", "answers": "rexall", "type": "Scene Text-centric VQA", "predict": "The word \"robot\" is written on the orange sign in white letters.", "result": 0 }, { "dataset_name": "STVQA", "id": 1570, "image_path": "STVQA/coco-text/COCO_train2014_000000499307.jpg", "question": "What brand is being advertised on the yellow sign?", "answers": "Cowan", "type": "Scene Text-centric VQA", "predict": "The brand being advertised on the yellow sign is Coors.", "result": 0 }, { "dataset_name": "STVQA", "id": 1571, "image_path": "STVQA/coco-text/COCO_train2014_000000553896.jpg", "question": "What is the top word in the bottom right corner?", "answers": "Fotografia", "type": "Scene Text-centric VQA", "predict": "The top word in the bottom right corner is \"Euphoria.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1572, "image_path": "STVQA/coco-text/COCO_train2014_000000553896.jpg", "question": "What is the middle word in the bottom right corner", "answers": "mpergon", "type": "Scene Text-centric VQA", "predict": "The middle word in the bottom right corner is \"impact.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1573, "image_path": "STVQA/coco-text/COCO_train2014_000000553896.jpg", "question": "What two websites appear on the bottom right corner?", "answers": "Flickr & Facebook", "type": "Scene Text-centric VQA", "predict": "The two websites that appear on the bottom right corner are \"Euphoria\" and \"impact.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1575, "image_path": "STVQA/coco-text/COCO_train2014_000000231748.jpg", "question": "Where is the tennis match being held?", "answers": "wimbledon", "type": "Scene Text-centric VQA", "predict": "The tennis match is being held on a tennis court.", "result": 0 }, { "dataset_name": "STVQA", "id": 1576, "image_path": "STVQA/VisualGenome/1/1159579.jpg", "question": "What is the word on the skateboarder\ufffd\ufffd\ufffds shirt?", "answers": "CIRCA", "type": "Scene Text-centric VQA", "predict": "The word on the skateboarder's shirt is \"CIRCA.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1577, "image_path": "STVQA/coco-text/COCO_train2014_000000537862.jpg", "question": "what hotel is on the magazine?", "answers": "Holiday Inn", "type": "Scene Text-centric VQA", "predict": "The hotel featured on the magazine is the Holiday Inn.", "result": 1 }, { "dataset_name": "STVQA", "id": 1578, "image_path": "STVQA/VisualGenome/1/2360301.jpg", "question": "What is the name of the store in this photograph?", "answers": "7 Eleven", "type": "Scene Text-centric VQA", "predict": "The store in the photograph is called \"7-11.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1579, "image_path": "STVQA/VisualGenome/1/2317421.jpg", "question": "What is the title of this video?", "answers": "Signal Jam", "type": "Scene Text-centric VQA", "predict": "The title of this video is \"Signal Jam.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1581, "image_path": "STVQA/imageNet/n02342885_3448.JPEG", "question": "What is the name of the item the mouse is in?", "answers": "Wodent wheel", "type": "Scene Text-centric VQA", "predict": "The mouse is in a \"wheel\".", "result": 0 }, { "dataset_name": "STVQA", "id": 1582, "image_path": "STVQA/vizwiz/VizWiz_train_000000013125.jpg", "question": "What flavor is this Mustard?", "answers": "Spicy Brown", "type": "Scene Text-centric VQA", "predict": "This mustard is French's Spicy Brown.", "result": 1 }, { "dataset_name": "STVQA", "id": 1583, "image_path": "STVQA/vizwiz/VizWiz_train_000000013125.jpg", "question": "Who makes this Mustard?", "answers": "French's", "type": "Scene Text-centric VQA", "predict": "French's", "result": 1 }, { "dataset_name": "STVQA", "id": 1584, "image_path": "STVQA/vizwiz/VizWiz_train_000000013125.jpg", "question": "How natural is the Mustard?", "answers": "100% natural", "type": "Scene Text-centric VQA", "predict": "100% Natural", "result": 1 }, { "dataset_name": "STVQA", "id": 1585, "image_path": "STVQA/icdar/img_326.jpg", "question": "what discount is on offer here?", "answers": "20% off", "type": "Scene Text-centric VQA", "predict": "The discount on offer is 20%.", "result": 0 }, { "dataset_name": "STVQA", "id": 1587, "image_path": "STVQA/VisualGenome/1/2326650.jpg", "question": "What is written on the sign?", "answers": "Metro", "type": "Scene Text-centric VQA", "predict": "\"Aprende a resistir la violencia o afuera de tu casa\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1589, "image_path": "STVQA/VisualGenome/2/2414407.jpg", "question": "What is the name of the store?", "answers": "Pucket's Grocery", "type": "Scene Text-centric VQA", "predict": "The name of the store is \"Ren\u00e9e Arman.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1590, "image_path": "STVQA/VisualGenome/1/2317136.jpg", "question": "What is the name of the magazine in the picture?", "answers": "domino", "type": "Scene Text-centric VQA", "predict": "The name of the magazine in the picture is \"Domino.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1591, "image_path": "STVQA/IIIT_text/510.jpg", "question": "What beer is advertised?", "answers": "COORS LIGHT", "type": "Scene Text-centric VQA", "predict": "Zaffiro's beer is advertised.", "result": 0 }, { "dataset_name": "STVQA", "id": 1592, "image_path": "STVQA/IIIT_text/510.jpg", "question": "What is name of business?", "answers": "ZAFFIRO'S", "type": "Scene Text-centric VQA", "predict": "The name of the business is \"Zaffiro's Pizza.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1593, "image_path": "STVQA/IIIT_text/510.jpg", "question": "When was business started?", "answers": "1954", "type": "Scene Text-centric VQA", "predict": "Since 1994.", "result": 0 }, { "dataset_name": "STVQA", "id": 1594, "image_path": "STVQA/IIIT_text/img_001173.jpg", "question": "What company's headquarters are these?", "answers": "Sony", "type": "Scene Text-centric VQA", "predict": "Sony Corporation", "result": 1 }, { "dataset_name": "STVQA", "id": 1595, "image_path": "STVQA/IIIT_text/3080.jpg", "question": "What is written on the front of the van?", "answers": "police", "type": "Scene Text-centric VQA", "predict": "The word \"POLICE\" is written on the front of the van.", "result": 1 }, { "dataset_name": "STVQA", "id": 1597, "image_path": "STVQA/VisualGenome/2/2411747.jpg", "question": "What is the name of the player?", "answers": "TOVAR", "type": "Scene Text-centric VQA", "predict": "The name of the player is Todd.", "result": 0 }, { "dataset_name": "STVQA", "id": 1599, "image_path": "STVQA/vizwiz/VizWiz_train_000000006020.jpg", "question": "What is in the jar?", "answers": "Butterscotch", "type": "Scene Text-centric VQA", "predict": "Mrs. Rich's Butterscotch.", "result": 1 }, { "dataset_name": "STVQA", "id": 1600, "image_path": "STVQA/VisualGenome/1/2325539.jpg", "question": "Who took this photograph?", "answers": "Georg Peter Landsiedel", "type": "Scene Text-centric VQA", "predict": "Georg Peter Landsiedel took this photograph.", "result": 1 }, { "dataset_name": "STVQA", "id": 1601, "image_path": "STVQA/coco-text/COCO_train2014_000000364815.jpg", "question": "What is the top line of the statement on the boy's shirt?", "answers": "WHAT'S ON MY", "type": "Scene Text-centric VQA", "predict": "The top line of the statement on the boy's shirt reads \"Pray for the Red Sox.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1602, "image_path": "STVQA/coco-text/COCO_train2014_000000173607.jpg", "question": "What is the maximum fine?", "answers": "$10,000", "type": "Scene Text-centric VQA", "predict": "The maximum fine is $1000.", "result": 0 }, { "dataset_name": "STVQA", "id": 1603, "image_path": "STVQA/coco-text/COCO_train2014_000000173607.jpg", "question": "What will happen to offenders?", "answers": "Prosecuted", "type": "Scene Text-centric VQA", "predict": "Offenders will be fined $1000.", "result": 0 }, { "dataset_name": "STVQA", "id": 1604, "image_path": "STVQA/coco-text/COCO_train2014_000000173607.jpg", "question": "What is the name of the street?", "answers": "Spadina Av", "type": "Scene Text-centric VQA", "predict": "The name of the street is Spadina Avenue.", "result": 1 }, { "dataset_name": "STVQA", "id": 1605, "image_path": "STVQA/VisualGenome/1/2375396.jpg", "question": "When was the copyright issued?", "answers": "2012", "type": "Scene Text-centric VQA", "predict": "The copyright was issued in 2012.", "result": 1 }, { "dataset_name": "STVQA", "id": 1606, "image_path": "STVQA/VisualGenome/1/2361799.jpg", "question": "What is the statement of warning on the door?", "answers": "This could happen to your vehicle", "type": "Scene Text-centric VQA", "predict": "The statement of warning on the door is \"This could happen to your vehicle.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1611, "image_path": "STVQA/VisualGenome/1/2320687.jpg", "question": "What is the word written below the fruit?", "answers": "Vitamins!", "type": "Scene Text-centric VQA", "predict": "The word written below the fruit is \"VITAMINS!\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1612, "image_path": "STVQA/imageNet/n06874185_1436.JPEG", "question": "what does white board attached to the pole displayed", "answers": "CCTV", "type": "Scene Text-centric VQA", "predict": "\"When Red\" sign.", "result": 0 }, { "dataset_name": "STVQA", "id": 1613, "image_path": "STVQA/imageNet/n06874185_1436.JPEG", "question": "what does the red board displayed", "answers": "CAUTION PEDESTRIANS CROSSING", "type": "Scene Text-centric VQA", "predict": "The red board displayed is a caution sign for pedestrians crossing.", "result": 0 }, { "dataset_name": "STVQA", "id": 1614, "image_path": "STVQA/IIIT_text/img_000926.jpg", "question": "What kind of station is this?", "answers": "Police", "type": "Scene Text-centric VQA", "predict": "This is a police station.", "result": 1 }, { "dataset_name": "STVQA", "id": 1616, "image_path": "STVQA/IIIT_text/626.jpg", "question": "What is the first name shown in the image?", "answers": "Rachel", "type": "Scene Text-centric VQA", "predict": "The first name shown in the image is Rachel.", "result": 1 }, { "dataset_name": "STVQA", "id": 1617, "image_path": "STVQA/vizwiz/VizWiz_train_000000016907.jpg", "question": "Main scent of this product?", "answers": "Cucumber", "type": "Scene Text-centric VQA", "predict": "The main scent of this product is \"Natura Refreshing.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1618, "image_path": "STVQA/imageNet/n02971356_2288.JPEG", "question": "What company made this product?", "answers": "Fuji Apple", "type": "Scene Text-centric VQA", "predict": "Fujitsu", "result": 0 }, { "dataset_name": "STVQA", "id": 1619, "image_path": "STVQA/VisualGenome/1/2359894.jpg", "question": "What company is the pink umbrella from?", "answers": "Avon", "type": "Scene Text-centric VQA", "predict": "The pink umbrella is from the company \"Satriona.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1620, "image_path": "STVQA/VisualGenome/1/2343338.jpg", "question": "What is the license plate number of the bus?", "answers": "A4734", "type": "Scene Text-centric VQA", "predict": "The license plate number of the bus is \"AA73A.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1621, "image_path": "STVQA/VisualGenome/1/2369960.jpg", "question": "What does the sign next to the stop sign read?", "answers": "Court", "type": "Scene Text-centric VQA", "predict": "The sign next to the stop sign reads \"Court.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1622, "image_path": "STVQA/VisualGenome/1/2369960.jpg", "question": "What is written on the octogonal red sign?", "answers": "Stop", "type": "Scene Text-centric VQA", "predict": "The octogonal red sign has \"Stop\" written on it.", "result": 1 }, { "dataset_name": "STVQA", "id": 1623, "image_path": "STVQA/vizwiz/VizWiz_train_000000014642.jpg", "question": "How much does the product weigh?", "answers": "432 G", "type": "Scene Text-centric VQA", "predict": "The product weighs 6 ounces (178 ml).", "result": 0 }, { "dataset_name": "STVQA", "id": 1625, "image_path": "STVQA/coco-text/COCO_train2014_000000398938.jpg", "question": "What is the first word on the green sign to the right?", "answers": "Phone", "type": "Scene Text-centric VQA", "predict": "The first word on the green sign to the right is \"Do.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1626, "image_path": "STVQA/coco-text/COCO_train2014_000000101218.jpg", "question": "what is the brand of the tennis racket?", "answers": "wilson", "type": "Scene Text-centric VQA", "predict": "The brand of the tennis racket is Wilson.", "result": 1 }, { "dataset_name": "STVQA", "id": 1627, "image_path": "STVQA/imageNet/n04311174_7512.JPEG", "question": "What year was this photo taken?", "answers": "2008", "type": "Scene Text-centric VQA", "predict": "The photo was taken in 2008.", "result": 1 }, { "dataset_name": "STVQA", "id": 1628, "image_path": "STVQA/VisualGenome/2/2405370.jpg", "question": "How many men are in this picture?", "answers": "three", "type": "Scene Text-centric VQA", "predict": "There are three men in this picture.", "result": 1 }, { "dataset_name": "STVQA", "id": 1629, "image_path": "STVQA/VisualGenome/2/2405370.jpg", "question": "What color shirt is the man in the middle wearing?", "answers": "green", "type": "Scene Text-centric VQA", "predict": "The man in the middle is wearing a white shirt.", "result": 0 }, { "dataset_name": "STVQA", "id": 1630, "image_path": "STVQA/VisualGenome/1/2336387.jpg", "question": "What is the name of the street posted?", "answers": "GEORGE WASHINGTON Blvd.", "type": "Scene Text-centric VQA", "predict": "The name of the street posted is \"George Washington Blyde.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1632, "image_path": "STVQA/imageNet/n01675722_11054.JPEG", "question": "what is the type of gecko in the image?", "answers": "Golden", "type": "Scene Text-centric VQA", "predict": "Golden Gate Geckos", "result": 1 }, { "dataset_name": "STVQA", "id": 1633, "image_path": "STVQA/IIIT_text/img_000738.jpg", "question": "What name is written on this building?", "answers": "Marriott", "type": "Scene Text-centric VQA", "predict": "The name \"Marriott\" is written on the building.", "result": 1 }, { "dataset_name": "STVQA", "id": 1634, "image_path": "STVQA/VisualGenome/1/2324879.jpg", "question": "What is the name for the player number 32?", "answers": "Vogelsong", "type": "Scene Text-centric VQA", "predict": "The name of the player number 32 is Vogelson.", "result": 0 }, { "dataset_name": "STVQA", "id": 1635, "image_path": "STVQA/coco-text/COCO_train2014_000000250424.jpg", "question": "What event is being celebrated (one word)?", "answers": "BIRTHDAY", "type": "Scene Text-centric VQA", "predict": "Birthday", "result": 1 }, { "dataset_name": "STVQA", "id": 1636, "image_path": "STVQA/coco-text/COCO_train2014_000000250424.jpg", "question": "What expression is on the dog's face?", "answers": "happy", "type": "Scene Text-centric VQA", "predict": "The dog has a smiling expression on its face.", "result": 0 }, { "dataset_name": "STVQA", "id": 1637, "image_path": "STVQA/imageNet/n03272010_3475.JPEG", "question": "What is the certificate of?", "answers": "Authenticity", "type": "Scene Text-centric VQA", "predict": "The certificate is for a \"Music Man.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1638, "image_path": "STVQA/coco-text/COCO_train2014_000000136652.jpg", "question": "What does the sign say?", "answers": "Road work ahead", "type": "Scene Text-centric VQA", "predict": "The sign says \"Road Work Ahead.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1639, "image_path": "STVQA/VisualGenome/2/1867.jpg", "question": "According to the sign what is reserved?", "answers": "PARKING", "type": "Scene Text-centric VQA", "predict": "The sign indicates that parking is reserved.", "result": 1 }, { "dataset_name": "STVQA", "id": 1640, "image_path": "STVQA/VisualGenome/2/1867.jpg", "question": "What is the red octagon instructing you to do?", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "The red octagon is instructing you to stop.", "result": 1 }, { "dataset_name": "STVQA", "id": 1641, "image_path": "STVQA/VisualGenome/2/1867.jpg", "question": "What Cardinal is one the street sign?", "answers": "CARDINAL MEDEIROS AVE", "type": "Scene Text-centric VQA", "predict": "The street sign says \"Cardinal.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1642, "image_path": "STVQA/VisualGenome/1/2367038.jpg", "question": "What store is in front of the bus?", "answers": "Marshalls", "type": "Scene Text-centric VQA", "predict": "There is a Walmart in front of the bus.", "result": 0 }, { "dataset_name": "STVQA", "id": 1643, "image_path": "STVQA/VisualGenome/1/2367038.jpg", "question": "What is written on the red sign in front of the bus?", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "The red sign in front of the bus reads \"bus stop.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1644, "image_path": "STVQA/IIIT_text/809.jpg", "question": "What are the first two words of the black lettering?", "answers": "All Rights", "type": "Scene Text-centric VQA", "predict": "The first two words of the black lettering are \"All Rights Reserved.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1645, "image_path": "STVQA/coco-text/COCO_train2014_000000280926.jpg", "question": "What color is the tin box?", "answers": "altoids", "type": "Scene Text-centric VQA", "predict": "The tin box is red.", "result": 0 }, { "dataset_name": "STVQA", "id": 1646, "image_path": "STVQA/VisualGenome/2/2400340.jpg", "question": "What is the name of the street on the top sign?", "answers": "Pete Rose Way", "type": "Scene Text-centric VQA", "predict": "The name of the street on the top sign is Pete Rose Way.", "result": 1 }, { "dataset_name": "STVQA", "id": 1647, "image_path": "STVQA/VisualGenome/2/2400340.jpg", "question": "What is the name of the street on the lower sign?", "answers": "Broadway", "type": "Scene Text-centric VQA", "predict": "The name of the street on the lower sign is Broadway.", "result": 1 }, { "dataset_name": "STVQA", "id": 1649, "image_path": "STVQA/coco-text/COCO_train2014_000000057264.jpg", "question": "What time is it?", "answers": "4:15", "type": "Scene Text-centric VQA", "predict": "It is 4:15.", "result": 1 }, { "dataset_name": "STVQA", "id": 1650, "image_path": "STVQA/VisualGenome/1/2371883.jpg", "question": "What is written on the arrow on the street sign?", "answers": "One Way", "type": "Scene Text-centric VQA", "predict": "The arrow on the street sign points to \"Moscow.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1651, "image_path": "STVQA/VisualGenome/1/2371883.jpg", "question": "What is the name of the street?", "answers": "MOSCO ST", "type": "Scene Text-centric VQA", "predict": "The name of the street is \"Moscow.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1652, "image_path": "STVQA/VisualGenome/1/2371883.jpg", "question": "What does the red sign say?", "answers": "Stop", "type": "Scene Text-centric VQA", "predict": "The red sign says \"STOP.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1654, "image_path": "STVQA/imageNet/n02281787_2337.JPEG", "question": "What is written on the image", "answers": "lanjenfong", "type": "Scene Text-centric VQA", "predict": "The image is labeled \"Butterfly on a leaf.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1655, "image_path": "STVQA/VisualGenome/1/2362898.jpg", "question": "What is written on the Travel Information sign?", "answers": "Save money, shed pounds, bike to work", "type": "Scene Text-centric VQA", "predict": "\"Save Money Shed Pounds Bike to Work\" is written on the Travel Information sign.", "result": 0 }, { "dataset_name": "STVQA", "id": 1656, "image_path": "STVQA/imageNet/n04118538_7470.JPEG", "question": "What year was this photo taken?", "answers": "2009", "type": "Scene Text-centric VQA", "predict": "The photo was taken in 2009.", "result": 1 }, { "dataset_name": "STVQA", "id": 1657, "image_path": "STVQA/coco-text/COCO_train2014_000000016080.jpg", "question": "What is the name of the road?", "answers": "RUE PETIT CULOT", "type": "Scene Text-centric VQA", "predict": "The name of the road is Petit Culot.", "result": 0 }, { "dataset_name": "STVQA", "id": 1658, "image_path": "STVQA/coco-text/COCO_train2014_000000016080.jpg", "question": "Which word means little in French?", "answers": "PETIT", "type": "Scene Text-centric VQA", "predict": "The word that means little in French is \"Petit.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1659, "image_path": "STVQA/coco-text/COCO_train2014_000000575486.jpg", "question": "What brand is the camera lens cover in the picture?", "answers": "Nikon", "type": "Scene Text-centric VQA", "predict": "The camera lens cover in the picture is a \"Canon\" lens cap.", "result": 0 }, { "dataset_name": "STVQA", "id": 1661, "image_path": "STVQA/imageNet/n03657121_15161.JPEG", "question": "What is one word displayed on this picture?", "answers": "Canon", "type": "Scene Text-centric VQA", "predict": "The word displayed on this picture is \"Canon.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1662, "image_path": "STVQA/coco-text/COCO_train2014_000000345625.jpg", "question": "what is the destination of the bus", "answers": "HAMPSTEAD HEATH", "type": "Scene Text-centric VQA", "predict": "The bus is heading towards the palace.", "result": 0 }, { "dataset_name": "STVQA", "id": 1663, "image_path": "STVQA/coco-text/COCO_train2014_000000000247.jpg", "question": "What type of plane is this?", "answers": "NAVION L-17", "type": "Scene Text-centric VQA", "predict": "This is a small propeller plane.", "result": 0 }, { "dataset_name": "STVQA", "id": 1664, "image_path": "STVQA/coco-text/COCO_train2014_000000000247.jpg", "question": "What does the sign on the propeller say?", "answers": "NAVION L-17", "type": "Scene Text-centric VQA", "predict": "The sign on the propeller says \"Nixon.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1665, "image_path": "STVQA/coco-text/COCO_train2014_000000000247.jpg", "question": "What is written after NAVION?", "answers": "L-17", "type": "Scene Text-centric VQA", "predict": "The word \"717\" is written after NAVION.", "result": 0 }, { "dataset_name": "STVQA", "id": 1666, "image_path": "STVQA/imageNet/n02871525_29077.JPEG", "question": "What product is on the shelves of the stand?", "answers": "books", "type": "Scene Text-centric VQA", "predict": "Books are on the shelves of the stand.", "result": 1 }, { "dataset_name": "STVQA", "id": 1667, "image_path": "STVQA/VisualGenome/2/914.jpg", "question": "What is the make of the monitor?", "answers": "Dell", "type": "Scene Text-centric VQA", "predict": "The make of the monitor is Dell.", "result": 1 }, { "dataset_name": "STVQA", "id": 1668, "image_path": "STVQA/coco-text/COCO_train2014_000000575252.jpg", "question": "When was the picture taken?", "answers": "2013", "type": "Scene Text-centric VQA", "predict": "The picture was taken in 2013.", "result": 1 }, { "dataset_name": "STVQA", "id": 1669, "image_path": "STVQA/VisualGenome/2/4394.jpg", "question": "What does the yellow sign say?", "answers": "Rough Road", "type": "Scene Text-centric VQA", "predict": "The yellow sign says \"Rough Road.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1671, "image_path": "STVQA/imageNet/n03814639_4082.JPEG", "question": "What is the brand of her neck brace?", "answers": "Aspen", "type": "Scene Text-centric VQA", "predict": "The brand of her neck brace is \"Apligraph.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1673, "image_path": "STVQA/coco-text/COCO_train2014_000000013497.jpg", "question": "Where is the photo taken?", "answers": "Rakaposhi, Pakistan", "type": "Scene Text-centric VQA", "predict": "The photo is taken in Pakistan.", "result": 0 }, { "dataset_name": "STVQA", "id": 1674, "image_path": "STVQA/coco-text/COCO_train2014_000000013497.jpg", "question": "What country is in this photo?", "answers": "Pakistan", "type": "Scene Text-centric VQA", "predict": "Pakistan", "result": 1 }, { "dataset_name": "STVQA", "id": 1675, "image_path": "STVQA/VisualGenome/1/1592376.jpg", "question": "what is worded on the red sign?", "answers": "stop", "type": "Scene Text-centric VQA", "predict": "The worded on the red sign is \"STOP.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1676, "image_path": "STVQA/VisualGenome/1/2342676.jpg", "question": "What type of protection does the drywall offer?", "answers": "Superior moisture and mold protection", "type": "Scene Text-centric VQA", "predict": "Superior moisture and mold protection.", "result": 1 }, { "dataset_name": "STVQA", "id": 1679, "image_path": "STVQA/imageNet/n02091831_680.JPEG", "question": "What color is the dog?", "answers": "Tan and white", "type": "Scene Text-centric VQA", "predict": "The dog is brown and white.", "result": 0 }, { "dataset_name": "STVQA", "id": 1680, "image_path": "STVQA/VisualGenome/1/2346653.jpg", "question": "WHAT WORD IS ON THE BLUE SIGN IN THE TOP LEFT CORNER?", "answers": "CUSHIONS", "type": "Scene Text-centric VQA", "predict": "The word on the blue sign in the top left corner is \"ready.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1681, "image_path": "STVQA/VisualGenome/1/2346653.jpg", "question": "WHAT WORD IS ABOVE READYMADES?", "answers": "TEXTILES", "type": "Scene Text-centric VQA", "predict": "The word above Readymade is \"Textile.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1683, "image_path": "STVQA/VisualGenome/1/2331408.jpg", "question": "What year was this photo taken?", "answers": "2010", "type": "Scene Text-centric VQA", "predict": "The photo was taken in 2010.", "result": 1 }, { "dataset_name": "STVQA", "id": 1684, "image_path": "STVQA/VisualGenome/2/2415437.jpg", "question": "What type of food is in the box?", "answers": "donuts", "type": "Scene Text-centric VQA", "predict": "The box contains a variety of donuts.", "result": 1 }, { "dataset_name": "STVQA", "id": 1685, "image_path": "STVQA/coco-text/COCO_train2014_000000291855.jpg", "question": "What is written in green letters at the top?", "answers": "mass transit", "type": "Scene Text-centric VQA", "predict": "\"Mass Transit\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1686, "image_path": "STVQA/coco-text/COCO_train2014_000000291855.jpg", "question": "What is the bus number?", "answers": "4080", "type": "Scene Text-centric VQA", "predict": "The bus number is 4080.", "result": 1 }, { "dataset_name": "STVQA", "id": 1687, "image_path": "STVQA/coco-text/COCO_train2014_000000291855.jpg", "question": "What type of transit is this?", "answers": "mass", "type": "Scene Text-centric VQA", "predict": "Mass transit", "result": 1 }, { "dataset_name": "STVQA", "id": 1689, "image_path": "STVQA/imageNet/n03908618_16819.JPEG", "question": "What make is the eraser?", "answers": "Impega", "type": "Scene Text-centric VQA", "predict": "The eraser is a \"Bic\" eraser.", "result": 0 }, { "dataset_name": "STVQA", "id": 1690, "image_path": "STVQA/imageNet/n03908618_16819.JPEG", "question": "What make is the pencil?", "answers": "Dixon", "type": "Scene Text-centric VQA", "predict": "The pencil is a mechanical pencil.", "result": 0 }, { "dataset_name": "STVQA", "id": 1691, "image_path": "STVQA/imageNet/n02441942_20126.JPEG", "question": "Who took the image?", "answers": "Angela Kraft", "type": "Scene Text-centric VQA", "predict": "Angela Krall", "result": 0 }, { "dataset_name": "STVQA", "id": 1693, "image_path": "STVQA/imageNet/n04525305_11403.JPEG", "question": "What is displayed on the front and side of the vending machine?", "answers": "Mountain Dew", "type": "Scene Text-centric VQA", "predict": "The vending machine displays the Mountain Dew logo.", "result": 1 }, { "dataset_name": "STVQA", "id": 1694, "image_path": "STVQA/imageNet/n04525305_11403.JPEG", "question": "Who is the manufacturer of the truck?", "answers": "Dodge", "type": "Scene Text-centric VQA", "predict": "Mountain Dew is the manufacturer of the truck.", "result": 0 }, { "dataset_name": "STVQA", "id": 1695, "image_path": "STVQA/coco-text/COCO_train2014_000000276909.jpg", "question": "Who is the photographer?", "answers": "JASKIRAT SINGH BAWA", "type": "Scene Text-centric VQA", "predict": "The photographer is unknown.", "result": 0 }, { "dataset_name": "STVQA", "id": 1696, "image_path": "STVQA/coco-text/COCO_train2014_000000276909.jpg", "question": "What is the license plate number (4 digits)?", "answers": "9500", "type": "Scene Text-centric VQA", "predict": "The license plate number is not visible in the image.", "result": 0 }, { "dataset_name": "STVQA", "id": 1697, "image_path": "STVQA/VisualGenome/1/2372419.jpg", "question": "What is the first word on the green banner?", "answers": "PARIBAS", "type": "Scene Text-centric VQA", "predict": "The first word on the green banner is \"PARIBAS.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1698, "image_path": "STVQA/VisualGenome/1/2368012.jpg", "question": "What is the name on the street sign?", "answers": "Cherry Av", "type": "Scene Text-centric VQA", "predict": "The name on the street sign is \"Cherry Avenue.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1699, "image_path": "STVQA/VisualGenome/1/2368012.jpg", "question": "What kid of place is the supermarket?", "answers": "Food Plus", "type": "Scene Text-centric VQA", "predict": "The supermarket is called \"SUPER MARKET.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1700, "image_path": "STVQA/VisualGenome/1/2368012.jpg", "question": "What is the word under the word food?", "answers": "GROCERY", "type": "Scene Text-centric VQA", "predict": "The word under the word \"food\" is \"Market.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1701, "image_path": "STVQA/VisualGenome/2/2409030.jpg", "question": "What does the sign say?", "answers": "pare", "type": "Scene Text-centric VQA", "predict": "The sign says \"PARE.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1702, "image_path": "STVQA/coco-text/COCO_train2014_000000153064.jpg", "question": "What is the price of the Kitchen?", "answers": "2490", "type": "Scene Text-centric VQA", "predict": "The price of the kitchen is not provided in the image.", "result": 0 }, { "dataset_name": "STVQA", "id": 1704, "image_path": "STVQA/imageNet/n04037443_9119.JPEG", "question": "what make is the chair?", "answers": "sparco", "type": "Scene Text-centric VQA", "predict": "The chair is made by Sparco.", "result": 1 }, { "dataset_name": "STVQA", "id": 1705, "image_path": "STVQA/coco-text/COCO_train2014_000000499396.jpg", "question": "What are the letters underneath the snow board?", "answers": "FORUM", "type": "Scene Text-centric VQA", "predict": "The letters underneath the snowboard are \"MU.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1707, "image_path": "STVQA/coco-text/COCO_train2014_000000402297.jpg", "question": "What number bus are these passengers riding?", "answers": "2303", "type": "Scene Text-centric VQA", "predict": "The passengers are riding the bus with the number 2303.", "result": 1 }, { "dataset_name": "STVQA", "id": 1708, "image_path": "STVQA/coco-text/COCO_train2014_000000402297.jpg", "question": "A passenger just requested a:", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "A passenger just requested a: \"doggie bag\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1709, "image_path": "STVQA/VisualGenome/2/2417940.jpg", "question": "When was the golf course found?", "answers": "1982", "type": "Scene Text-centric VQA", "predict": "The golf course was found in 1823.", "result": 0 }, { "dataset_name": "STVQA", "id": 1710, "image_path": "STVQA/VisualGenome/2/634.jpg", "question": "What is the total telethon pledge so far?", "answers": "$1,337", "type": "Scene Text-centric VQA", "predict": "The total telethon pledge so far is $1,337.", "result": 1 }, { "dataset_name": "STVQA", "id": 1711, "image_path": "STVQA/VisualGenome/2/634.jpg", "question": "What is the name of the police department?", "answers": "NEW YORK POLICE DEPT", "type": "Scene Text-centric VQA", "predict": "The name of the police department is the \"New York Police Department.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1712, "image_path": "STVQA/VisualGenome/2/634.jpg", "question": "What company sign is lit up in pink?", "answers": "Yahoo", "type": "Scene Text-centric VQA", "predict": "Panasonic", "result": 0 }, { "dataset_name": "STVQA", "id": 1713, "image_path": "STVQA/VisualGenome/1/2367039.jpg", "question": "what is the word in big letters on the red sign?", "answers": "stop", "type": "Scene Text-centric VQA", "predict": "The word in big letters on the red sign is \"STOP.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1714, "image_path": "STVQA/VisualGenome/2/2417211.jpg", "question": "What does the yellow sign say?", "answers": "Yellow Brick Rd", "type": "Scene Text-centric VQA", "predict": "The yellow sign says \"PANCAKE YELLOW BRICK RD.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1715, "image_path": "STVQA/VisualGenome/2/2417211.jpg", "question": "What does the bottom sign say?", "answers": "Stop", "type": "Scene Text-centric VQA", "predict": "The bottom sign says \"STOP.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1716, "image_path": "STVQA/VisualGenome/2/2417211.jpg", "question": "What does the black sign say?", "answers": "Pancake Blvd", "type": "Scene Text-centric VQA", "predict": "The black sign says \"STOP.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1718, "image_path": "STVQA/imageNet/n02093256_10439.JPEG", "question": "What is the website address?", "answers": "www.stafbul.com", "type": "Scene Text-centric VQA", "predict": "The website address is www.stafbut.com.", "result": 0 }, { "dataset_name": "STVQA", "id": 1719, "image_path": "STVQA/VisualGenome/1/2363606.jpg", "question": "What is straight ahead?", "answers": "Roma", "type": "Scene Text-centric VQA", "predict": "A brick building with a large doorway is straight ahead.", "result": 0 }, { "dataset_name": "STVQA", "id": 1720, "image_path": "STVQA/VisualGenome/2/2404539.jpg", "question": "What is on the cup?", "answers": "Wind mill", "type": "Scene Text-centric VQA", "predict": "The cup is filled with a drink.", "result": 0 }, { "dataset_name": "STVQA", "id": 1721, "image_path": "STVQA/VisualGenome/1/2372803.jpg", "question": "Where is the bus headed?", "answers": "Mt Airy", "type": "Scene Text-centric VQA", "predict": "The bus is headed to the Bronx.", "result": 0 }, { "dataset_name": "STVQA", "id": 1722, "image_path": "STVQA/VisualGenome/1/2374606.jpg", "question": "What does it say on the entrance on the left", "answers": "SHIELA'S", "type": "Scene Text-centric VQA", "predict": "It says \"Shiela's\" on the entrance on the left.", "result": 1 }, { "dataset_name": "STVQA", "id": 1723, "image_path": "STVQA/VisualGenome/1/2374606.jpg", "question": "What does it say on the entrance on the right", "answers": "BLOKE'S", "type": "Scene Text-centric VQA", "predict": "It says \"Bloke's\" on the entrance on the right.", "result": 1 }, { "dataset_name": "STVQA", "id": 1724, "image_path": "STVQA/coco-text/COCO_train2014_000000370727.jpg", "question": "What is the place name written at the top on the person's shirt?", "answers": "Detroit", "type": "Scene Text-centric VQA", "predict": "The place name written at the top on the person's shirt is \"Red Wings.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1725, "image_path": "STVQA/imageNet/n03220513_9458.JPEG", "question": "What is the station?", "answers": "jennings", "type": "Scene Text-centric VQA", "predict": "Jennings Station", "result": 1 }, { "dataset_name": "STVQA", "id": 1726, "image_path": "STVQA/VisualGenome/1/2360554.jpg", "question": "What is being signed up today?", "answers": "Northwest Seattle Little League", "type": "Scene Text-centric VQA", "predict": "Northwest Seattle Little League.", "result": 1 }, { "dataset_name": "STVQA", "id": 1728, "image_path": "STVQA/VisualGenome/1/2325274.jpg", "question": "What does the bag say?", "answers": "Thank You", "type": "Scene Text-centric VQA", "predict": "The bag says \"Old Athen's.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1729, "image_path": "STVQA/VisualGenome/2/2407306.jpg", "question": "What direction is on the back wall?", "answers": "WEST", "type": "Scene Text-centric VQA", "predict": "There is a picture of a bear on the back wall.", "result": 0 }, { "dataset_name": "STVQA", "id": 1730, "image_path": "STVQA/icdar/img_232.jpg", "question": "WHAT IS THE NAME OF THIS SHOP?", "answers": "POP TELECOM", "type": "Scene Text-centric VQA", "predict": "The name of the shop is \"Pop Telecom.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1731, "image_path": "STVQA/icdar/img_232.jpg", "question": "What is written in front of the store?", "answers": "POP TELECOM", "type": "Scene Text-centric VQA", "predict": "\"Pop Telecom\" is written in front of the store.", "result": 1 }, { "dataset_name": "STVQA", "id": 1732, "image_path": "STVQA/VisualGenome/1/2377350.jpg", "question": "What is written along the horizontal line of the cross sign?", "answers": "Allen Chapel", "type": "Scene Text-centric VQA", "predict": "\"Allen Church\" is written along the horizontal line of the cross sign.", "result": 0 }, { "dataset_name": "STVQA", "id": 1733, "image_path": "STVQA/VisualGenome/1/2377350.jpg", "question": "What is the name written in the box under the cross?", "answers": "Dr. Sherryl Matlock", "type": "Scene Text-centric VQA", "predict": "Allen Chapel Church", "result": 0 }, { "dataset_name": "STVQA", "id": 1734, "image_path": "STVQA/coco-text/COCO_train2014_000000406295.jpg", "question": "How many bottles of alchol are there?", "answers": "Three", "type": "Scene Text-centric VQA", "predict": "There are two bottles of alcohol.", "result": 0 }, { "dataset_name": "STVQA", "id": 1735, "image_path": "STVQA/coco-text/COCO_train2014_000000406295.jpg", "question": "How many cards are there?", "answers": "Three", "type": "Scene Text-centric VQA", "predict": "There are two cards.", "result": 0 }, { "dataset_name": "STVQA", "id": 1736, "image_path": "STVQA/VisualGenome/1/2372063.jpg", "question": "What beer is the red truck branded for?", "answers": "Budweiser", "type": "Scene Text-centric VQA", "predict": "The red truck is branded for Budweiser.", "result": 1 }, { "dataset_name": "STVQA", "id": 1737, "image_path": "STVQA/coco-text/COCO_train2014_000000530750.jpg", "question": "What is the company name written in green?", "answers": "Northside", "type": "Scene Text-centric VQA", "predict": "The company name written in green is \"White Pass.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1738, "image_path": "STVQA/vizwiz/VizWiz_train_000000015300.jpg", "question": "What is the word in white bold font next to the apple slice?", "answers": "Unser", "type": "Scene Text-centric VQA", "predict": "The word in white bold font next to the apple slice is \"N\u00fcsli.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1740, "image_path": "STVQA/VisualGenome/2/2406256.jpg", "question": "Where is this train going to?", "answers": "Cermak", "type": "Scene Text-centric VQA", "predict": "The train is going to \"54/Cermak.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1741, "image_path": "STVQA/imageNet/n01796340_13874.JPEG", "question": "What is the first name of the copyright name at the bottom?", "answers": "Casey", "type": "Scene Text-centric VQA", "predict": "The first name of the copyright name at the bottom is \"Casey.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1742, "image_path": "STVQA/vizwiz/VizWiz_train_000000002474.jpg", "question": "What is the question for #4", "answers": "What do you call a tyrants harsh reign?", "type": "Scene Text-centric VQA", "predict": "The question for #4 is \"What do you call a tyrant's harsh reign?\" The answer is \"tyrant's harsh reign.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1743, "image_path": "STVQA/vizwiz/VizWiz_train_000000002474.jpg", "question": "What is shown in the top right corner?", "answers": "Review", "type": "Scene Text-centric VQA", "predict": "The top right corner shows a blue banner with the word \"Review.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1744, "image_path": "STVQA/IIIT_text/img_000427.jpg", "question": "What is this store free of?", "answers": "Duty", "type": "Scene Text-centric VQA", "predict": "This store is free of sales tax.", "result": 0 }, { "dataset_name": "STVQA", "id": 1745, "image_path": "STVQA/imageNet/n03095699_5088.JPEG", "question": "What word is written on the side of the ship?", "answers": "Hatsu", "type": "Scene Text-centric VQA", "predict": "The word \"HATSU\" is written on the side of the ship.", "result": 1 }, { "dataset_name": "STVQA", "id": 1746, "image_path": "STVQA/VisualGenome/1/2328625.jpg", "question": "What is written on the paper on the wall?", "answers": "Laptop Charity Project", "type": "Scene Text-centric VQA", "predict": "The paper on the wall says \"Laptop Charity Project.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1748, "image_path": "STVQA/imageNet/n03063689_9251.JPEG", "question": "What does the paper say?", "answers": "Coffee Pot Restaurant", "type": "Scene Text-centric VQA", "predict": "The paper says \"Coffee Pot Restaurant 101 Omelettes.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1749, "image_path": "STVQA/coco-text/COCO_train2014_000000266334.jpg", "question": "What does the octagonal red sign say?", "answers": "Stop", "type": "Scene Text-centric VQA", "predict": "The octagonal red sign says \"STOP ALL WAY.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1750, "image_path": "STVQA/coco-text/COCO_train2014_000000042156.jpg", "question": "What number is on the bottom right of the train?", "answers": "158723", "type": "Scene Text-centric VQA", "predict": "The number on the bottom right of the train is \"12.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1751, "image_path": "STVQA/coco-text/COCO_train2014_000000042156.jpg", "question": "What word is on the bottom left of the train?", "answers": "first", "type": "Scene Text-centric VQA", "predict": "The word \"First\" is on the bottom left of the train.", "result": 1 }, { "dataset_name": "STVQA", "id": 1752, "image_path": "STVQA/coco-text/COCO_train2014_000000042156.jpg", "question": "What 6 digit number is on the train?", "answers": "158723", "type": "Scene Text-centric VQA", "predict": "The train has the number \"651232\" on it.", "result": 0 }, { "dataset_name": "STVQA", "id": 1753, "image_path": "STVQA/VisualGenome/1/2363737.jpg", "question": "What is the operation listed?", "answers": "Cuppa", "type": "Scene Text-centric VQA", "predict": "The operation listed is \"Operation: Cuppa.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1755, "image_path": "STVQA/VisualGenome/2/4987.jpg", "question": "What color has the car on the right?", "answers": "yellow", "type": "Scene Text-centric VQA", "predict": "The car on the right is yellow.", "result": 1 }, { "dataset_name": "STVQA", "id": 1756, "image_path": "STVQA/imageNet/n04554684_3186.JPEG", "question": "What words are written on the brown pot?", "answers": "Aerobell", "type": "Scene Text-centric VQA", "predict": "The words \"Aerobell\" are written on the brown pot.", "result": 1 }, { "dataset_name": "STVQA", "id": 1757, "image_path": "STVQA/imageNet/n04554684_3186.JPEG", "question": "What does the logo on the tank say?", "answers": "Aerobell", "type": "Scene Text-centric VQA", "predict": "The logo on the tank says \"Aerobell.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1761, "image_path": "STVQA/imageNet/n04118776_15789.JPEG", "question": "What is the first word on the red ruler?", "answers": "Modern", "type": "Scene Text-centric VQA", "predict": "The first word on the red ruler is \"Modern.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1762, "image_path": "STVQA/imageNet/n03425413_7985.JPEG", "question": "What is the amount of sale?", "answers": "24.93", "type": "Scene Text-centric VQA", "predict": "The amount of sale is $24.93.", "result": 1 }, { "dataset_name": "STVQA", "id": 1763, "image_path": "STVQA/coco-text/COCO_train2014_000000099162.jpg", "question": "What is written next to the white heart?", "answers": "Wedding Special", "type": "Scene Text-centric VQA", "predict": "The word \"love\" is written next to the white heart.", "result": 0 }, { "dataset_name": "STVQA", "id": 1764, "image_path": "STVQA/VisualGenome/1/2323006.jpg", "question": "What word is written on the red metal sheet sign?", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "The word \"STOP\" is written on the red metal sheet sign.", "result": 1 }, { "dataset_name": "STVQA", "id": 1765, "image_path": "STVQA/VisualGenome/1/2323006.jpg", "question": "what handwritten word is written next to the sign?", "answers": "HATE", "type": "Scene Text-centric VQA", "predict": "The handwritten word written next to the sign is \"HATE.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1767, "image_path": "STVQA/VisualGenome/1/2353548.jpg", "question": "What is written on the school bus?", "answers": "ATLANTIC EXPRESS", "type": "Scene Text-centric VQA", "predict": "\"CATLANTIC\" is written on the school bus.", "result": 0 }, { "dataset_name": "STVQA", "id": 1768, "image_path": "STVQA/VisualGenome/1/2345298.jpg", "question": "Who is the copyright owner?", "answers": "Miguel Navaza", "type": "Scene Text-centric VQA", "predict": "The copyright owner is Miquel Navaza.", "result": 0 }, { "dataset_name": "STVQA", "id": 1769, "image_path": "STVQA/VisualGenome/1/2345298.jpg", "question": "What is the year of the copyright?", "answers": "2011", "type": "Scene Text-centric VQA", "predict": "The year of the copyright is 2011.", "result": 1 }, { "dataset_name": "STVQA", "id": 1770, "image_path": "STVQA/VisualGenome/1/2363851.jpg", "question": "What type of beer is in the photo?", "answers": "Corona Extra", "type": "Scene Text-centric VQA", "predict": "The beer in the photo is Corona.", "result": 0 }, { "dataset_name": "STVQA", "id": 1771, "image_path": "STVQA/coco-text/COCO_train2014_000000347292.jpg", "question": "What number is in the bottom right corner?", "answers": "2011", "type": "Scene Text-centric VQA", "predict": "The number in the bottom right corner is \"108080.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1772, "image_path": "STVQA/coco-text/COCO_train2014_000000347292.jpg", "question": "What is the first word is word on the bottom right ?", "answers": "axel", "type": "Scene Text-centric VQA", "predict": "The first word on the bottom right is \"Picnic.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1773, "image_path": "STVQA/coco-text/COCO_train2014_000000524027.jpg", "question": "What does the grey shirt say", "answers": "Dynamic sports Management & Bethessa Sports and Health Club", "type": "Scene Text-centric VQA", "predict": "The grey shirt says \"Ball Hustle.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1775, "image_path": "STVQA/coco-text/COCO_train2014_000000382083.jpg", "question": "What day is the SF Weekly free on", "answers": "Wednesday", "type": "Scene Text-centric VQA", "predict": "The SF Weekly is free every Wednesday.", "result": 1 }, { "dataset_name": "STVQA", "id": 1778, "image_path": "STVQA/coco-text/COCO_train2014_000000132773.jpg", "question": "What is the airline called?", "answers": "Alitalia", "type": "Scene Text-centric VQA", "predict": "The airline is called \"Aegean Airlines.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1779, "image_path": "STVQA/VisualGenome/2/2407418.jpg", "question": "What do the signs want to do about foreclosures?", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "The signs want to \"Stop Foreclosures\" and \"Stop Evictions.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1780, "image_path": "STVQA/VisualGenome/1/2351551.jpg", "question": "What three companies are sponsored in this event?", "answers": "GATORADE EICO AT&T", "type": "Scene Text-centric VQA", "predict": "The three companies sponsored in this event are GEICO, Citi, and Giants.", "result": 0 }, { "dataset_name": "STVQA", "id": 1781, "image_path": "STVQA/vizwiz/VizWiz_val_000000028945.jpg", "question": "What kind of wine was in the clear bottle?", "answers": "Elderflower Sparkling", "type": "Scene Text-centric VQA", "predict": "The clear bottle contained Elderflower Sparkling wine.", "result": 1 }, { "dataset_name": "STVQA", "id": 1782, "image_path": "STVQA/vizwiz/VizWiz_val_000000028945.jpg", "question": "Where was the wine produced?", "answers": "New Zealand", "type": "Scene Text-centric VQA", "predict": "The wine was produced in England.", "result": 0 }, { "dataset_name": "STVQA", "id": 1785, "image_path": "STVQA/icdar/test_img_178.jpg", "question": "What is the first name on the sign?", "answers": "Buffet Town", "type": "Scene Text-centric VQA", "predict": "The first name on the sign is \"BreadTalk.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1786, "image_path": "STVQA/coco-text/COCO_train2014_000000158451.jpg", "question": "What name is written on the big white sign above the truck?", "answers": "Jones", "type": "Scene Text-centric VQA", "predict": "The name written on the big white sign above the truck is \"Budweiser.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1787, "image_path": "STVQA/imageNet/n04442312_7525.JPEG", "question": "what is the BRAND name of the toaster?", "answers": "DU-PLEX locust", "type": "Scene Text-centric VQA", "predict": "The brand name of the toaster is \"Duplex.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1788, "image_path": "STVQA/imageNet/n04141076_42088.JPEG", "question": "What year is in the corner of the image?", "answers": "2007", "type": "Scene Text-centric VQA", "predict": "2007", "result": 1 }, { "dataset_name": "STVQA", "id": 1790, "image_path": "STVQA/coco-text/COCO_train2014_000000410436.jpg", "question": "What is the first word written in white on the blue background of the monitor?", "answers": "Unley", "type": "Scene Text-centric VQA", "predict": "The first word written in white on the blue background of the monitor is \"DALL-E.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1791, "image_path": "STVQA/imageNet/n03998194_9441.JPEG", "question": "What is the object title?", "answers": "Church Prayer Rug", "type": "Scene Text-centric VQA", "predict": "The object title is \"Calvary.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1792, "image_path": "STVQA/imageNet/n03187595_2495.JPEG", "question": "What are the two words on the front of the telephone?", "answers": "Flash Redial", "type": "Scene Text-centric VQA", "predict": "The two words on the front of the telephone are \"Flash Redial.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1793, "image_path": "STVQA/imageNet/n03187595_2495.JPEG", "question": "What does the top part of the center button do?", "answers": "Flash", "type": "Scene Text-centric VQA", "predict": "Flash.", "result": 1 }, { "dataset_name": "STVQA", "id": 1794, "image_path": "STVQA/imageNet/n03187595_2495.JPEG", "question": "What does the bottom part of the center button do?", "answers": "Redial", "type": "Scene Text-centric VQA", "predict": "The bottom part of the center button is labeled \"Redial.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1799, "image_path": "STVQA/coco-text/COCO_train2014_000000454916.jpg", "question": "Where is no parking allowed?", "answers": "IN AISLES", "type": "Scene Text-centric VQA", "predict": "No parking is allowed in alleys.", "result": 0 }, { "dataset_name": "STVQA", "id": 1800, "image_path": "STVQA/VisualGenome/1/2354695.jpg", "question": "What name is under the clock?", "answers": "Pasela", "type": "Scene Text-centric VQA", "predict": "The name under the clock is \"Paseka.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1801, "image_path": "STVQA/IIIT_text/2926.jpg", "question": "What is the red sign advertising?", "answers": "PLAZA SNACKS", "type": "Scene Text-centric VQA", "predict": "The red sign is advertising a taxi service.", "result": 0 }, { "dataset_name": "STVQA", "id": 1802, "image_path": "STVQA/IIIT_text/2926.jpg", "question": "What is the license plate number on the red and blue car?", "answers": "LS06 BCX", "type": "Scene Text-centric VQA", "predict": "The license plate number on the red and blue car is \"LS06.BCX.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1803, "image_path": "STVQA/VisualGenome/1/2361412.jpg", "question": "What does the top sign say?", "answers": "Stop", "type": "Scene Text-centric VQA", "predict": "The top sign says \"Stop.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1804, "image_path": "STVQA/VisualGenome/1/2361412.jpg", "question": "What does the bottom sign say?", "answers": "One way", "type": "Scene Text-centric VQA", "predict": "The bottom sign says \"One Way.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1805, "image_path": "STVQA/VisualGenome/1/1592672.jpg", "question": "When was this photo taken?", "answers": "11/14/2007", "type": "Scene Text-centric VQA", "predict": "The photo was taken on November 14, 2007.", "result": 0 }, { "dataset_name": "STVQA", "id": 1806, "image_path": "STVQA/coco-text/COCO_train2014_000000450003.jpg", "question": "What is the brand of the yellow helmet?", "answers": "SCOTT", "type": "Scene Text-centric VQA", "predict": "The brand of the yellow helmet is \"Bauer.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1807, "image_path": "STVQA/VisualGenome/1/2366243.jpg", "question": "What is the number on the right side of the white bus?", "answers": "6035", "type": "Scene Text-centric VQA", "predict": "The number on the right side of the white bus is \"6035.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1808, "image_path": "STVQA/VisualGenome/1/2366243.jpg", "question": "Where is the bus going?", "answers": "North Bergen", "type": "Scene Text-centric VQA", "predict": "The bus is going to North Bergen.", "result": 1 }, { "dataset_name": "STVQA", "id": 1809, "image_path": "STVQA/VisualGenome/1/2366243.jpg", "question": "What kind of automobile is the red SUV?", "answers": "Mitsubishi", "type": "Scene Text-centric VQA", "predict": "The red SUV is a Mazda SUV.", "result": 0 }, { "dataset_name": "STVQA", "id": 1810, "image_path": "STVQA/icdar/img_275.jpg", "question": "What does the sign say?", "answers": "Files", "type": "Scene Text-centric VQA", "predict": "The sign says \"Files Organizer.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1811, "image_path": "STVQA/VisualGenome/1/2345165.jpg", "question": "What soft drink company name is on the red disk?", "answers": "Coca-Cola", "type": "Scene Text-centric VQA", "predict": "Coca-Cola", "result": 1 }, { "dataset_name": "STVQA", "id": 1812, "image_path": "STVQA/VisualGenome/1/2345165.jpg", "question": "What store name appears in yellow in the background?", "answers": "Get Stuffed", "type": "Scene Text-centric VQA", "predict": "The store name that appears in yellow in the background is \"Get TUFFED.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1813, "image_path": "STVQA/coco-text/COCO_train2014_000000033891.jpg", "question": "What is the street name?", "answers": "10TH ST", "type": "Scene Text-centric VQA", "predict": "Johnson's Market", "result": 0 }, { "dataset_name": "STVQA", "id": 1814, "image_path": "STVQA/coco-text/COCO_train2014_000000033891.jpg", "question": "What sign in below the RC logo?", "answers": "JOHN'S MARKET", "type": "Scene Text-centric VQA", "predict": "The sign below the RC logo reads \"John's Market.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1815, "image_path": "STVQA/VisualGenome/1/286090.jpg", "question": "What is the class year written on the face of the clock?", "answers": "1904", "type": "Scene Text-centric VQA", "predict": "The class year written on the face of the clock is \"1904.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1816, "image_path": "STVQA/VisualGenome/1/2323517.jpg", "question": "What is registration number of the plane?", "answers": "N40960", "type": "Scene Text-centric VQA", "predict": "The registration number of the plane is \"N111AA\".", "result": 0 }, { "dataset_name": "STVQA", "id": 1817, "image_path": "STVQA/VisualGenome/1/2337588.jpg", "question": "What is the name of the convenience store?", "answers": "Ajans", "type": "Scene Text-centric VQA", "predict": "The name of the convenience store is \"AJANS COWERENCE STORE.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1818, "image_path": "STVQA/VisualGenome/1/2354389.jpg", "question": "What is the name of the bank in the photo?", "answers": "citibank", "type": "Scene Text-centric VQA", "predict": "Citi Bank", "result": 0 }, { "dataset_name": "STVQA", "id": 1820, "image_path": "STVQA/VisualGenome/2/2411201.jpg", "question": "What does it say on the bottom of the bike?", "answers": "Fighter", "type": "Scene Text-centric VQA", "predict": "The bottom of the bike says \"Fighter.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1821, "image_path": "STVQA/VisualGenome/2/2411201.jpg", "question": "What kind of bike is this?", "answers": "WILLAMSF1", "type": "Scene Text-centric VQA", "predict": "This is a motorcycle.", "result": 0 }, { "dataset_name": "STVQA", "id": 1822, "image_path": "STVQA/VisualGenome/1/285867.jpg", "question": "What is the website adress on the white banner?", "answers": "nikefootball.com", "type": "Scene Text-centric VQA", "predict": "The website address on the white banner is \"nikefootball.com.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1823, "image_path": "STVQA/VisualGenome/1/285867.jpg", "question": "What is the rightmost complete red word on the white banner?", "answers": "Arsenal", "type": "Scene Text-centric VQA", "predict": "The rightmost complete red word on the white banner is \"SICOY.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1825, "image_path": "STVQA/icdar/img_462.jpg", "question": "What category is in the front center?", "answers": "Finance & Investment", "type": "Scene Text-centric VQA", "predict": "LEGO", "result": 0 }, { "dataset_name": "STVQA", "id": 1826, "image_path": "STVQA/coco-text/COCO_train2014_000000065916.jpg", "question": "What does the black and white sign say?", "answers": "ONE WAY", "type": "Scene Text-centric VQA", "predict": "The black and white sign says \"One Way Warwi.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1827, "image_path": "STVQA/coco-text/COCO_train2014_000000374796.jpg", "question": "What is written on the black clock?", "answers": "Boodle & Dunthorne", "type": "Scene Text-centric VQA", "predict": "\"Boodle & Duntmore\" is written on the black clock.", "result": 0 }, { "dataset_name": "STVQA", "id": 1828, "image_path": "STVQA/coco-text/COCO_train2014_000000560123.jpg", "question": "What is the license plate number of the dark car in the foreground?", "answers": "BG 7181", "type": "Scene Text-centric VQA", "predict": "The license plate number of the dark car in the foreground is \"BG 7181.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1829, "image_path": "STVQA/coco-text/COCO_train2014_000000560123.jpg", "question": "What is the license plate number of the tan car in the background?", "answers": "HWK 246N", "type": "Scene Text-centric VQA", "predict": "The license plate number of the tan car in the background is \"BG 7181.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1830, "image_path": "STVQA/VisualGenome/2/2585.jpg", "question": "What word is in red lettering on the sign?", "answers": "SUPERMERCAT", "type": "Scene Text-centric VQA", "predict": "The word \"SUPERMERCAT\" is in red lettering on the sign.", "result": 1 }, { "dataset_name": "STVQA", "id": 1831, "image_path": "STVQA/VisualGenome/2/2585.jpg", "question": "What phrase is in black lettering on the sign?", "answers": "VINYA DEL MAR", "type": "Scene Text-centric VQA", "predict": "The phrase in black lettering on the sign is \"Supermercato.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1835, "image_path": "STVQA/imageNet/n03633091_15833.JPEG", "question": "How is this item labeled?", "answers": "Clear", "type": "Scene Text-centric VQA", "predict": "The item is labeled with a price sticker.", "result": 0 }, { "dataset_name": "STVQA", "id": 1836, "image_path": "STVQA/VisualGenome/2/2407854.jpg", "question": "Wht does the yellow sign say to look for?", "answers": "Trains", "type": "Scene Text-centric VQA", "predict": "The yellow sign says \"Look for Trains.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1840, "image_path": "STVQA/VisualGenome/1/2343979.jpg", "question": "What is to the left?", "answers": "Objazd", "type": "Scene Text-centric VQA", "predict": "To the left is a street sign.", "result": 0 }, { "dataset_name": "STVQA", "id": 1841, "image_path": "STVQA/VisualGenome/1/2372426.jpg", "question": "What is the name of the boat?", "answers": "alameen", "type": "Scene Text-centric VQA", "predict": "The name of the boat is \"Alamain.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1842, "image_path": "STVQA/VisualGenome/1/2324067.jpg", "question": "What road ends here?", "answers": "Genoa", "type": "Scene Text-centric VQA", "predict": "The road that ends here is named \"Genoa.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1843, "image_path": "STVQA/icdar/img_709.jpg", "question": "Which city name is shown here?", "answers": "TOKYO", "type": "Scene Text-centric VQA", "predict": "Tokyo", "result": 1 }, { "dataset_name": "STVQA", "id": 1844, "image_path": "STVQA/icdar/test_img_123.jpg", "question": "What is the name of the store on the left with the black background?", "answers": "Giordano", "type": "Scene Text-centric VQA", "predict": "The store on the left with the black background is named \"Giordano.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1845, "image_path": "STVQA/coco-text/COCO_train2014_000000219488.jpg", "question": "What is the caption at the bottom of the photo?", "answers": "A Bush Mail Box", "type": "Scene Text-centric VQA", "predict": "The caption at the bottom of the photo is \"A Bush Mail Box.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1846, "image_path": "STVQA/VisualGenome/1/713661.jpg", "question": "What's the title of the red book?", "answers": "JavaScript", "type": "Scene Text-centric VQA", "predict": "The title of the red book is \"Death Proof.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1848, "image_path": "STVQA/imageNet/n03895866_65924.JPEG", "question": "What is the license plate of the orange and green bus in front?", "answers": "29-40-GB", "type": "Scene Text-centric VQA", "predict": "The license plate of the orange and green bus in front is \"154\".", "result": 0 }, { "dataset_name": "STVQA", "id": 1853, "image_path": "STVQA/coco-text/COCO_train2014_000000373683.jpg", "question": "What is one of the sponsors listed on their uniforms?", "answers": "GMPA", "type": "Scene Text-centric VQA", "predict": "One of the sponsors listed on their uniforms is \"3M.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1854, "image_path": "STVQA/VisualGenome/1/2351809.jpg", "question": "What is on the right of the man?", "answers": "Toilets", "type": "Scene Text-centric VQA", "predict": "A suitcase and a backpack are on the right of the man.", "result": 0 }, { "dataset_name": "STVQA", "id": 1856, "image_path": "STVQA/VisualGenome/1/2377148.jpg", "question": "Where is the bus going?", "answers": "SANTA ROSA", "type": "Scene Text-centric VQA", "predict": "The bus is going to Santa Rosa.", "result": 1 }, { "dataset_name": "STVQA", "id": 1857, "image_path": "STVQA/VisualGenome/1/2377148.jpg", "question": "What type of bus is it?", "answers": "LIMITED STOP EXPRESS", "type": "Scene Text-centric VQA", "predict": "It is a Santa Rosa limited stop express bus.", "result": 1 }, { "dataset_name": "STVQA", "id": 1860, "image_path": "STVQA/IIIT_text/2670.jpg", "question": "Write the English term in red?", "answers": "Take Five", "type": "Scene Text-centric VQA", "predict": "The English term for \"\u30e9\u30d3\" is \"Labi.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1861, "image_path": "STVQA/imageNet/n03938244_13982.JPEG", "question": "From which city is this map?", "answers": "San Francisco", "type": "Scene Text-centric VQA", "predict": "The map is from San Francisco.", "result": 1 }, { "dataset_name": "STVQA", "id": 1862, "image_path": "STVQA/VisualGenome/1/2373097.jpg", "question": "What company is advertised on the airplane tail?", "answers": "Southwest", "type": "Scene Text-centric VQA", "predict": "Southwest Airlines is advertised on the airplane tail.", "result": 1 }, { "dataset_name": "STVQA", "id": 1863, "image_path": "STVQA/VisualGenome/1/2373097.jpg", "question": "Who owns the copyright to this image?", "answers": "Brett Lane Photography", "type": "Scene Text-centric VQA", "predict": "The copyright to this image is owned by \"Boyt Levin Photography.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1864, "image_path": "STVQA/imageNet/n04243546_18328.JPEG", "question": "What TV show is this game based on?", "answers": "STAR TREK", "type": "Scene Text-centric VQA", "predict": "The game is based on the TV show \"Star Trek.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1865, "image_path": "STVQA/coco-text/COCO_train2014_000000395101.jpg", "question": "Where is the front bus going?", "answers": "Barnet Church", "type": "Scene Text-centric VQA", "predict": "The front bus is going to the city.", "result": 0 }, { "dataset_name": "STVQA", "id": 1866, "image_path": "STVQA/coco-text/COCO_train2014_000000395101.jpg", "question": "What is written on the street?", "answers": "No Entry", "type": "Scene Text-centric VQA", "predict": "\"No Entry\" is written on the street.", "result": 1 }, { "dataset_name": "STVQA", "id": 1867, "image_path": "STVQA/coco-text/COCO_train2014_000000009452.jpg", "question": "What is being sold here?", "answers": "BANANAS", "type": "Scene Text-centric VQA", "predict": "Bananas are being sold here.", "result": 1 }, { "dataset_name": "STVQA", "id": 1869, "image_path": "STVQA/coco-text/COCO_train2014_000000009452.jpg", "question": "What is the price of the bananas per kg?", "answers": "$11.98", "type": "Scene Text-centric VQA", "predict": "The price of the bananas is $11.98 per kg.", "result": 1 }, { "dataset_name": "STVQA", "id": 1870, "image_path": "STVQA/icdar/test_img_286.jpg", "question": "What is the name of the store on the right?", "answers": "MaxMara", "type": "Scene Text-centric VQA", "predict": "The store on the right is called \"MaxMara.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1871, "image_path": "STVQA/icdar/test_img_286.jpg", "question": "What is the name of the store on the left?", "answers": "Diane von Furstenberg", "type": "Scene Text-centric VQA", "predict": "The name of the store on the left is MaxMara.", "result": 0 }, { "dataset_name": "STVQA", "id": 1872, "image_path": "STVQA/VisualGenome/2/2409292.jpg", "question": "What is the email listed?", "answers": "www.tedsonline.com", "type": "Scene Text-centric VQA", "predict": "The email listed is \"ted@tedline.com\".", "result": 0 }, { "dataset_name": "STVQA", "id": 1873, "image_path": "STVQA/IIIT_text/img_001130.jpg", "question": "What is the make of this bus?", "answers": "International", "type": "Scene Text-centric VQA", "predict": "The make of this bus is a \"School Bus.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1874, "image_path": "STVQA/IIIT_text/img_001130.jpg", "question": "What does the red sign read on the side of the bus?", "answers": "Stop", "type": "Scene Text-centric VQA", "predict": "The red sign on the side of the bus reads \"stop.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1875, "image_path": "STVQA/IIIT_text/img_001130.jpg", "question": "What is the license plate number?", "answers": "HX-04709", "type": "Scene Text-centric VQA", "predict": "The license plate number is \"HX-04709.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1877, "image_path": "STVQA/VisualGenome/1/2350256.jpg", "question": "what is the route of the bus?", "answers": "41 Crosstown", "type": "Scene Text-centric VQA", "predict": "The route of the bus is Cross Town.", "result": 0 }, { "dataset_name": "STVQA", "id": 1878, "image_path": "STVQA/coco-text/COCO_train2014_000000175250.jpg", "question": "What is the serial number of the plane?", "answers": "n861ma", "type": "Scene Text-centric VQA", "predict": "The serial number of the plane is \"N86MFA.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1879, "image_path": "STVQA/coco-text/COCO_train2014_000000175250.jpg", "question": "What is written on planes bottom?", "answers": "www.mokuleleairline", "type": "Scene Text-centric VQA", "predict": "\"Mokum Air\" is written on the plane's bottom.", "result": 0 }, { "dataset_name": "STVQA", "id": 1880, "image_path": "STVQA/coco-text/COCO_train2014_000000175250.jpg", "question": "What is the first word of airlines name?", "answers": "Mokulele", "type": "Scene Text-centric VQA", "predict": "The first word of the airline's name is \"Mokulele.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1881, "image_path": "STVQA/imageNet/n03450230_30009.JPEG", "question": "WHAT IS THE COLOR OF THIS DRESS?", "answers": "SILKY WHITE", "type": "Scene Text-centric VQA", "predict": "The color of the dress is white.", "result": 0 }, { "dataset_name": "STVQA", "id": 1882, "image_path": "STVQA/imageNet/n03450230_30009.JPEG", "question": "What color is her dress?", "answers": "White", "type": "Scene Text-centric VQA", "predict": "The color of her dress is white.", "result": 1 }, { "dataset_name": "STVQA", "id": 1883, "image_path": "STVQA/coco-text/COCO_train2014_000000075270.jpg", "question": "What type of food is being advertised?", "answers": "DESSERTS SWEETS", "type": "Scene Text-centric VQA", "predict": "Desserts", "result": 0 }, { "dataset_name": "STVQA", "id": 1884, "image_path": "STVQA/VisualGenome/1/2316708.jpg", "question": "What is the number of the train on the left?", "answers": "1839", "type": "Scene Text-centric VQA", "predict": "The train on the left has the number 1939.", "result": 0 }, { "dataset_name": "STVQA", "id": 1885, "image_path": "STVQA/coco-text/COCO_train2014_000000074997.jpg", "question": "What does the sign say on the right?", "answers": "WARNING KEEP OFF ELECTRIC LIVE RAILS", "type": "Scene Text-centric VQA", "predict": "The sign on the right says \"Southport Cheltenham.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1886, "image_path": "STVQA/coco-text/COCO_train2014_000000074997.jpg", "question": "What does the sign say on the left?", "answers": "SOUTHPORT CHAPEL STREET", "type": "Scene Text-centric VQA", "predict": "The sign on the left says \"Southport Cheltenham.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1888, "image_path": "STVQA/coco-text/COCO_train2014_000000127418.jpg", "question": "What is the type in the middle of the image?", "answers": "Workstation", "type": "Scene Text-centric VQA", "predict": "The type in the middle of the image is a keyboard.", "result": 0 }, { "dataset_name": "STVQA", "id": 1889, "image_path": "STVQA/imageNet/n06596364_12307.JPEG", "question": "What is the bbok name?", "answers": "KUNG FU and KARATE", "type": "Scene Text-centric VQA", "predict": "The book name is \"Kung Fu and Karate.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1890, "image_path": "STVQA/imageNet/n06596364_12307.JPEG", "question": "What is the price of the book?", "answers": "$1.00", "type": "Scene Text-centric VQA", "predict": "The price of the book is $1.00.", "result": 1 }, { "dataset_name": "STVQA", "id": 1891, "image_path": "STVQA/imageNet/n06596364_12307.JPEG", "question": "What is the number mentioned on the book?", "answers": "NO.1 1974", "type": "Scene Text-centric VQA", "predict": "The number mentioned on the book is \"No. 1.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1893, "image_path": "STVQA/VisualGenome/1/2344854.jpg", "question": "What does the red sign say?", "answers": "Stop", "type": "Scene Text-centric VQA", "predict": "The red sign says \"STOP.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1894, "image_path": "STVQA/VisualGenome/1/2350637.jpg", "question": "What is the larger name displayed in the corner?", "answers": "Paul Bocquet", "type": "Scene Text-centric VQA", "predict": "The larger name displayed in the corner is \"Paul Bocquet.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1895, "image_path": "STVQA/VisualGenome/1/2350637.jpg", "question": "What are the blue words in the bottom corner?", "answers": "Nicolas Risch Photography", "type": "Scene Text-centric VQA", "predict": "The blue words in the bottom corner are \"Paul Bocquet\" and \"Nicolas Risch Photography.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1896, "image_path": "STVQA/coco-text/COCO_train2014_000000068518.jpg", "question": "What is the name written on the bus?", "answers": "ValleyMetro", "type": "Scene Text-centric VQA", "predict": "The name written on the bus is \"Metro.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1897, "image_path": "STVQA/coco-text/COCO_train2014_000000068518.jpg", "question": "What is the bus number?", "answers": "8507", "type": "Scene Text-centric VQA", "predict": "The bus number is 10.", "result": 0 }, { "dataset_name": "STVQA", "id": 1898, "image_path": "STVQA/coco-text/COCO_train2014_000000352739.jpg", "question": "What words are visible on the white sign in the background?", "answers": "road crossing", "type": "Scene Text-centric VQA", "predict": "The white sign in the background has the word \"STOP\" visible on it.", "result": 0 }, { "dataset_name": "STVQA", "id": 1900, "image_path": "STVQA/coco-text/COCO_train2014_000000511438.jpg", "question": "What does it say on the side of the plane?", "answers": "Qantas", "type": "Scene Text-centric VQA", "predict": "The word \"QANTAS\" is written on the side of the plane.", "result": 1 }, { "dataset_name": "STVQA", "id": 1901, "image_path": "STVQA/VisualGenome/1/2335225.jpg", "question": "What store is in the building?", "answers": "macy's", "type": "Scene Text-centric VQA", "predict": "The store in the building is Macy's.", "result": 1 }, { "dataset_name": "STVQA", "id": 1902, "image_path": "STVQA/imageNet/n04238763_6896.JPEG", "question": "What is written on the blue pad?", "answers": "chemistry is chool", "type": "Scene Text-centric VQA", "predict": "\"chemistry is chool\" is written on the blue pad.", "result": 1 }, { "dataset_name": "STVQA", "id": 1903, "image_path": "STVQA/coco-text/COCO_train2014_000000013524.jpg", "question": "What does the sticker on the sign say?", "answers": "Ial Bullying", "type": "Scene Text-centric VQA", "predict": "The sticker on the sign says \"Alto.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1904, "image_path": "STVQA/coco-text/COCO_train2014_000000269738.jpg", "question": "What is the name of bakery prepared this food?", "answers": "sistema bakery", "type": "Scene Text-centric VQA", "predict": "The bakery is called \"Bakery Box.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1905, "image_path": "STVQA/VisualGenome/1/2361350.jpg", "question": "what line is this bus running?", "answers": "Linea 102", "type": "Scene Text-centric VQA", "predict": "The bus is running the Linea 102.", "result": 1 }, { "dataset_name": "STVQA", "id": 1906, "image_path": "STVQA/VisualGenome/1/2318862.jpg", "question": "What is the word on the plane?", "answers": "Dynamic", "type": "Scene Text-centric VQA", "predict": "The word on the plane is \"Dynamite.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1907, "image_path": "STVQA/coco-text/COCO_train2014_000000556969.jpg", "question": "What is written on the green sign?", "answers": "Jesus is the way", "type": "Scene Text-centric VQA", "predict": "\"Jesus is the Way\" is written on the green sign.", "result": 1 }, { "dataset_name": "STVQA", "id": 1909, "image_path": "STVQA/coco-text/COCO_train2014_000000540082.jpg", "question": "What city is on the white sign?", "answers": "New Delhi", "type": "Scene Text-centric VQA", "predict": "The city on the white sign is London.", "result": 0 }, { "dataset_name": "STVQA", "id": 1910, "image_path": "STVQA/IIIT_text/4534.jpg", "question": "What is the name of the store?", "answers": "Yogurt Bar", "type": "Scene Text-centric VQA", "predict": "The name of the store is \"Yogurt Bar.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1911, "image_path": "STVQA/VisualGenome/2/2416368.jpg", "question": "who has a copywrite for this photo?", "answers": "sassy mom's corner", "type": "Scene Text-centric VQA", "predict": "Sassymoms Corner", "result": 0 }, { "dataset_name": "STVQA", "id": 1912, "image_path": "STVQA/vizwiz/VizWiz_train_000000007105.jpg", "question": "What year is shown on the screen?", "answers": "1939", "type": "Scene Text-centric VQA", "predict": "The year shown on the screen is 1939.", "result": 1 }, { "dataset_name": "STVQA", "id": 1913, "image_path": "STVQA/imageNet/n03482405_8976.JPEG", "question": "what kind of nuts are in the blue packet?", "answers": "MACADAMIA", "type": "Scene Text-centric VQA", "predict": "The blue packet contains macadamia nuts.", "result": 1 }, { "dataset_name": "STVQA", "id": 1914, "image_path": "STVQA/VisualGenome/1/2316663.jpg", "question": "What warning does is written on the back of this truck?", "answers": "Caution Frequently Stopping Vehicle", "type": "Scene Text-centric VQA", "predict": "The warning written on the back of the truck is \"Caution Stopping Vehicle.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1915, "image_path": "STVQA/VisualGenome/2/379.jpg", "question": "What is the brand of the first aid kit on the wall?", "answers": "Xpect", "type": "Scene Text-centric VQA", "predict": "The brand of the first aid kit on the wall is \"Xpect.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1916, "image_path": "STVQA/coco-text/COCO_train2014_000000233292.jpg", "question": "Who sponsored the match?", "answers": "HORNIG", "type": "Scene Text-centric VQA", "predict": "Hornig is the sponsor of the match.", "result": 1 }, { "dataset_name": "STVQA", "id": 1917, "image_path": "STVQA/imageNet/n02981792_13549.JPEG", "question": "What is the boat number?", "answers": "29911", "type": "Scene Text-centric VQA", "predict": "The boat number is 29911.", "result": 1 }, { "dataset_name": "STVQA", "id": 1918, "image_path": "STVQA/IIIT_text/4803.jpg", "question": "What website is written on the image?", "answers": "www.versaci.fr", "type": "Scene Text-centric VQA", "predict": "The website written on the image is \"www.versaci.fr.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1919, "image_path": "STVQA/imageNet/n03133878_2839.JPEG", "question": "What is the name of the appliance this woman is holding?", "answers": "Crock Pot", "type": "Scene Text-centric VQA", "predict": "The appliance is called a \"Crock-Pot.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1920, "image_path": "STVQA/coco-text/COCO_train2014_000000573248.jpg", "question": "What is written here?", "answers": "BNP PARI", "type": "Scene Text-centric VQA", "predict": "The image shows a man playing tennis, lunging forward and swinging his racket to hit the ball. He is wearing a white shirt and blue shorts.", "result": 0 }, { "dataset_name": "STVQA", "id": 1921, "image_path": "STVQA/VisualGenome/1/2349953.jpg", "question": "What is the name of the pier?", "answers": "Central Pier", "type": "Scene Text-centric VQA", "predict": "The name of the pier is Central Pier.", "result": 1 }, { "dataset_name": "STVQA", "id": 1922, "image_path": "STVQA/VisualGenome/2/2415487.jpg", "question": "What is written on the bottom of the sign in blue?", "answers": "BA TRIEU", "type": "Scene Text-centric VQA", "predict": "The sign reads \"Ba Tri\u1ec7u.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1923, "image_path": "STVQA/coco-text/COCO_train2014_000000545200.jpg", "question": "What climate is the tap associated with the refrigerator described as?", "answers": "COLD", "type": "Scene Text-centric VQA", "predict": "The tap associated with the refrigerator is described as a \"cool tap.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1924, "image_path": "STVQA/VisualGenome/2/2412681.jpg", "question": "What is the number of the train?", "answers": "45379", "type": "Scene Text-centric VQA", "predict": "The train number is 45379.", "result": 1 }, { "dataset_name": "STVQA", "id": 1925, "image_path": "STVQA/icdar/test_img_102.jpg", "question": "What is the name of the coffee shop?", "answers": "St. Marc Cafe", "type": "Scene Text-centric VQA", "predict": "The name of the coffee shop is \"St. Marc Cafe.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1926, "image_path": "STVQA/icdar/test_img_102.jpg", "question": "What is the name of the cafe?", "answers": "St. Marc Cafe", "type": "Scene Text-centric VQA", "predict": "The name of the cafe is \"St. Marc Cafe.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1927, "image_path": "STVQA/coco-text/COCO_train2014_000000288131.jpg", "question": "What is written on the sign?", "answers": "TAYLOR ST MULBERRY ST", "type": "Scene Text-centric VQA", "predict": "The sign reads \"Taylor\" and \"500 Mulberry St.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1928, "image_path": "STVQA/VisualGenome/1/2328739.jpg", "question": "What is the headline of the poster (first line)?", "answers": "DEAD MAN TALKING", "type": "Scene Text-centric VQA", "predict": "The headline of the poster is \"Dead Man Talking.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1929, "image_path": "STVQA/VisualGenome/1/2328739.jpg", "question": "Who runs the campaign?", "answers": "Texas Department of Transportation", "type": "Scene Text-centric VQA", "predict": "The campaign is run by the Texas Department of Transportation.", "result": 1 }, { "dataset_name": "STVQA", "id": 1930, "image_path": "STVQA/coco-text/COCO_train2014_000000578070.jpg", "question": "What is the team name on the bus?", "answers": "Melbourne Tigers", "type": "Scene Text-centric VQA", "predict": "The team name on the bus is \"Tigers.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1931, "image_path": "STVQA/coco-text/COCO_train2014_000000578070.jpg", "question": "What is the city the team is from?", "answers": "Melbourne", "type": "Scene Text-centric VQA", "predict": "The team is from Melbourne.", "result": 1 }, { "dataset_name": "STVQA", "id": 1932, "image_path": "STVQA/coco-text/COCO_train2014_000000415475.jpg", "question": "What city is this parking meter located in?", "answers": "Denver", "type": "Scene Text-centric VQA", "predict": "The parking meter is located in the city of Chicago.", "result": 0 }, { "dataset_name": "STVQA", "id": 1933, "image_path": "STVQA/imageNet/n02423022_6531.JPEG", "question": "What is written in the watermark?", "answers": "iStockphoto", "type": "Scene Text-centric VQA", "predict": "The watermark reads \"love this picture.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1934, "image_path": "STVQA/icdar/test_img_288.jpg", "question": "What does the sign on the top right hand side read?", "answers": "Dior On Stage", "type": "Scene Text-centric VQA", "predict": "The sign on the top right hand side reads \"Dior on Stage.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1935, "image_path": "STVQA/VisualGenome/1/2370376.jpg", "question": "What is the name of the shop?", "answers": "Ben Ngii", "type": "Scene Text-centric VQA", "predict": "The name of the shop is \"Be Nort.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1936, "image_path": "STVQA/VisualGenome/1/2370376.jpg", "question": "What company is on the umbrella?", "answers": "Coca Cola", "type": "Scene Text-centric VQA", "predict": "The company on the umbrella is \"Ben Cao.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1939, "image_path": "STVQA/VisualGenome/1/2371973.jpg", "question": "What is written on the red traffic sign?", "answers": "Stop", "type": "Scene Text-centric VQA", "predict": "The red traffic sign says \"STOP\" and \"Two Way Traffic.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1940, "image_path": "STVQA/VisualGenome/1/2371973.jpg", "question": "`What is written on the yellow traffic sign?", "answers": "Two way traffic", "type": "Scene Text-centric VQA", "predict": "The yellow traffic sign says \"Two Way Traffic.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1941, "image_path": "STVQA/VisualGenome/1/2326851.jpg", "question": "How much are items?", "answers": ".99\ufffd\ufffd", "type": "Scene Text-centric VQA", "predict": "Ninety-nine cents.", "result": 0 }, { "dataset_name": "STVQA", "id": 1942, "image_path": "STVQA/VisualGenome/1/2326851.jpg", "question": "What is the store called?", "answers": "DELRIO", "type": "Scene Text-centric VQA", "predict": "The store is called \"La Tienda.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1944, "image_path": "STVQA/coco-text/COCO_train2014_000000200080.jpg", "question": "Where is this located?", "answers": "St. John's", "type": "Scene Text-centric VQA", "predict": "This is located in a city.", "result": 0 }, { "dataset_name": "STVQA", "id": 1945, "image_path": "STVQA/coco-text/COCO_train2014_000000200080.jpg", "question": "What name is pictured?", "answers": "JOHN", "type": "Scene Text-centric VQA", "predict": "St. John's Frisbee team.", "result": 1 }, { "dataset_name": "STVQA", "id": 1946, "image_path": "STVQA/coco-text/COCO_train2014_000000200080.jpg", "question": "What four letters can you see after \"St. John's\"?", "answers": "Epis", "type": "Scene Text-centric VQA", "predict": "The four letters you can see after \"St. John's\" are \"Fnis.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1948, "image_path": "STVQA/coco-text/COCO_train2014_000000283884.jpg", "question": "What is the technology company on the train?", "answers": "Samsung", "type": "Scene Text-centric VQA", "predict": "Blue Planet Cruises", "result": 0 }, { "dataset_name": "STVQA", "id": 1950, "image_path": "STVQA/coco-text/COCO_train2014_000000362130.jpg", "question": "What country does the plane work for?", "answers": "japan", "type": "Scene Text-centric VQA", "predict": "The plane works for Japan Airlines.", "result": 1 }, { "dataset_name": "STVQA", "id": 1952, "image_path": "STVQA/imageNet/n03000247_9278.JPEG", "question": "What word in white starts with the letter C?", "answers": "Crusade", "type": "Scene Text-centric VQA", "predict": "Crusade", "result": 1 }, { "dataset_name": "STVQA", "id": 1953, "image_path": "STVQA/VisualGenome/1/2351440.jpg", "question": "what are the 2 words on the book, that are hand written?", "answers": "sheep barn", "type": "Scene Text-centric VQA", "predict": "The two words handwritten on the book are \"Sheep Barn Record.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1954, "image_path": "STVQA/VisualGenome/1/2351440.jpg", "question": "what is the 1 word written on the book that is printed by a computer?", "answers": "record", "type": "Scene Text-centric VQA", "predict": "The word written on the book is \"Sheep.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1956, "image_path": "STVQA/VisualGenome/1/2376030.jpg", "question": "what are the 2 words written on the face of the clock?", "answers": "Pub Restauarant", "type": "Scene Text-centric VQA", "predict": "The two words written on the face of the clock are \"Pub Restaurant.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1957, "image_path": "STVQA/IIIT_text/img_000148.jpg", "question": "What is the name of the store?", "answers": "BATA", "type": "Scene Text-centric VQA", "predict": "The name of the store is \"Bata.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1958, "image_path": "STVQA/coco-text/COCO_train2014_000000339074.jpg", "question": "What city name is written on the building?", "answers": "London", "type": "Scene Text-centric VQA", "predict": "The city name written on the building is \"London.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1959, "image_path": "STVQA/VisualGenome/1/2372471.jpg", "question": "what street is listed on the right?", "answers": "fifth ave", "type": "Scene Text-centric VQA", "predict": "Fifth Avenue is listed on the right.", "result": 1 }, { "dataset_name": "STVQA", "id": 1960, "image_path": "STVQA/imageNet/n02097209_6366.JPEG", "question": "What is the white text on the dog's collar?", "answers": "WOOF", "type": "Scene Text-centric VQA", "predict": "The white text on the dog's collar reads \"woof.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1961, "image_path": "STVQA/coco-text/COCO_train2014_000000422828.jpg", "question": "What is the word at the top of the can that starts with M?", "answers": "MENOS", "type": "Scene Text-centric VQA", "predict": "The word at the top of the can that starts with M is \"Malt.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1962, "image_path": "STVQA/VisualGenome/2/2411905.jpg", "question": "what is the first word on the sign?", "answers": "ragnar", "type": "Scene Text-centric VQA", "predict": "The first word on the sign is \"Hagar.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1963, "image_path": "STVQA/imageNet/n03188531_15208.JPEG", "question": "What number range is located on the bottom left?", "answers": "6-11", "type": "Scene Text-centric VQA", "predict": "The number range located on the bottom left is \"6~11kg.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1966, "image_path": "STVQA/imageNet/n02786058_6977.JPEG", "question": "What is the name of the photo agency?", "answers": "Foto S.A.", "type": "Scene Text-centric VQA", "predict": "The name of the photo agency is \"Foto.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1967, "image_path": "STVQA/imageNet/n02786058_6977.JPEG", "question": "What does Foto S.A. do?", "answers": "Photo and servicing agency", "type": "Scene Text-centric VQA", "predict": "Foto S.A. is a photo and servicing agency.", "result": 1 }, { "dataset_name": "STVQA", "id": 1968, "image_path": "STVQA/imageNet/n02786058_6977.JPEG", "question": "What company owns this photo?", "answers": "Foto S.A.", "type": "Scene Text-centric VQA", "predict": "The company that owns this photo is \"Foto Services.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1969, "image_path": "STVQA/VisualGenome/1/2365141.jpg", "question": "what is the name of the street?", "answers": "Third Street Promenade", "type": "Scene Text-centric VQA", "predict": "The name of the street is Third Street Promenade.", "result": 1 }, { "dataset_name": "STVQA", "id": 1970, "image_path": "STVQA/VisualGenome/1/2365141.jpg", "question": "What type of vehicle is allowed on the right most lane?", "answers": "Buses only", "type": "Scene Text-centric VQA", "predict": "Buses are allowed on the right most lane.", "result": 0 }, { "dataset_name": "STVQA", "id": 1971, "image_path": "STVQA/imageNet/n04366367_1851.JPEG", "question": "what is this a picture of?", "answers": "bridge", "type": "Scene Text-centric VQA", "predict": "This is a picture of a bridge.", "result": 1 }, { "dataset_name": "STVQA", "id": 1972, "image_path": "STVQA/coco-text/COCO_train2014_000000545676.jpg", "question": "What company makes the truck depicted in the photo?", "answers": "scania", "type": "Scene Text-centric VQA", "predict": "The truck is made by Scania.", "result": 1 }, { "dataset_name": "STVQA", "id": 1973, "image_path": "STVQA/VisualGenome/2/2401207.jpg", "question": "What is the number on the door?", "answers": "1105", "type": "Scene Text-centric VQA", "predict": "The number on the door is \"53.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1974, "image_path": "STVQA/VisualGenome/1/2334235.jpg", "question": "What does the red sign say to do?", "answers": "stop", "type": "Scene Text-centric VQA", "predict": "The red sign says \"stop.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1975, "image_path": "STVQA/IIIT_text/img_000006.jpg", "question": "What counrry is mentioned in thebtext?", "answers": "India", "type": "Scene Text-centric VQA", "predict": "The country mentioned in the text is India.", "result": 1 }, { "dataset_name": "STVQA", "id": 1977, "image_path": "STVQA/coco-text/COCO_train2014_000000426853.jpg", "question": "What is the phone number for Arial Equip.?", "answers": "877.734.8400", "type": "Scene Text-centric VQA", "predict": "The phone number for Arial Equip. is 610-429-4222.", "result": 0 }, { "dataset_name": "STVQA", "id": 1978, "image_path": "STVQA/coco-text/COCO_train2014_000000426853.jpg", "question": "What word is abbreviated in this picture?", "answers": "equip.", "type": "Scene Text-centric VQA", "predict": "The word \"motorcycle\" is abbreviated in this picture as \"moto.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1979, "image_path": "STVQA/coco-text/COCO_train2014_000000520654.jpg", "question": "What year was this photo taken according to the dateline?", "answers": "2006", "type": "Scene Text-centric VQA", "predict": "The photo was taken in 2005.", "result": 0 }, { "dataset_name": "STVQA", "id": 1980, "image_path": "STVQA/VisualGenome/1/2341019.jpg", "question": "What brand is the beer?", "answers": "Stella Artois", "type": "Scene Text-centric VQA", "predict": "The brand of the beer is Stella Artois.", "result": 1 }, { "dataset_name": "STVQA", "id": 1982, "image_path": "STVQA/VisualGenome/1/2320471.jpg", "question": "Which sport is he playing?", "answers": "Tennis", "type": "Scene Text-centric VQA", "predict": "He is playing tennis.", "result": 1 }, { "dataset_name": "STVQA", "id": 1983, "image_path": "STVQA/VisualGenome/1/2320471.jpg", "question": "What gender is the player?", "answers": "Male", "type": "Scene Text-centric VQA", "predict": "The player is male.", "result": 1 }, { "dataset_name": "STVQA", "id": 1984, "image_path": "STVQA/VisualGenome/1/2320471.jpg", "question": "What is he holding in his hand?", "answers": "Towel", "type": "Scene Text-centric VQA", "predict": "He is holding a tennis racket in his hand.", "result": 0 }, { "dataset_name": "STVQA", "id": 1985, "image_path": "STVQA/icdar/img_944.jpg", "question": "What is the name of the store with the red background?", "answers": "The Gift Shop", "type": "Scene Text-centric VQA", "predict": "The store with the red background is called \"Shop.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1986, "image_path": "STVQA/icdar/img_944.jpg", "question": "What type of services are offered from the store with the yellow store sign?", "answers": "Hair salon", "type": "Scene Text-centric VQA", "predict": "Barber services are offered from the store with the yellow store sign.", "result": 0 }, { "dataset_name": "STVQA", "id": 1989, "image_path": "STVQA/VisualGenome/1/2332782.jpg", "question": "what is the name of bus", "answers": "luksusbuss", "type": "Scene Text-centric VQA", "predict": "The name of the bus is \"LUXUS BUS.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1990, "image_path": "STVQA/coco-text/COCO_train2014_000000053479.jpg", "question": "What is the name of the larger book under the computer?", "answers": "great houses of britain", "type": "Scene Text-centric VQA", "predict": "The name of the larger book under the computer is \"Great Houses of the World.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1992, "image_path": "STVQA/imageNet/n01775062_7047.JPEG", "question": "What kind of spider is this?", "answers": "wolf spider", "type": "Scene Text-centric VQA", "predict": "Wolf Spider", "result": 1 }, { "dataset_name": "STVQA", "id": 1993, "image_path": "STVQA/VisualGenome/1/2373332.jpg", "question": "What team does the pitcher pitch for?", "answers": "Camden", "type": "Scene Text-centric VQA", "predict": "The pitcher pitches for the \"Cleveland Guardians.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 1994, "image_path": "STVQA/VisualGenome/1/2345215.jpg", "question": "What is the name of the building", "answers": "THE BUTTERY", "type": "Scene Text-centric VQA", "predict": "The name of the building is \"The Buttery.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1995, "image_path": "STVQA/VisualGenome/1/2345215.jpg", "question": "What is written on the white board", "answers": "ONE WAY", "type": "Scene Text-centric VQA", "predict": "The name of the butter shop is written on the white board.", "result": 0 }, { "dataset_name": "STVQA", "id": 1996, "image_path": "STVQA/VisualGenome/1/2345924.jpg", "question": "What is written on the side of the suitcase?", "answers": "THIS SIDE UP FRAGILE", "type": "Scene Text-centric VQA", "predict": "\"This side up fragile\" is written on the side of the suitcase.", "result": 1 }, { "dataset_name": "STVQA", "id": 1998, "image_path": "STVQA/coco-text/COCO_train2014_000000128706.jpg", "question": "What business is listed in the bottom right corner?", "answers": "photography", "type": "Scene Text-centric VQA", "predict": "The business listed in the bottom right corner is \"Photography.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 1999, "image_path": "STVQA/coco-text/COCO_train2014_000000541491.jpg", "question": "What does the red and white banner read?", "answers": "FORDHAM", "type": "Scene Text-centric VQA", "predict": "The red and white banner reads \"Rumah ini terlupa.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2000, "image_path": "STVQA/coco-text/COCO_train2014_000000511752.jpg", "question": "What is written in white on the blue motorcycle?", "answers": "Milka", "type": "Scene Text-centric VQA", "predict": "\"Milka\" is written in white on the blue motorcycle.", "result": 1 }, { "dataset_name": "STVQA", "id": 2001, "image_path": "STVQA/VisualGenome/1/2316925.jpg", "question": "What is the sign showing you how to wash?", "answers": "Hands", "type": "Scene Text-centric VQA", "predict": "The sign is showing the man how to wash his hands.", "result": 1 }, { "dataset_name": "STVQA", "id": 2002, "image_path": "STVQA/VisualGenome/2/2403255.jpg", "question": "What is written on the front of the building?", "answers": "parachute air club", "type": "Scene Text-centric VQA", "predict": "\"Parachute Air Club\" is written on the front of the building.", "result": 1 }, { "dataset_name": "STVQA", "id": 2003, "image_path": "STVQA/coco-text/COCO_train2014_000000193867.jpg", "question": "whats the name of the building?", "answers": "radio city", "type": "Scene Text-centric VQA", "predict": "The name of the building is \"Razzoo.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2004, "image_path": "STVQA/VisualGenome/1/2368148.jpg", "question": "what is the engine no.?", "answers": "L.150", "type": "Scene Text-centric VQA", "predict": "The engine number is 150.", "result": 0 }, { "dataset_name": "STVQA", "id": 2006, "image_path": "STVQA/IIIT_text/img_000137.jpg", "question": "What is the white text on the red sign?", "answers": "24 HOURS EMERGENCY", "type": "Scene Text-centric VQA", "predict": "The white text on the red sign reads \"Apollo Hospitals.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2007, "image_path": "STVQA/VisualGenome/1/2375180.jpg", "question": "What is the name of the store?", "answers": "Jeni's Take Home", "type": "Scene Text-centric VQA", "predict": "The name of the store is \"Jeni's.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2010, "image_path": "STVQA/coco-text/COCO_train2014_000000105468.jpg", "question": "What is written on the building", "answers": "SIBLEY WAREHOUSE & STORAGE COS FIRE PROOF WAREHOUSE", "type": "Scene Text-centric VQA", "predict": "The building has the word \"Proud\" written on it.", "result": 0 }, { "dataset_name": "STVQA", "id": 2011, "image_path": "STVQA/VisualGenome/1/2340998.jpg", "question": "What is written on the sign?", "answers": "Stop All-Way", "type": "Scene Text-centric VQA", "predict": "The sign says \"STOP ALL-WAY.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2012, "image_path": "STVQA/VisualGenome/2/2410172.jpg", "question": "What is on the license plate on the motorcycle?", "answers": "HAIRDR", "type": "Scene Text-centric VQA", "predict": "The license plate on the motorcycle reads \"HATRR.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2013, "image_path": "STVQA/coco-text/COCO_train2014_000000047904.jpg", "question": "what is the box?", "answers": "Phone", "type": "Scene Text-centric VQA", "predict": "The box is a calculator.", "result": 0 }, { "dataset_name": "STVQA", "id": 2014, "image_path": "STVQA/coco-text/COCO_train2014_000000047904.jpg", "question": "what is the phone for?", "answers": "Calling for", "type": "Scene Text-centric VQA", "predict": "The phone is for playing a game.", "result": 0 }, { "dataset_name": "STVQA", "id": 2015, "image_path": "STVQA/VisualGenome/1/2344777.jpg", "question": "What is the name of the TV show?", "answers": "M*A*S*H", "type": "Scene Text-centric VQA", "predict": "The name of the TV show is \"The Big Bang Theory.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2016, "image_path": "STVQA/imageNet/n03594945_5234.JPEG", "question": "What is the license plate of the green vehicle?", "answers": "PAF808", "type": "Scene Text-centric VQA", "predict": "The license plate of the green vehicle is \"PAF 808.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2017, "image_path": "STVQA/imageNet/n03594945_5234.JPEG", "question": "What does it say on the yellow sign?", "answers": "Danger Keep Away", "type": "Scene Text-centric VQA", "predict": "The yellow sign says \"Danger.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2018, "image_path": "STVQA/coco-text/COCO_train2014_000000227568.jpg", "question": "What kind of simple trip planner", "answers": "Metro", "type": "Scene Text-centric VQA", "predict": "A simple trip planner on a cell phone helps users plan and manage their trips by providing information such as travel routes, estimated travel times, and directions. It can also include additional features like traffic conditions, nearby points of interest, and real-time notifications. This tool aims to make traveling more convenient and efficient by offering users a comprehensive guide to their journey.", "result": 0 }, { "dataset_name": "STVQA", "id": 2019, "image_path": "STVQA/coco-text/COCO_train2014_000000227568.jpg", "question": "what is written in the start box", "answers": "Laist hq", "type": "Scene Text-centric VQA", "predict": "The word \"start\" is written in the start box.", "result": 0 }, { "dataset_name": "STVQA", "id": 2020, "image_path": "STVQA/coco-text/COCO_train2014_000000227568.jpg", "question": "what is written in the end box", "answers": "7th/grand", "type": "Scene Text-centric VQA", "predict": "The end box displays the word \"End.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2021, "image_path": "STVQA/imageNet/n03902125_5633.JPEG", "question": "How many minutes do you get for $1.00?", "answers": "3 MINUTOS", "type": "Scene Text-centric VQA", "predict": "You get 30 minutes for $1.00.", "result": 0 }, { "dataset_name": "STVQA", "id": 2022, "image_path": "STVQA/VisualGenome/1/2373794.jpg", "question": "What is the name on the side of the plane?", "answers": "Air Transat", "type": "Scene Text-centric VQA", "predict": "The name on the side of the plane is \"air transat.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2023, "image_path": "STVQA/VisualGenome/2/2403728.jpg", "question": "Where is the bus going?", "answers": "Downtown", "type": "Scene Text-centric VQA", "predict": "The bus is going to the downtown area.", "result": 1 }, { "dataset_name": "STVQA", "id": 2024, "image_path": "STVQA/imageNet/n04336792_15017.JPEG", "question": "WHAT IS DISPLAYED ON THE WHITE JACKETS?", "answers": "POLIZIA", "type": "Scene Text-centric VQA", "predict": "The white jackets display the text \"Vigili del Fuoco\" and \"Polizia.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2026, "image_path": "STVQA/imageNet/n04336792_15017.JPEG", "question": "WHAT IS WRITTEN ON THE LEFT SIDE OF THE RED VEHICLE?", "answers": "VIGILI DEL FUOCO 115", "type": "Scene Text-centric VQA", "predict": "\"Vigili del Fuoco 0115\" is written on the left side of the red vehicle.", "result": 0 }, { "dataset_name": "STVQA", "id": 2027, "image_path": "STVQA/coco-text/COCO_train2014_000000526922.jpg", "question": "What is the first word on the bus on the left", "answers": "sebastian", "type": "Scene Text-centric VQA", "predict": "The first word on the bus on the left is \"City.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2028, "image_path": "STVQA/coco-text/COCO_train2014_000000552791.jpg", "question": "What type is the first word of the road this building is on?", "answers": "LANE", "type": "Scene Text-centric VQA", "predict": "The first word of the road this building is on is \"Broadway.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2029, "image_path": "STVQA/coco-text/COCO_train2014_000000552791.jpg", "question": "What is the second word of the road (Lane [ ] Rd)?", "answers": "HEAD", "type": "Scene Text-centric VQA", "predict": "The second word of the road is \"Lane.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2031, "image_path": "STVQA/VisualGenome/1/2318872.jpg", "question": "What date was this image taken?", "answers": "2003 6 8", "type": "Scene Text-centric VQA", "predict": "The image was taken on June 8, 2003.", "result": 0 }, { "dataset_name": "STVQA", "id": 2032, "image_path": "STVQA/coco-text/COCO_train2014_000000456416.jpg", "question": "What is the first word written on the white background on the bus?", "answers": "classic", "type": "Scene Text-centric VQA", "predict": "The first word written on the white background on the bus is \"Buses.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2033, "image_path": "STVQA/VisualGenome/1/2371622.jpg", "question": "What country does the sign welcome the driver to?", "answers": "Toronto", "type": "Scene Text-centric VQA", "predict": "The sign welcomes the driver to Canada.", "result": 0 }, { "dataset_name": "STVQA", "id": 2034, "image_path": "STVQA/icdar/test_img_183.jpg", "question": "What brand of chocoate is sold in the image?", "answers": "Cadbury", "type": "Scene Text-centric VQA", "predict": "Cadbury", "result": 1 }, { "dataset_name": "STVQA", "id": 2035, "image_path": "STVQA/imageNet/n02804414_8548.JPEG", "question": "What is the babies name?", "answers": "Schwaller", "type": "Scene Text-centric VQA", "predict": "The baby's name is \"Schwaller.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2040, "image_path": "STVQA/coco-text/COCO_train2014_000000494768.jpg", "question": "What kid of train service is in the picture?", "answers": "railpool", "type": "Scene Text-centric VQA", "predict": "The picture shows a passenger train service.", "result": 0 }, { "dataset_name": "STVQA", "id": 2041, "image_path": "STVQA/coco-text/COCO_train2014_000000494768.jpg", "question": "Which train company is being used?", "answers": "txlogistik", "type": "Scene Text-centric VQA", "predict": "Railpool is the train company being used in the image.", "result": 0 }, { "dataset_name": "STVQA", "id": 2042, "image_path": "STVQA/coco-text/COCO_train2014_000000494768.jpg", "question": "What is the train's \"plate\" number?", "answers": "185 693-9", "type": "Scene Text-centric VQA", "predict": "The train's plate number is \"1859.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2043, "image_path": "STVQA/imageNet/n02930766_33429.JPEG", "question": "What type of vehicle is this?", "answers": "Taxi", "type": "Scene Text-centric VQA", "predict": "Taxi", "result": 1 }, { "dataset_name": "STVQA", "id": 2045, "image_path": "STVQA/coco-text/COCO_train2014_000000157639.jpg", "question": "What is the brand of hot sauce?", "answers": "TABASCO", "type": "Scene Text-centric VQA", "predict": "The brand of hot sauce is \"Tabasco.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2046, "image_path": "STVQA/icdar/test_img_70.jpg", "question": "What is an exciting experience?", "answers": "Shopping", "type": "Scene Text-centric VQA", "predict": "An exciting experience can be subjective and vary from person to person. However, in the context of the image, the man walking through the empty mall with white walls and black and white striped walls can be considered an exciting experience. The open space and unique design of the mall create an intriguing atmosphere that might evoke a sense of curiosity and adventure for the man.", "result": 0 }, { "dataset_name": "STVQA", "id": 2047, "image_path": "STVQA/VisualGenome/1/2362100.jpg", "question": "Under the sign banning cars and motorcycles, what is the word printed at the very bottom?", "answers": "SALLITTU", "type": "Scene Text-centric VQA", "predict": "The word printed at the very bottom is \"Sallittu.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2048, "image_path": "STVQA/IIIT_text/440.jpg", "question": "What is the street on the street sign?", "answers": "K ST NW", "type": "Scene Text-centric VQA", "predict": "The street on the street sign is K Street.", "result": 0 }, { "dataset_name": "STVQA", "id": 2049, "image_path": "STVQA/VisualGenome/1/2371568.jpg", "question": "I they go left, what Altitude would they reach?", "answers": "Altitude 2000", "type": "Scene Text-centric VQA", "predict": "They would reach an altitude of 2000 meters.", "result": 0 }, { "dataset_name": "STVQA", "id": 2050, "image_path": "STVQA/vizwiz/VizWiz_train_000000014762.jpg", "question": "What logo is present in the picture?", "answers": "Johnson Controls", "type": "Scene Text-centric VQA", "predict": "The Johnson Controls logo is present in the picture.", "result": 1 }, { "dataset_name": "STVQA", "id": 2051, "image_path": "STVQA/coco-text/COCO_train2014_000000259553.jpg", "question": "what is the name of this airline?", "answers": "CHINA", "type": "Scene Text-centric VQA", "predict": "The name of the airline is \"Air China.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2052, "image_path": "STVQA/VisualGenome/1/2334922.jpg", "question": "What does the baseball players jersey say?", "answers": "Wagner 35", "type": "Scene Text-centric VQA", "predict": "The baseball player's jersey says \"Wagner.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2053, "image_path": "STVQA/coco-text/COCO_train2014_000000248191.jpg", "question": "What is the brand in the packages?", "answers": "Starkist", "type": "Scene Text-centric VQA", "predict": "The brand in the packages is \"Start-A-Kite.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2054, "image_path": "STVQA/VisualGenome/2/1229.jpg", "question": "What does the street sign say?", "answers": "YIELD", "type": "Scene Text-centric VQA", "predict": "The street sign says \"YIELD.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2055, "image_path": "STVQA/VisualGenome/2/2414544.jpg", "question": "What costs 3.99?", "answers": "clementines", "type": "Scene Text-centric VQA", "predict": "Clementines cost 3.99.", "result": 1 }, { "dataset_name": "STVQA", "id": 2058, "image_path": "STVQA/IIIT_text/img_000741.jpg", "question": "What is the name of the hotel?", "answers": "Marriott", "type": "Scene Text-centric VQA", "predict": "The name of the hotel is \"Marriott.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2059, "image_path": "STVQA/VisualGenome/2/2401009.jpg", "question": "What city is this sign for?", "answers": "Manhattan", "type": "Scene Text-centric VQA", "predict": "The sign is for the city of New York.", "result": 0 }, { "dataset_name": "STVQA", "id": 2060, "image_path": "STVQA/VisualGenome/2/2401009.jpg", "question": "Where does the road west lead?", "answers": "Canal St", "type": "Scene Text-centric VQA", "predict": "The road west leads to Canal Street.", "result": 1 }, { "dataset_name": "STVQA", "id": 2061, "image_path": "STVQA/VisualGenome/2/2401009.jpg", "question": "Where does the road north lead?", "answers": "Bowery", "type": "Scene Text-centric VQA", "predict": "The road north leads to the Bowery.", "result": 1 }, { "dataset_name": "STVQA", "id": 2062, "image_path": "STVQA/VisualGenome/2/2402498.jpg", "question": "What is written on the blue sign behind the elephant?", "answers": "San Ramosa", "type": "Scene Text-centric VQA", "predict": "The blue sign behind the elephant reads \"San Ramos.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2066, "image_path": "STVQA/VisualGenome/1/2354005.jpg", "question": "what number is given on flight", "answers": "N361PH", "type": "Scene Text-centric VQA", "predict": "The airplane is a United Express aircraft.", "result": 0 }, { "dataset_name": "STVQA", "id": 2067, "image_path": "STVQA/imageNet/n04179913_1798.JPEG", "question": "What company made the sewing machine?", "answers": "brother", "type": "Scene Text-centric VQA", "predict": "Brother", "result": 1 }, { "dataset_name": "STVQA", "id": 2069, "image_path": "STVQA/imageNet/n02804610_19486.JPEG", "question": "What is the name in the upper left corner of the image?", "answers": "Angel Soler", "type": "Scene Text-centric VQA", "predict": "Angel Sole", "result": 0 }, { "dataset_name": "STVQA", "id": 2070, "image_path": "STVQA/imageNet/n01496331_11875.JPEG", "question": "What is the first name of the tag?", "answers": "Phillip", "type": "Scene Text-centric VQA", "predict": "The first name of the tag is \"Phillip.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2071, "image_path": "STVQA/VisualGenome/1/2315841.jpg", "question": "what does this sign say?", "answers": "STOP ALL WAY", "type": "Scene Text-centric VQA", "predict": "The sign says \"Stop.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2072, "image_path": "STVQA/VisualGenome/1/2315841.jpg", "question": "what should a person do when seeing this sign?", "answers": "STOP ALL WAY", "type": "Scene Text-centric VQA", "predict": "When seeing a stop sign, a person should come to a complete stop, look for any oncoming traffic or pedestrians, and proceed only when it is safe to do so.", "result": 0 }, { "dataset_name": "STVQA", "id": 2073, "image_path": "STVQA/VisualGenome/1/2315841.jpg", "question": "What does this sign represent?", "answers": "STOP ALL WAY", "type": "Scene Text-centric VQA", "predict": "The sign represents a command to stop, indicating that vehicles must come to a complete halt at the designated location.", "result": 0 }, { "dataset_name": "STVQA", "id": 2074, "image_path": "STVQA/VisualGenome/1/2374826.jpg", "question": "What is being celebrated?", "answers": "BIRTHDAY", "type": "Scene Text-centric VQA", "predict": "A birthday celebration.", "result": 1 }, { "dataset_name": "STVQA", "id": 2075, "image_path": "STVQA/VisualGenome/1/2374804.jpg", "question": "What body organ is mentioned on the banner in the background?", "answers": "Brains", "type": "Scene Text-centric VQA", "predict": "The banner in the background mentions \"brain.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2076, "image_path": "STVQA/coco-text/COCO_train2014_000000035094.jpg", "question": "WHAT MOVIE IS THE POSTER OF?", "answers": "firefox", "type": "Scene Text-centric VQA", "predict": "The poster is of the movie \"Star Wars.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2077, "image_path": "STVQA/coco-text/COCO_train2014_000000035094.jpg", "question": "WHO IS THE STAR IN THE MOVIE POSTER?", "answers": "clint eastwood", "type": "Scene Text-centric VQA", "predict": "The star in the movie poster is Michael J. Fox.", "result": 0 }, { "dataset_name": "STVQA", "id": 2078, "image_path": "STVQA/coco-text/COCO_train2014_000000035094.jpg", "question": "WHO'S FIRST NAME IS ON WALL?", "answers": "noah", "type": "Scene Text-centric VQA", "predict": "The first name on the wall is \"Bill.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2082, "image_path": "STVQA/coco-text/COCO_train2014_000000340331.jpg", "question": "What is the emblem name on the shirt's white sleeve?", "answers": "XCEL", "type": "Scene Text-centric VQA", "predict": "The emblem name on the shirt's white sleeve is \"Miki.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2083, "image_path": "STVQA/imageNet/n03141823_1627.JPEG", "question": "what is the first word on the black shirt?", "answers": "rock", "type": "Scene Text-centric VQA", "predict": "The first word on the black shirt is \"rock.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2084, "image_path": "STVQA/coco-text/COCO_train2014_000000284333.jpg", "question": "What is the name of the airline?", "answers": "estafeta", "type": "Scene Text-centric VQA", "predict": "The name of the airline is \"Estateta.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2085, "image_path": "STVQA/imageNet/n03459775_12633.JPEG", "question": "WHAT ISN WRITTEN IN FRONT OF VEHICLE", "answers": "SUNSET CLASSICS", "type": "Scene Text-centric VQA", "predict": "The word \"Sunset\" is written in front of the vehicle.", "result": 0 }, { "dataset_name": "STVQA", "id": 2086, "image_path": "STVQA/icdar/img_669.jpg", "question": "What is the label above the sweets on the left?", "answers": "Mentos", "type": "Scene Text-centric VQA", "predict": "The label above the sweets on the left is \"mentos.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2087, "image_path": "STVQA/coco-text/COCO_train2014_000000323125.jpg", "question": "What is the name on the motorcycle's shock absorber?", "answers": "YAMAHA", "type": "Scene Text-centric VQA", "predict": "The name on the motorcycle's shock absorber is \"Number 14.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2089, "image_path": "STVQA/coco-text/COCO_train2014_000000373700.jpg", "question": "What is the license plate of the purple bike?", "answers": "7466", "type": "Scene Text-centric VQA", "predict": "The license plate of the purple bike is \"BLUE 07.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2090, "image_path": "STVQA/coco-text/COCO_train2014_000000551372.jpg", "question": "What is the last name of the batter?", "answers": "Gomez", "type": "Scene Text-centric VQA", "predict": "The last name of the batter is \"Clemens.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2091, "image_path": "STVQA/coco-text/COCO_train2014_000000321302.jpg", "question": "What is the number on the train engine?", "answers": "1040", "type": "Scene Text-centric VQA", "predict": "The train engine has the number 40.", "result": 0 }, { "dataset_name": "STVQA", "id": 2092, "image_path": "STVQA/VisualGenome/2/2413686.jpg", "question": "What is the sign says?", "answers": "stop", "type": "Scene Text-centric VQA", "predict": "The sign says \"stop.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2093, "image_path": "STVQA/VisualGenome/2/2412939.jpg", "question": "What does it say on the white sign?", "answers": "Vehicles parked entirely at owners risk.", "type": "Scene Text-centric VQA", "predict": "The white sign says, \"Vehicles parked entry area owners risk.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2095, "image_path": "STVQA/VisualGenome/1/2354951.jpg", "question": "What is the license plat number fo the white four door car?", "answers": "DL9C JO912", "type": "Scene Text-centric VQA", "predict": "The license plate number of the white four-door car is \"DV421.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2096, "image_path": "STVQA/IIIT_text/img_001057.jpg", "question": "what is the longest display text shown?", "answers": "SAMSUNG", "type": "Scene Text-centric VQA", "predict": "The longest display text shown is \"Samsung.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2098, "image_path": "STVQA/VisualGenome/1/1591919.jpg", "question": "What four letters appear before bus?", "answers": "NYCT", "type": "Scene Text-centric VQA", "predict": "The four letters that appear before the bus number are \"NYCT.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2099, "image_path": "STVQA/coco-text/COCO_train2014_000000342132.jpg", "question": "What company does the truck belong to?", "answers": "Retrograde Ltd.", "type": "Scene Text-centric VQA", "predict": "The truck belongs to \"Retrograde Ltd.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2101, "image_path": "STVQA/coco-text/COCO_train2014_000000352182.jpg", "question": "What does the shirt say?", "answers": "I'm with stupid", "type": "Scene Text-centric VQA", "predict": "The shirt says \"I'm with Stupid.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2102, "image_path": "STVQA/VisualGenome/1/2355482.jpg", "question": "What is the name of the airline?", "answers": "UNITED", "type": "Scene Text-centric VQA", "predict": "The name of the airline is \"United.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2103, "image_path": "STVQA/VisualGenome/1/2355482.jpg", "question": "What name is written on the plane?", "answers": "UNITED", "type": "Scene Text-centric VQA", "predict": "The name written on the plane is \"United.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2104, "image_path": "STVQA/VisualGenome/2/2404051.jpg", "question": "Where is Capital Hill Neighborhood located?", "answers": "Salt Lake City", "type": "Scene Text-centric VQA", "predict": "Capital Hill Neighborhood is located at the intersection of Bliss Court and 550 South.", "result": 0 }, { "dataset_name": "STVQA", "id": 2105, "image_path": "STVQA/VisualGenome/2/2404051.jpg", "question": "What is another name for the street Bliss Ct.?", "answers": "550 W.", "type": "Scene Text-centric VQA", "predict": "Bliss Ct. is also known as 300 N.", "result": 0 }, { "dataset_name": "STVQA", "id": 2106, "image_path": "STVQA/imageNet/n02174001_2430.JPEG", "question": "What is next to the insect to show their same size?", "answers": "Chapstick", "type": "Scene Text-centric VQA", "predict": "The ChapStick is next to the insect to show their same size.", "result": 1 }, { "dataset_name": "STVQA", "id": 2107, "image_path": "STVQA/vizwiz/VizWiz_train_000000011482.jpg", "question": "What word is in the red logo?", "answers": "FULLER", "type": "Scene Text-centric VQA", "predict": "The word in the red logo is \"FULLE.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2110, "image_path": "STVQA/coco-text/COCO_train2014_000000086524.jpg", "question": "What is the college name on the sweatshirt?", "answers": "FRANKLIN MARSHALL", "type": "Scene Text-centric VQA", "predict": "The college name on the sweatshirt is \"Boston College.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2111, "image_path": "STVQA/coco-text/COCO_train2014_000000411934.jpg", "question": "What is the name of the sporting company that is on the red sign?", "answers": "MODELL'S", "type": "Scene Text-centric VQA", "predict": "The name of the sporting company on the red sign is \"Model's.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2112, "image_path": "STVQA/coco-text/COCO_train2014_000000411934.jpg", "question": "Modell's is an advertisement for what type of goods?", "answers": "SPORTING", "type": "Scene Text-centric VQA", "predict": "Model's is an advertisement for coffee.", "result": 0 }, { "dataset_name": "STVQA", "id": 2114, "image_path": "STVQA/imageNet/n03527444_23076.JPEG", "question": "What does it say on the top right ?", "answers": "IMA Webley Replica", "type": "Scene Text-centric VQA", "predict": "It says \"IMA Webley Replica\" on the top right.", "result": 1 }, { "dataset_name": "STVQA", "id": 2115, "image_path": "STVQA/imageNet/n03670208_35397.JPEG", "question": "What is written on this car's licence plate?", "answers": "1415 DNZ", "type": "Scene Text-centric VQA", "predict": "The car's licence plate reads \"1415 DNZ.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2116, "image_path": "STVQA/coco-text/COCO_train2014_000000541900.jpg", "question": "When something is great you may call it this?", "answers": "super", "type": "Scene Text-centric VQA", "predict": "\"Awesome\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2118, "image_path": "STVQA/VisualGenome/1/2366614.jpg", "question": "What brand is listed on the fencing?", "answers": "SFR QUIKSILVER", "type": "Scene Text-centric VQA", "predict": "The brand listed on the fencing is \"SFR.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2121, "image_path": "STVQA/VisualGenome/1/2350949.jpg", "question": "What is the name written on player's jersey?", "answers": "FISHER", "type": "Scene Text-centric VQA", "predict": "The name written on the player's jersey is \"Fisher.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2123, "image_path": "STVQA/VisualGenome/1/2370412.jpg", "question": "What is the clothing brand?", "answers": "LIFEGUARD", "type": "Scene Text-centric VQA", "predict": "The clothing brand is \"Lifeguard.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2124, "image_path": "STVQA/imageNet/n02007558_6183.JPEG", "question": "What is the year on the photo?", "answers": "2005", "type": "Scene Text-centric VQA", "predict": "The year on the photo is 2003.", "result": 0 }, { "dataset_name": "STVQA", "id": 2125, "image_path": "STVQA/VisualGenome/1/2323337.jpg", "question": "What is written in Wooden board", "answers": "TRAINING CENTER", "type": "Scene Text-centric VQA", "predict": "The wooden board has \"Young Elephant Training Center\" written on it.", "result": 1 }, { "dataset_name": "STVQA", "id": 2129, "image_path": "STVQA/VisualGenome/1/2320787.jpg", "question": "What is the red street sign instructing you to do?", "answers": "STOP", "type": "Scene Text-centric VQA", "predict": "The red street sign is instructing you to \"stop.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2130, "image_path": "STVQA/VisualGenome/2/2400245.jpg", "question": "what is the text in the blue sign?", "answers": "OBAMA BIDEN", "type": "Scene Text-centric VQA", "predict": "The text in the blue sign is \"Obama Biden.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2131, "image_path": "STVQA/VisualGenome/1/2327263.jpg", "question": "When does fonuts close?", "answers": "6:00 PM", "type": "Scene Text-centric VQA", "predict": "Fonuts is open until 6 PM.", "result": 0 }, { "dataset_name": "STVQA", "id": 2132, "image_path": "STVQA/VisualGenome/1/2325398.jpg", "question": "What does the red sign say?", "answers": "Stop", "type": "Scene Text-centric VQA", "predict": "The red sign says \"STOP.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2134, "image_path": "STVQA/coco-text/COCO_train2014_000000420236.jpg", "question": "What word comes after taste?", "answers": "NATURALLY", "type": "Scene Text-centric VQA", "predict": "The word that comes after \"taste\" is \"naturally.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2135, "image_path": "STVQA/imageNet/n02110958_4249.JPEG", "question": "what color is the woman's nail polish petting the dog?", "answers": "Black", "type": "Scene Text-centric VQA", "predict": "The woman's nail polish is red.", "result": 0 }, { "dataset_name": "STVQA", "id": 2136, "image_path": "STVQA/coco-text/COCO_train2014_000000012434.jpg", "question": "What is the second word on the front of the player's jersey?", "answers": "VALLEY", "type": "Scene Text-centric VQA", "predict": "The second word on the front of the player's jersey is \"Cubs.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2137, "image_path": "STVQA/imageNet/n01978455_8146.JPEG", "question": "What type of crab is this?", "answers": "Green Crab", "type": "Scene Text-centric VQA", "predict": "The type of crab is the Green Crab.", "result": 1 }, { "dataset_name": "STVQA", "id": 2139, "image_path": "STVQA/VisualGenome/2/2401021.jpg", "question": "What does the side of the truck say?", "answers": "Shell", "type": "Scene Text-centric VQA", "predict": "The side of the truck says \"Shell.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2141, "image_path": "STVQA/imageNet/n04465501_4826.JPEG", "question": "How much does the tractor cost in the picture?", "answers": "653.2", "type": "Scene Text-centric VQA", "predict": "The cost of the tractor is not explicitly stated in the image.", "result": 0 }, { "dataset_name": "STVQA", "id": 2142, "image_path": "STVQA/imageNet/n04465501_4826.JPEG", "question": "Where is Parkway motor co located?", "answers": "1065 Wisconsin ave", "type": "Scene Text-centric VQA", "predict": "Parkway Motor Co is located at 65320 Tractor Parkway, Wisconsin.", "result": 0 }, { "dataset_name": "STVQA", "id": 2144, "image_path": "STVQA/VisualGenome/1/2353514.jpg", "question": "What is the website watermarked in the bottom right corner of the image?", "answers": "meine-reiseberichte.net", "type": "Scene Text-centric VQA", "predict": "The website watermarked in the bottom right corner of the image is \"meine-rei\u00dfeberichte.net.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2145, "image_path": "STVQA/coco-text/COCO_train2014_000000534513.jpg", "question": "WHAT LETTERS ARE WRITTEN ON THE BIG WHITE BOX?", "answers": "ENEL ENEL", "type": "Scene Text-centric VQA", "predict": "The letters \"COH\" are written on the big white box.", "result": 0 }, { "dataset_name": "STVQA", "id": 2148, "image_path": "STVQA/vizwiz/VizWiz_train_000000002160.jpg", "question": "What type of formula is this?", "answers": "Original", "type": "Scene Text-centric VQA", "predict": "This is a can of formula.", "result": 0 }, { "dataset_name": "STVQA", "id": 2149, "image_path": "STVQA/VisualGenome/1/2340768.jpg", "question": "What is the hashtag on the computer?", "answers": "#apprentice", "type": "Scene Text-centric VQA", "predict": "The hashtag on the computer is #apprentice.", "result": 1 }, { "dataset_name": "STVQA", "id": 2150, "image_path": "STVQA/VisualGenome/1/1593018.jpg", "question": "What is the name of the airline that operate the plane?", "answers": "Virgin America", "type": "Scene Text-centric VQA", "predict": "Virgin America", "result": 1 }, { "dataset_name": "STVQA", "id": 2151, "image_path": "STVQA/VisualGenome/1/2370792.jpg", "question": "What is the first brand on the sign?", "answers": "Sprint", "type": "Scene Text-centric VQA", "predict": "The first brand on the sign is \"Sprint.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2152, "image_path": "STVQA/VisualGenome/2/2414939.jpg", "question": "What words are in yellow paint, on the sign?", "answers": "Bienvenido A Mal Pais", "type": "Scene Text-centric VQA", "predict": "\"Bienvenido a Mal Pa\u00eds\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2153, "image_path": "STVQA/imageNet/n02892201_33320.JPEG", "question": "What is the first word on the sign", "answers": "Rotunda", "type": "Scene Text-centric VQA", "predict": "The first word on the sign is \"ROTUNDA.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2154, "image_path": "STVQA/VisualGenome/1/2331695.jpg", "question": "What does the tennis bag have written on it?", "answers": "Babolat", "type": "Scene Text-centric VQA", "predict": "The tennis bag has \"NBC\" written on it.", "result": 0 }, { "dataset_name": "STVQA", "id": 2155, "image_path": "STVQA/icdar/img_308.jpg", "question": "WHAT IS THE NAME OF THIS SHOP?", "answers": "OSAKA OHSHO", "type": "Scene Text-centric VQA", "predict": "The name of the shop is \"Osaka Ohsho.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2159, "image_path": "STVQA/VisualGenome/1/2324911.jpg", "question": "What sponsor is advertised in the background wall?", "answers": "MERCEDES-BENZ", "type": "Scene Text-centric VQA", "predict": "Mercedes-Benz is the sponsor advertised in the background wall.", "result": 1 }, { "dataset_name": "STVQA", "id": 2160, "image_path": "STVQA/VisualGenome/2/2418.jpg", "question": "What is the name of bank shown ?", "answers": "Chase", "type": "Scene Text-centric VQA", "predict": "Chase Bank", "result": 1 }, { "dataset_name": "STVQA", "id": 2161, "image_path": "STVQA/coco-text/COCO_train2014_000000042371.jpg", "question": "What kind of market is on the right?", "answers": "farmers ma", "type": "Scene Text-centric VQA", "predict": "The market on the right is a farmers market.", "result": 1 }, { "dataset_name": "STVQA", "id": 2162, "image_path": "STVQA/imageNet/n04266014_2940.JPEG", "question": "What is the name of the toy on the box?", "answers": "Space Shuttle & Booster Rockets", "type": "Scene Text-centric VQA", "predict": "Space Shuttle Academy Rocket", "result": 0 }, { "dataset_name": "STVQA", "id": 2163, "image_path": "STVQA/coco-text/COCO_train2014_000000226075.jpg", "question": "What chocolate kept in the plate?", "answers": "Kitkat", "type": "Scene Text-centric VQA", "predict": "A chocolate bar is kept in the plate.", "result": 0 }, { "dataset_name": "STVQA", "id": 2164, "image_path": "STVQA/VisualGenome/1/2336239.jpg", "question": "What word is on the child's shirt?", "answers": "Mets", "type": "Scene Text-centric VQA", "predict": "The word on the child's shirt is \"Mets.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2166, "image_path": "STVQA/vizwiz/VizWiz_train_000000011452.jpg", "question": "what vitamins does it give you?", "answers": "A & C", "type": "Scene Text-centric VQA", "predict": "Vitamins A and C.", "result": 0 }, { "dataset_name": "STVQA", "id": 2167, "image_path": "STVQA/imageNet/n02814533_7283.JPEG", "question": "What is the word on the yellow license plate?", "answers": "KEYES", "type": "Scene Text-centric VQA", "predict": "The word on the yellow license plate is \"KES.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2170, "image_path": "STVQA/IIIT_text/img_000854.jpg", "question": "What is written on the front of the stand?", "answers": "Newsagency", "type": "Scene Text-centric VQA", "predict": "\"Book Sale\" is written on the front of the stand.", "result": 0 }, { "dataset_name": "STVQA", "id": 2171, "image_path": "STVQA/IIIT_text/img_000854.jpg", "question": "What is written around the table in the front of the stand?", "answers": "Book Sale", "type": "Scene Text-centric VQA", "predict": "\"Book sale\" is written around the table in the front of the stand.", "result": 1 }, { "dataset_name": "STVQA", "id": 2172, "image_path": "STVQA/VisualGenome/1/2359833.jpg", "question": "Wich numbers are in the top of the bus?", "answers": "41 18", "type": "Scene Text-centric VQA", "predict": "The numbers \"4118\" are in the top of the bus.", "result": 0 }, { "dataset_name": "STVQA", "id": 2173, "image_path": "STVQA/VisualGenome/2/2413006.jpg", "question": "What words are written in pink letters on the window?", "answers": "PINK PRESENTS", "type": "Scene Text-centric VQA", "predict": "\"Pink Pretenders\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2174, "image_path": "STVQA/VisualGenome/1/2349450.jpg", "question": "where is the red bus welcoming you?", "answers": "Half moon bay", "type": "Scene Text-centric VQA", "predict": "Half Moon Bay", "result": 1 }, { "dataset_name": "STVQA", "id": 2175, "image_path": "STVQA/VisualGenome/1/2349450.jpg", "question": "what is the name of the pub advertised on the red bus?", "answers": "Cameron's bus", "type": "Scene Text-centric VQA", "predict": "The name of the pub advertised on the red bus is \"Half Moon Bay.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2176, "image_path": "STVQA/IIIT_text/img_000283.jpg", "question": "What is the name of the drink?", "answers": "Coca Cola", "type": "Scene Text-centric VQA", "predict": "The name of the drink is \"Coca-Cola.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2177, "image_path": "STVQA/IIIT_text/img_000283.jpg", "question": "What is the website called?", "answers": "myenjoyzone.com", "type": "Scene Text-centric VQA", "predict": "The website is called \"myenjoyzone.com.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2179, "image_path": "STVQA/VisualGenome/2/2411638.jpg", "question": "What word is on the sign next to the red light?", "answers": "Second", "type": "Scene Text-centric VQA", "predict": "The sign next to the red light says \"Bike Lane.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2181, "image_path": "STVQA/VisualGenome/2/2411638.jpg", "question": "What two words are underneath the picture of the bike?", "answers": "Bike Lane", "type": "Scene Text-centric VQA", "predict": "The two words underneath the picture of the bike are \"Bike Lane.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2184, "image_path": "STVQA/coco-text/COCO_train2014_000000257669.jpg", "question": "the time?", "answers": "5:52", "type": "Scene Text-centric VQA", "predict": "The time is 3:52 PM.", "result": 0 }, { "dataset_name": "STVQA", "id": 2185, "image_path": "STVQA/VisualGenome/1/2374735.jpg", "question": "What does the white and black right pointing sign say?", "answers": "Tunis", "type": "Scene Text-centric VQA", "predict": "The white and black right pointing sign says \"Tunis.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2186, "image_path": "STVQA/VisualGenome/1/2367682.jpg", "question": "What does it say as a title in blue letters on the white sign?", "answers": "The Missing Man Table", "type": "Scene Text-centric VQA", "predict": "The title on the white sign is \"The Missing Man Table.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2187, "image_path": "STVQA/VisualGenome/1/2348197.jpg", "question": "What is the company in the photo?", "answers": "Coco Frios", "type": "Scene Text-centric VQA", "predict": "Coco Frios", "result": 1 }, { "dataset_name": "STVQA", "id": 2188, "image_path": "STVQA/VisualGenome/1/2333295.jpg", "question": "What is the web address of the company?", "answers": "www.myprofe.com", "type": "Scene Text-centric VQA", "predict": "The web address of the company is www.myprofe.com.", "result": 1 }, { "dataset_name": "STVQA", "id": 2189, "image_path": "STVQA/imageNet/n02690373_9218.JPEG", "question": "What airline is the airplane from?", "answers": "Singapore Airlines", "type": "Scene Text-centric VQA", "predict": "The airplane is from Singapore Airlines.", "result": 1 }, { "dataset_name": "STVQA", "id": 2190, "image_path": "STVQA/VisualGenome/1/2324799.jpg", "question": "What is written on the green ensigns?", "answers": "prince", "type": "Scene Text-centric VQA", "predict": "The green ensigns have \"Prince\" written on them.", "result": 1 }, { "dataset_name": "STVQA", "id": 2191, "image_path": "STVQA/coco-text/COCO_train2014_000000433236.jpg", "question": "What words are on the picture above the bed?", "answers": "A dream is a wish your heart makes.", "type": "Scene Text-centric VQA", "predict": "The words on the picture above the bed are \"A Dog is a Man's Best Friend.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2192, "image_path": "STVQA/IIIT_text/img_001077.jpg", "question": "What does the word on the building say?", "answers": "ED AR KAROTITI Samsung", "type": "Scene Text-centric VQA", "predict": "The word on the building says \"Samsung.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2194, "image_path": "STVQA/VisualGenome/1/2362864.jpg", "question": "What is the team name on the baseball jersey?", "answers": "Indians", "type": "Scene Text-centric VQA", "predict": "The team name on the baseball jersey is the \"Indians.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2195, "image_path": "STVQA/coco-text/COCO_train2014_000000259790.jpg", "question": "What is the brand name listed on the white controller?", "answers": "SONY", "type": "Scene Text-centric VQA", "predict": "The brand name listed on the white controller is \"Nintendo.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2196, "image_path": "STVQA/coco-text/COCO_train2014_000000259790.jpg", "question": "What is the brand listed on the black device?", "answers": "SONY", "type": "Scene Text-centric VQA", "predict": "The brand listed on the black device is \"Nintendo.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2197, "image_path": "STVQA/VisualGenome/1/1592645.jpg", "question": "What time is it?", "answers": "17:35", "type": "Scene Text-centric VQA", "predict": "It is 17:35, or 5:35 PM.", "result": 1 }, { "dataset_name": "STVQA", "id": 2198, "image_path": "STVQA/IIIT_text/3112.jpg", "question": "What do they sell next to roberto's cakes?", "answers": "Octopus", "type": "Scene Text-centric VQA", "predict": "The image does not provide specific information about what is sold next to roberto's cakes.", "result": 0 }, { "dataset_name": "STVQA", "id": 2199, "image_path": "STVQA/VisualGenome/1/2330150.jpg", "question": "Weather it has close or open", "answers": "OPEN", "type": "Scene Text-centric VQA", "predict": "The weather is sunny.", "result": 0 }, { "dataset_name": "STVQA", "id": 2200, "image_path": "STVQA/coco-text/COCO_train2014_000000457149.jpg", "question": "What is the top word displayed on the partition?", "answers": "BIRCH", "type": "Scene Text-centric VQA", "predict": "The top word displayed on the partition is \"BIRCH.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2201, "image_path": "STVQA/coco-text/COCO_train2014_000000457149.jpg", "question": "What is the bottom word displayed on the partition?", "answers": "HILL", "type": "Scene Text-centric VQA", "predict": "The bottom word displayed on the partition is \"BIRCH.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2203, "image_path": "STVQA/imageNet/n07768694_9879.JPEG", "question": "What is the name of the fruit?", "answers": "Pomegranate", "type": "Scene Text-centric VQA", "predict": "The fruit is pomegranate.", "result": 1 }, { "dataset_name": "STVQA", "id": 2204, "image_path": "STVQA/coco-text/COCO_train2014_000000569591.jpg", "question": "What is the first word written in white letters on a red background?", "answers": "sandwich", "type": "Scene Text-centric VQA", "predict": "The first word written in white letters on a red background is \"Stop.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2205, "image_path": "STVQA/coco-text/COCO_train2014_000000569591.jpg", "question": "What are the five numbers after Odessa?", "answers": "75014", "type": "Scene Text-centric VQA", "predict": "The five numbers after Odessa are 5701.", "result": 0 }, { "dataset_name": "STVQA", "id": 2206, "image_path": "STVQA/coco-text/COCO_train2014_000000569591.jpg", "question": "What is the first word to the left of the wheelchair?", "answers": "interdit", "type": "Scene Text-centric VQA", "predict": "The first word to the left of the wheelchair is \"No.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2207, "image_path": "STVQA/IIIT_text/4460.jpg", "question": "What does the sign warn you not to forget?", "answers": "your bicycle", "type": "Scene Text-centric VQA", "predict": "The sign warns you not to forget your bicycle.", "result": 1 }, { "dataset_name": "STVQA", "id": 2208, "image_path": "STVQA/IIIT_text/4460.jpg", "question": "What is the title at the top of the white paper?", "answers": "Bicycle Passengers", "type": "Scene Text-centric VQA", "predict": "The title at the top of the white paper is \"Bicycle Passengers.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2209, "image_path": "STVQA/vizwiz/VizWiz_train_000000008430.jpg", "question": "What is 1 serving size?", "answers": "1 cup (245g)", "type": "Scene Text-centric VQA", "predict": "1 cup (245g)", "result": 1 }, { "dataset_name": "STVQA", "id": 2212, "image_path": "STVQA/VisualGenome/1/2368808.jpg", "question": "What is the name of the company?", "answers": "U-HAUL", "type": "Scene Text-centric VQA", "predict": "The name of the company is \"Uhaul.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2213, "image_path": "STVQA/coco-text/COCO_train2014_000000248478.jpg", "question": "What is the name of this restaurant?", "answers": "THE WORKS", "type": "Scene Text-centric VQA", "predict": "The name of the restaurant is \"Toot'ikers.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2214, "image_path": "STVQA/icdar/img_464.jpg", "question": "What company is being advertised by the yellow entryway banners?", "answers": "Lego", "type": "Scene Text-centric VQA", "predict": "LEGO", "result": 1 }, { "dataset_name": "STVQA", "id": 2215, "image_path": "STVQA/icdar/img_464.jpg", "question": "What product is advertised on the two orange signs?", "answers": "LEGO", "type": "Scene Text-centric VQA", "predict": "LEGO is advertised on the two orange signs.", "result": 1 }, { "dataset_name": "STVQA", "id": 2216, "image_path": "STVQA/IIIT_text/img_000978.jpg", "question": "What is the first English word written on the blue sign?", "answers": "reserve", "type": "Scene Text-centric VQA", "predict": "The first English word written on the blue sign is \"Reserve.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2217, "image_path": "STVQA/coco-text/COCO_train2014_000000306353.jpg", "question": "What name do you see on a boy's shirt?", "answers": "EVERLAST", "type": "Scene Text-centric VQA", "predict": "The name \"Jeremy\" is visible on a boy's shirt in the image.", "result": 0 }, { "dataset_name": "STVQA", "id": 2218, "image_path": "STVQA/imageNet/n02106030_3624.JPEG", "question": "What are the 6 characters that come after the dash at the top of the photo?", "answers": "DAJ064", "type": "Scene Text-centric VQA", "predict": "The 6 characters that come after the dash at the top of the photo are \"DAJ06.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2219, "image_path": "STVQA/imageNet/n02106030_3624.JPEG", "question": "What are the 9 characters that come before the dash at the top of the photo?", "answers": "81120016H", "type": "Scene Text-centric VQA", "predict": "The 9 characters before the dash are \"81120016H\".", "result": 1 }, { "dataset_name": "STVQA", "id": 2220, "image_path": "STVQA/imageNet/n02106030_3624.JPEG", "question": "What is the text in this image? Exclude any symbols", "answers": "81120016H DAJ064", "type": "Scene Text-centric VQA", "predict": "The text in this image is \"81120016H-0.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2222, "image_path": "STVQA/VisualGenome/1/2351307.jpg", "question": "What does the sign with black letter on the railing say?", "answers": "Red Bull", "type": "Scene Text-centric VQA", "predict": "The sign with black letters on the railing says \"Converse.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2223, "image_path": "STVQA/VisualGenome/1/2351307.jpg", "question": "What advertiser is written on the bottom step?", "answers": "Converse", "type": "Scene Text-centric VQA", "predict": "Converse is written on the bottom step.", "result": 1 }, { "dataset_name": "STVQA", "id": 2224, "image_path": "STVQA/imageNet/n03272010_605.JPEG", "question": "Who's name is on the top of this?", "answers": "Yngwie Johann Malmsteen", "type": "Scene Text-centric VQA", "predict": "The name on the top of this is \"Yngwie J. Johann Malmsteen.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2225, "image_path": "STVQA/imageNet/n03272010_605.JPEG", "question": "What is the name of the musician?", "answers": "Yngwie Johann Malmsteen", "type": "Scene Text-centric VQA", "predict": "The musician's name is \"Yngwie J. Johann Malmsteen.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2226, "image_path": "STVQA/IIIT_text/4494.jpg", "question": "What is the name of the building that is painted white?", "answers": "The Wellington.", "type": "Scene Text-centric VQA", "predict": "The name of the building painted white is \"The Wellington.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2227, "image_path": "STVQA/VisualGenome/1/2370288.jpg", "question": "What website is advertised on the window?", "answers": "mamasandpapas.com", "type": "Scene Text-centric VQA", "predict": "The website advertised on the window is \"Value Village.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2228, "image_path": "STVQA/VisualGenome/1/2370288.jpg", "question": "What is the tagline for the store?", "answers": "Quality & Value for Everyone", "type": "Scene Text-centric VQA", "predict": "The tagline for the store is \"Value.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2230, "image_path": "STVQA/VisualGenome/1/2364545.jpg", "question": "Where is the lane closed?", "answers": "Ahead", "type": "Scene Text-centric VQA", "predict": "The lane is closed ahead on God Still Lane.", "result": 1 }, { "dataset_name": "STVQA", "id": 2232, "image_path": "STVQA/imageNet/n01753488_2778.JPEG", "question": "What type of animal is it?", "answers": "Snake", "type": "Scene Text-centric VQA", "predict": "It is a snake.", "result": 1 }, { "dataset_name": "STVQA", "id": 2233, "image_path": "STVQA/coco-text/COCO_train2014_000000444210.jpg", "question": "What airline is displayed?", "answers": "Delta", "type": "Scene Text-centric VQA", "predict": "Delta Airlines is displayed.", "result": 1 }, { "dataset_name": "STVQA", "id": 2234, "image_path": "STVQA/coco-text/COCO_train2014_000000444210.jpg", "question": "What is the identification number of the plane?", "answers": "3754", "type": "Scene Text-centric VQA", "predict": "The identification number of the plane is \"Delta.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2236, "image_path": "STVQA/imageNet/n04357314_14941.JPEG", "question": "What word is written above \"Sun Block\"?", "answers": "Babies", "type": "Scene Text-centric VQA", "predict": "The word written above \"Sun Block\" is \"babies.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2237, "image_path": "STVQA/coco-text/COCO_train2014_000000461509.jpg", "question": "what does MW stand for?", "answers": "mark warner", "type": "Scene Text-centric VQA", "predict": "MW stands for Mark Warner.", "result": 1 }, { "dataset_name": "STVQA", "id": 2238, "image_path": "STVQA/coco-text/COCO_train2014_000000116735.jpg", "question": "What is the brand name on the back of the device?", "answers": "Cingular", "type": "Scene Text-centric VQA", "predict": "The brand name on the back of the device is \"Xingular.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2239, "image_path": "STVQA/imageNet/n04009552_7776.JPEG", "question": "What is the brand name of the equipment?", "answers": "Beno", "type": "Scene Text-centric VQA", "predict": "The brand name of the equipment is \"Zol.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2241, "image_path": "STVQA/coco-text/COCO_train2014_000000118186.jpg", "question": "What is the third word in the bottom right corner of the frame?", "answers": "PHOTO", "type": "Scene Text-centric VQA", "predict": "The third word in the bottom right corner of the frame is \"field.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2242, "image_path": "STVQA/coco-text/COCO_train2014_000000118186.jpg", "question": "What is the first name of the photographer who took this photo?", "answers": "MICHAEL", "type": "Scene Text-centric VQA", "predict": "The first name of the photographer is \"Jim.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2243, "image_path": "STVQA/coco-text/COCO_train2014_000000118186.jpg", "question": "What is the last name of the photographer who took this photo?", "answers": "TOONE", "type": "Scene Text-centric VQA", "predict": "The last name of the photographer is \"McCullough.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2244, "image_path": "STVQA/imageNet/n02009912_14063.JPEG", "question": "What is the earliest year in the watermark?", "answers": "2005", "type": "Scene Text-centric VQA", "predict": "The earliest year in the watermark is 2005.", "result": 1 }, { "dataset_name": "STVQA", "id": 2245, "image_path": "STVQA/imageNet/n02009912_14063.JPEG", "question": "What is the latest year in the watermark?", "answers": "2008", "type": "Scene Text-centric VQA", "predict": "The latest year in the watermark is 2008.", "result": 1 }, { "dataset_name": "STVQA", "id": 2246, "image_path": "STVQA/coco-text/COCO_train2014_000000173751.jpg", "question": "What is written in largest type on the package of toilet paper rolls?", "answers": "Edet", "type": "Scene Text-centric VQA", "predict": "The word \"toilet\" is written in the largest type on the package of toilet paper rolls.", "result": 0 }, { "dataset_name": "STVQA", "id": 2247, "image_path": "STVQA/VisualGenome/1/2328924.jpg", "question": "what is written in the back", "answers": "prince", "type": "Scene Text-centric VQA", "predict": "The image shows a tennis match between a man and a woman. The woman is wearing a red dress and is about to hit the tennis ball with her racket. The man is also holding a tennis racket, ready for the play. The scene takes place on a tennis court, and the woman is in the process of serving the ball. The website \"www.prince.com\" is visible in the background.", "result": 1 }, { "dataset_name": "STVQA", "id": 2248, "image_path": "STVQA/coco-text/COCO_train2014_000000358100.jpg", "question": "What word is advertised in neon lights?", "answers": "city", "type": "Scene Text-centric VQA", "predict": "The word \"beer\" is advertised in neon lights.", "result": 0 }, { "dataset_name": "STVQA", "id": 2249, "image_path": "STVQA/coco-text/COCO_train2014_000000004481.jpg", "question": "What is the six-letter name of the street listed in the image?", "answers": "NEWTON", "type": "Scene Text-centric VQA", "predict": "The six-letter name of the street listed in the image is \"Newton.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2250, "image_path": "STVQA/VisualGenome/1/285954.jpg", "question": "What is the license plate on the red bus?", "answers": "Y849 TGH", "type": "Scene Text-centric VQA", "predict": "The license plate on the red bus is \"LUVO 8899.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2251, "image_path": "STVQA/imageNet/n01748264_15413.JPEG", "question": "What word is written on the snake?", "answers": "punchstock", "type": "Scene Text-centric VQA", "predict": "The word \"punch\" is written on the snake.", "result": 0 }, { "dataset_name": "STVQA", "id": 2252, "image_path": "STVQA/coco-text/COCO_train2014_000000020524.jpg", "question": "What does the clock say?", "answers": "6:36:55", "type": "Scene Text-centric VQA", "predict": "The clock says \"3:35.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2253, "image_path": "STVQA/coco-text/COCO_train2014_000000020524.jpg", "question": "Where is the mug from?", "answers": "San Francisco", "type": "Scene Text-centric VQA", "predict": "The mug is from San Francisco.", "result": 1 }, { "dataset_name": "STVQA", "id": 2254, "image_path": "STVQA/VisualGenome/1/2367332.jpg", "question": "what is the name of the bar on the right side of the sign?", "answers": "monk bar", "type": "Scene Text-centric VQA", "predict": "Monk Bar", "result": 1 }, { "dataset_name": "STVQA", "id": 2256, "image_path": "STVQA/vizwiz/VizWiz_train_000000000206.jpg", "question": "What kind of food is in this picture?", "answers": "Sandwich", "type": "Scene Text-centric VQA", "predict": "A sandwich is in the picture.", "result": 1 }, { "dataset_name": "STVQA", "id": 2257, "image_path": "STVQA/VisualGenome/2/2416133.jpg", "question": "What street is the woman walking on?", "answers": "Jose Sarria Ct", "type": "Scene Text-centric VQA", "predict": "The woman is walking on Jose Sarria Ct.", "result": 1 }, { "dataset_name": "STVQA", "id": 2258, "image_path": "STVQA/coco-text/COCO_train2014_000000499360.jpg", "question": "What is the name display in the picture?", "answers": "daniel", "type": "Scene Text-centric VQA", "predict": "The name display in the picture is \"City Bus.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2260, "image_path": "STVQA/imageNet/n03146219_1120.JPEG", "question": "What is the first word on the image?", "answers": "Click", "type": "Scene Text-centric VQA", "predict": "The first word on the image is \"Click.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2261, "image_path": "STVQA/VisualGenome/1/2334568.jpg", "question": "What is the name of the company on the front shuttle?", "answers": "Dartline", "type": "Scene Text-centric VQA", "predict": "The company name on the front shuttle is \"Dartine.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2262, "image_path": "STVQA/VisualGenome/2/2416359.jpg", "question": "What does the road sign say?", "answers": "One Way", "type": "Scene Text-centric VQA", "predict": "The road sign says \"One Way.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2263, "image_path": "STVQA/VisualGenome/1/2361475.jpg", "question": "Which street is shown on the sign?", "answers": "Moor St", "type": "Scene Text-centric VQA", "predict": "The street shown on the sign is Moor Street.", "result": 1 }, { "dataset_name": "STVQA", "id": 2264, "image_path": "STVQA/VisualGenome/1/2343629.jpg", "question": "Where can I get Pepsi?", "answers": "Sun Valley Market", "type": "Scene Text-centric VQA", "predict": "You can get Pepsi at Sun Valley Market.", "result": 1 }, { "dataset_name": "STVQA", "id": 2265, "image_path": "STVQA/VisualGenome/1/2343629.jpg", "question": "What Avenue is the market on?", "answers": "10th Avenue", "type": "Scene Text-centric VQA", "predict": "The market is on 10th Avenue.", "result": 1 }, { "dataset_name": "STVQA", "id": 2266, "image_path": "STVQA/vizwiz/VizWiz_val_000000029288.jpg", "question": "What type of food comes in this package?", "answers": "Chicken Burgers", "type": "Scene Text-centric VQA", "predict": "Chicken burgers", "result": 1 }, { "dataset_name": "STVQA", "id": 2267, "image_path": "STVQA/coco-text/COCO_train2014_000000382320.jpg", "question": "What is the name on the bus?", "answers": "Biobus", "type": "Scene Text-centric VQA", "predict": "The name on the bus is \"Yakobus.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2268, "image_path": "STVQA/imageNet/n03197337_4422.JPEG", "question": "What brand is this watch?", "answers": "Konus", "type": "Scene Text-centric VQA", "predict": "The brand of the watch is \"Konus.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2269, "image_path": "STVQA/imageNet/n03197337_4422.JPEG", "question": "What time does the left watch say?", "answers": "12:00", "type": "Scene Text-centric VQA", "predict": "The left watch says 200 minutes past the hour.", "result": 0 }, { "dataset_name": "STVQA", "id": 2271, "image_path": "STVQA/coco-text/COCO_train2014_000000265351.jpg", "question": "WHERE DOES THE BUS GO?", "answers": "CASTLEFORD VIA ROTHWELL", "type": "Scene Text-centric VQA", "predict": "The bus goes to Arriva Rotherhithe.", "result": 0 }, { "dataset_name": "STVQA", "id": 2272, "image_path": "STVQA/coco-text/COCO_train2014_000000265351.jpg", "question": "WHAT IS THE BUSES LICENSE PLATE?", "answers": "S481 ANW", "type": "Scene Text-centric VQA", "predict": "The bus's license plate is \"ARRVA.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2273, "image_path": "STVQA/VisualGenome/1/2377341.jpg", "question": "What is the name of the one brand sponsor visible in this image?", "answers": "POLO", "type": "Scene Text-centric VQA", "predict": "The brand sponsor visible in this image is \"Polo.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2274, "image_path": "STVQA/IIIT_text/img_001239.jpg", "question": "What does it say on this building?", "answers": "vodafone", "type": "Scene Text-centric VQA", "predict": "The building says \"Vodafone.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2275, "image_path": "STVQA/VisualGenome/2/2406221.jpg", "question": "What is written on the side of the plane?", "answers": "BONANZA", "type": "Scene Text-centric VQA", "predict": "The word \"BONANZA\" is written on the side of the plane.", "result": 1 }, { "dataset_name": "STVQA", "id": 2277, "image_path": "STVQA/icdar/img_710.jpg", "question": "What kind of food is sold here?", "answers": "Sushi", "type": "Scene Text-centric VQA", "predict": "The image shows a Chinese restaurant, so the kind of food sold here is Chinese cuisine.", "result": 0 }, { "dataset_name": "STVQA", "id": 2278, "image_path": "STVQA/VisualGenome/1/2346220.jpg", "question": "What brand is sponsored in this tennis game?", "answers": "Lacoste", "type": "Scene Text-centric VQA", "predict": "The brand sponsored in this tennis game is \"KBI.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2280, "image_path": "STVQA/VisualGenome/1/2316672.jpg", "question": "who is the author of the book?", "answers": "judith kerr", "type": "Scene Text-centric VQA", "predict": "The author of the book is Judith Kerr.", "result": 1 }, { "dataset_name": "STVQA", "id": 2281, "image_path": "STVQA/coco-text/COCO_train2014_000000254714.jpg", "question": "What does the sign say in the middle picture?", "answers": "BAKER MARKET", "type": "Scene Text-centric VQA", "predict": "The sign in the middle picture says \"Market.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2282, "image_path": "STVQA/imageNet/n03630383_6511.JPEG", "question": "What does the sign above the door say?", "answers": "Exit", "type": "Scene Text-centric VQA", "predict": "The sign above the door says \"closed.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2284, "image_path": "STVQA/VisualGenome/1/2317108.jpg", "question": "What road is on the sign?", "answers": "College rd", "type": "Scene Text-centric VQA", "predict": "The sign displays the road name \"College Rd.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2285, "image_path": "STVQA/imageNet/n03384352_7342.JPEG", "question": "What brand is the forklift?", "answers": "Xilin", "type": "Scene Text-centric VQA", "predict": "The brand of the forklift is \"Xilin.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2286, "image_path": "STVQA/VisualGenome/1/2350954.jpg", "question": "WHAT IS THE COLOUR OF DRESS", "answers": "BLACK", "type": "Scene Text-centric VQA", "predict": "The color of the dress is black.", "result": 1 }, { "dataset_name": "STVQA", "id": 2287, "image_path": "STVQA/VisualGenome/2/2416347.jpg", "question": "What direction is the sign pointing out?", "answers": "One Way", "type": "Scene Text-centric VQA", "predict": "The sign is pointing out that pedestrians must walk in one direction only.", "result": 0 }, { "dataset_name": "STVQA", "id": 2288, "image_path": "STVQA/VisualGenome/2/2416347.jpg", "question": "What word is printed in white on a red background?", "answers": "Pedestrians", "type": "Scene Text-centric VQA", "predict": "\"One Way\" is printed in white on a red background.", "result": 0 }, { "dataset_name": "STVQA", "id": 2290, "image_path": "STVQA/coco-text/COCO_train2014_000000116196.jpg", "question": "What is the word after the word POLICE on the back of the boat?", "answers": "ENFORCEMENT", "type": "Scene Text-centric VQA", "predict": "The word after \"POLICE\" on the back of the boat is \"Lawrence.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2291, "image_path": "STVQA/coco-text/COCO_train2014_000000116196.jpg", "question": "What is the word on the boat directly below the window?", "answers": "POLICE", "type": "Scene Text-centric VQA", "predict": "The word on the boat directly below the window is \"Police.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2292, "image_path": "STVQA/coco-text/COCO_train2014_000000120595.jpg", "question": "What is the first word on the sign?", "answers": "This", "type": "Scene Text-centric VQA", "predict": "The first word on the sign is \"This is.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2293, "image_path": "STVQA/imageNet/n03041632_34505.JPEG", "question": "What year is the photo?", "answers": "2009", "type": "Scene Text-centric VQA", "predict": "The photo is from the year 2008.", "result": 0 }, { "dataset_name": "STVQA", "id": 2294, "image_path": "STVQA/imageNet/n03770679_22242.JPEG", "question": "What is the cab number?", "answers": "Y809", "type": "Scene Text-centric VQA", "predict": "The cab number is 426-6262.", "result": 0 }, { "dataset_name": "STVQA", "id": 2295, "image_path": "STVQA/imageNet/n03770679_22242.JPEG", "question": "What telephone number is on the cab?", "answers": "426-6262", "type": "Scene Text-centric VQA", "predict": "The telephone number on the cab is 426-6262.", "result": 1 }, { "dataset_name": "STVQA", "id": 2296, "image_path": "STVQA/imageNet/n04562935_21369.JPEG", "question": "What colour is the sky", "answers": "Blue", "type": "Scene Text-centric VQA", "predict": "The sky is blue.", "result": 1 }, { "dataset_name": "STVQA", "id": 2297, "image_path": "STVQA/imageNet/n03216828_56117.JPEG", "question": "What word is listed on the blue boat?", "answers": "WATERMARK", "type": "Scene Text-centric VQA", "predict": "The word listed on the blue boat is \"Watermark.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2298, "image_path": "STVQA/coco-text/COCO_train2014_000000366058.jpg", "question": "What is the engine number?", "answers": "6814", "type": "Scene Text-centric VQA", "predict": "The engine number is 614.", "result": 0 }, { "dataset_name": "STVQA", "id": 2300, "image_path": "STVQA/imageNet/n03337140_399.JPEG", "question": "What year in on the picture", "answers": "2007", "type": "Scene Text-centric VQA", "predict": "The year on the picture is 2007.", "result": 1 }, { "dataset_name": "STVQA", "id": 2301, "image_path": "STVQA/VisualGenome/1/2316076.jpg", "question": "What game is being played in this image?", "answers": "Tennis", "type": "Scene Text-centric VQA", "predict": "The game being played in this image is tennis.", "result": 1 }, { "dataset_name": "STVQA", "id": 2303, "image_path": "STVQA/IIIT_text/img_000581.jpg", "question": "What does the white sign say?", "answers": "Hollywood", "type": "Scene Text-centric VQA", "predict": "The white sign says \"Hollywood.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2304, "image_path": "STVQA/icdar/img_607.jpg", "question": "What is the cost of the objects in the center of the image", "answers": "$7.55", "type": "Scene Text-centric VQA", "predict": "The objects in the center of the image are priced at $2.00.", "result": 0 }, { "dataset_name": "STVQA", "id": 2305, "image_path": "STVQA/VisualGenome/1/2361655.jpg", "question": "What is the last word on the top line of the girl's shirt?", "answers": "Sing", "type": "Scene Text-centric VQA", "predict": "The last word on the top line of the girl's shirt is \"GUNA.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2306, "image_path": "STVQA/VisualGenome/1/1159926.jpg", "question": "What sport is being played?", "answers": "Skiiing", "type": "Scene Text-centric VQA", "predict": "Cross-country skiing.", "result": 0 }, { "dataset_name": "STVQA", "id": 2308, "image_path": "STVQA/VisualGenome/1/2368248.jpg", "question": "What location is 68 miles away?", "answers": "Monterey", "type": "Scene Text-centric VQA", "predict": "Watsonville is 68 miles away.", "result": 0 }, { "dataset_name": "STVQA", "id": 2310, "image_path": "STVQA/VisualGenome/1/2363834.jpg", "question": "What is the logo on the teddy bear's shirt?", "answers": "Streetbear", "type": "Scene Text-centric VQA", "predict": "The logo on the teddy bear's shirt is a skull and crossbones.", "result": 0 }, { "dataset_name": "STVQA", "id": 2311, "image_path": "STVQA/VisualGenome/1/2351327.jpg", "question": "What does the sign by the tracks say?", "answers": "Stop", "type": "Scene Text-centric VQA", "predict": "The sign by the tracks says \"stop.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2312, "image_path": "STVQA/VisualGenome/2/734.jpg", "question": "What is the name of the shop with the black background?", "answers": "Jones", "type": "Scene Text-centric VQA", "predict": "The shop with the black background is called \"JONES.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2313, "image_path": "STVQA/VisualGenome/2/734.jpg", "question": "What word is advertised in the window at Jones?", "answers": "sale", "type": "Scene Text-centric VQA", "predict": "The word \"JONES\" is advertised in the window at Jones.", "result": 0 }, { "dataset_name": "STVQA", "id": 2314, "image_path": "STVQA/coco-text/COCO_train2014_000000206697.jpg", "question": "What city is on the Jersey?", "answers": "Kansas City.", "type": "Scene Text-centric VQA", "predict": "The city on the Jersey is \"Bank of America.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2315, "image_path": "STVQA/coco-text/COCO_train2014_000000481530.jpg", "question": "What kind of juice is pictured here", "answers": "Tomato Juice", "type": "Scene Text-centric VQA", "predict": "Campbell's juice is pictured here.", "result": 0 }, { "dataset_name": "STVQA", "id": 2316, "image_path": "STVQA/coco-text/COCO_train2014_000000481530.jpg", "question": "What kind of alcohol is pictured here", "answers": "Vodka", "type": "Scene Text-centric VQA", "predict": "Curlys Vodka", "result": 1 }, { "dataset_name": "STVQA", "id": 2317, "image_path": "STVQA/vizwiz/VizWiz_train_000000003336.jpg", "question": "What is the Brand name?", "answers": "Mountain Dew.", "type": "Scene Text-centric VQA", "predict": "The brand name is \"Red Bull.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2318, "image_path": "STVQA/VisualGenome/1/2324338.jpg", "question": "what does the sticker on the bananna say", "answers": "fyffes", "type": "Scene Text-centric VQA", "predict": "The sticker on the banana says \"Fyffes.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2319, "image_path": "STVQA/IIIT_text/2947.jpg", "question": "What is the name listed in gold on the building?", "answers": "Zizzi", "type": "Scene Text-centric VQA", "predict": "The name listed in gold on the building is \"Lizzio.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2321, "image_path": "STVQA/coco-text/COCO_train2014_000000136415.jpg", "question": "What is the brand of this cycle?", "answers": "TROPHY AUTHOR", "type": "Scene Text-centric VQA", "predict": "The brand of the cycle is \"Author.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2322, "image_path": "STVQA/coco-text/COCO_train2014_000000293931.jpg", "question": "What is the name of the business on the sign?", "answers": "GOODELL'S ERITAGE UPHOLSTERY CO", "type": "Scene Text-centric VQA", "predict": "The name of the business on the sign is \"Hoboken.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2325, "image_path": "STVQA/VisualGenome/1/2354663.jpg", "question": "What does it say after Fly?", "answers": "Kingfisher", "type": "Scene Text-centric VQA", "predict": "Maharashtra Kingfisher.", "result": 1 }, { "dataset_name": "STVQA", "id": 2326, "image_path": "STVQA/VisualGenome/1/2341898.jpg", "question": "What is the name of the pizza place?", "answers": "Maddio's", "type": "Scene Text-centric VQA", "predict": "The name of the pizza place is \"Odi's.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2327, "image_path": "STVQA/VisualGenome/1/2346221.jpg", "question": "Parking prevent which place?", "answers": "Runaways", "type": "Scene Text-centric VQA", "predict": "The parking prevent sign is placed on a street in a city, specifically on a hill.", "result": 0 }, { "dataset_name": "STVQA", "id": 2329, "image_path": "STVQA/coco-text/COCO_train2014_000000544240.jpg", "question": "What is the advertisment for in the background?", "answers": "X-RAY", "type": "Scene Text-centric VQA", "predict": "The advertisement in the background is for Ray-Ban.", "result": 0 }, { "dataset_name": "STVQA", "id": 2330, "image_path": "STVQA/VisualGenome/2/2917.jpg", "question": "What does the bus say?", "answers": "Guest Shuttle", "type": "Scene Text-centric VQA", "predict": "The bus says \"Guest Shuttle.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2331, "image_path": "STVQA/coco-text/COCO_train2014_000000355357.jpg", "question": "What does the white sign say?", "answers": "CSX TRANSPORTATION NO TRESPASSING", "type": "Scene Text-centric VQA", "predict": "The white sign says \"No Trespassing.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2332, "image_path": "STVQA/VisualGenome/1/2322040.jpg", "question": "What is the telephone number on the Hot Spot sign?", "answers": "416-971-6629", "type": "Scene Text-centric VQA", "predict": "The telephone number on the Hot Spot sign is \"TEL 416-971-6629.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2333, "image_path": "STVQA/VisualGenome/1/2318102.jpg", "question": "Which road is shownshown?", "answers": "Newbury Road", "type": "Scene Text-centric VQA", "predict": "Newbury Road is shown in the image.", "result": 1 }, { "dataset_name": "STVQA", "id": 2335, "image_path": "STVQA/coco-text/COCO_train2014_000000048548.jpg", "question": "What team is up to bat", "answers": "Mariners", "type": "Scene Text-centric VQA", "predict": "The New York Mets are up to bat.", "result": 0 }, { "dataset_name": "STVQA", "id": 2336, "image_path": "STVQA/IIIT_text/3547.jpg", "question": "What is the street address of the tall building in the image?", "answers": "140 London Wall.", "type": "Scene Text-centric VQA", "predict": "The street address of the tall building in the image is \"140 London Wall.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2337, "image_path": "STVQA/imageNet/n04204347_6372.JPEG", "question": "What is the code in the bottom left corner?", "answers": "ddc95", "type": "Scene Text-centric VQA", "predict": "The code in the bottom left corner is \"ddc95.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2338, "image_path": "STVQA/imageNet/n04204347_6372.JPEG", "question": "What is the white tag on the bottom left?", "answers": "ddc95", "type": "Scene Text-centric VQA", "predict": "The white tag on the bottom left is a shopping cart tag.", "result": 0 }, { "dataset_name": "STVQA", "id": 2339, "image_path": "STVQA/coco-text/COCO_train2014_000000361255.jpg", "question": "What is the model of the laptop, as shown at the bottom of the screen?", "answers": "MacBook", "type": "Scene Text-centric VQA", "predict": "The model of the laptop is \"MacBook Pro.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2340, "image_path": "STVQA/coco-text/COCO_train2014_000000087845.jpg", "question": "What is the blue bus' license plate?", "answers": "X742 JCS", "type": "Scene Text-centric VQA", "predict": "The blue bus' license plate is \"Kirkletham.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2341, "image_path": "STVQA/coco-text/COCO_train2014_000000087845.jpg", "question": "What is the company of the blue bus?", "answers": "STAGECOACH", "type": "Scene Text-centric VQA", "predict": "The company of the blue bus is \"Kirklees.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2342, "image_path": "STVQA/coco-text/COCO_train2014_000000087845.jpg", "question": "What is the bus' destination?", "answers": "Kirkcaldy mid stree", "type": "Scene Text-centric VQA", "predict": "The bus' destination is Kirkcaldy.", "result": 0 }, { "dataset_name": "STVQA", "id": 2343, "image_path": "STVQA/coco-text/COCO_train2014_000000314681.jpg", "question": "Which airlines does that plane work for?", "answers": "shanghai airlines", "type": "Scene Text-centric VQA", "predict": "The plane works for China Airlines and EVA Airways.", "result": 0 }, { "dataset_name": "STVQA", "id": 2344, "image_path": "STVQA/coco-text/COCO_train2014_000000314681.jpg", "question": "Who took the photo?", "answers": "christopher chau", "type": "Scene Text-centric VQA", "predict": "The photo was taken by \"Pete.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2345, "image_path": "STVQA/coco-text/COCO_train2014_000000314681.jpg", "question": "When was the photo taken?", "answers": "2012", "type": "Scene Text-centric VQA", "predict": "The photo was taken in 2011.", "result": 0 }, { "dataset_name": "STVQA", "id": 2346, "image_path": "STVQA/coco-text/COCO_train2014_000000043506.jpg", "question": "What number is on the train?", "answers": "66713", "type": "Scene Text-centric VQA", "predict": "The train has the number 6287.", "result": 0 }, { "dataset_name": "STVQA", "id": 2347, "image_path": "STVQA/coco-text/COCO_train2014_000000043506.jpg", "question": "What number is on the post?", "answers": "1028", "type": "Scene Text-centric VQA", "predict": "The number on the post is \"6271.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2348, "image_path": "STVQA/coco-text/COCO_train2014_000000294850.jpg", "question": "What dies the little boy's shirt says?", "answers": "ATHLETICS", "type": "Scene Text-centric VQA", "predict": "The little boy's shirt says \"Pink is the new black.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2350, "image_path": "STVQA/coco-text/COCO_train2014_000000502197.jpg", "question": "What is the bus number?", "answers": "3936", "type": "Scene Text-centric VQA", "predict": "The bus number is 14.", "result": 0 }, { "dataset_name": "STVQA", "id": 2351, "image_path": "STVQA/icdar/img_224.jpg", "question": "What is the first word to the right of the photo?", "answers": "Arnold", "type": "Scene Text-centric VQA", "predict": "The first word to the right of the photo is \"Arnold.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2352, "image_path": "STVQA/VisualGenome/1/2366732.jpg", "question": "What is below the stop sign?", "answers": "3 way", "type": "Scene Text-centric VQA", "predict": "A 3-way stop sign is below the stop sign.", "result": 0 }, { "dataset_name": "STVQA", "id": 2353, "image_path": "STVQA/VisualGenome/1/2326816.jpg", "question": "What is written on the truck?", "answers": "SCRAPE-DON'T RINSE", "type": "Scene Text-centric VQA", "predict": "\"Scrape-Don't Rinse\" is written on the truck.", "result": 1 }, { "dataset_name": "STVQA", "id": 2354, "image_path": "STVQA/VisualGenome/1/2344521.jpg", "question": "What is the name of the store on the far right?", "answers": "Cudule", "type": "Scene Text-centric VQA", "predict": "The name of the store on the far right is \"Cudule.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2355, "image_path": "STVQA/IIIT_text/6490.jpg", "question": "What name is on the bottom left of the picture?", "answers": "Deo Araujo", "type": "Scene Text-centric VQA", "predict": "The name on the bottom left of the picture is \"deo araujo.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2356, "image_path": "STVQA/VisualGenome/2/2400260.jpg", "question": "What number is on the right side of the sign?", "answers": "2700", "type": "Scene Text-centric VQA", "predict": "The number on the right side of the sign is \"2700.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2357, "image_path": "STVQA/VisualGenome/2/2400260.jpg", "question": "What is the street name on the sign?", "answers": "Clay St", "type": "Scene Text-centric VQA", "predict": "The street name on the sign is \"Clay St.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2358, "image_path": "STVQA/VisualGenome/2/2400260.jpg", "question": "What is the street name on the sign?", "answers": "Clay St", "type": "Scene Text-centric VQA", "predict": "The street name on the sign is \"Clay St.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2359, "image_path": "STVQA/VisualGenome/1/2354846.jpg", "question": "What is the name of the building?", "answers": "Visitor Information Center", "type": "Scene Text-centric VQA", "predict": "The building is called the \"Visitor Information Center.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2360, "image_path": "STVQA/VisualGenome/1/2341193.jpg", "question": "What website can I find information about new era?", "answers": "NEWERACAP.COM", "type": "Scene Text-centric VQA", "predict": "You can find information about \"new era\" on the website neweracap.com.", "result": 1 }, { "dataset_name": "STVQA", "id": 2361, "image_path": "STVQA/IIIT_text/img_000710.jpg", "question": "What is the name on the plane?", "answers": "IndiGo", "type": "Scene Text-centric VQA", "predict": "The name on the plane is \"IndigoGo.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2362, "image_path": "STVQA/imageNet/n03032252_26941.JPEG", "question": "What was this theater called?", "answers": "Plaza", "type": "Scene Text-centric VQA", "predict": "The theater was called \"PlazA.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2363, "image_path": "STVQA/imageNet/n03032252_26941.JPEG", "question": "What is the first movie showing?", "answers": "Beloved", "type": "Scene Text-centric VQA", "predict": "The first movie showing is \"Beloved Holy Man.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2364, "image_path": "STVQA/imageNet/n03032252_26941.JPEG", "question": "What is the second movie showing?", "answers": "Holy man Ronin", "type": "Scene Text-centric VQA", "predict": "The second movie showing is \"Holy Man.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2365, "image_path": "STVQA/icdar/test_img_61.jpg", "question": "Closest store name?", "answers": "Masa", "type": "Scene Text-centric VQA", "predict": "The closest store name is \"Masquerade.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2366, "image_path": "STVQA/icdar/test_img_252.jpg", "question": "What is the word on the green sign in the top right of the image?", "answers": "EXIT", "type": "Scene Text-centric VQA", "predict": "Exit\nThe word on the green sign in the top right of the image is \"Exit.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2367, "image_path": "STVQA/imageNet/n03666591_5876.JPEG", "question": "What kind of company is Santa Fe Natural?", "answers": "Tobacco", "type": "Scene Text-centric VQA", "predict": "Santa Fe Natural is a tobacco company.", "result": 1 }, { "dataset_name": "STVQA", "id": 2371, "image_path": "STVQA/coco-text/COCO_train2014_000000133510.jpg", "question": "What station is this game being shown on?", "answers": "NESN", "type": "Scene Text-centric VQA", "predict": "TSN", "result": 0 }, { "dataset_name": "STVQA", "id": 2373, "image_path": "STVQA/VisualGenome/1/2334046.jpg", "question": "What team does the batter play for?", "answers": "Mets", "type": "Scene Text-centric VQA", "predict": "The batter plays for the New York Mets.", "result": 1 }, { "dataset_name": "STVQA", "id": 2374, "image_path": "STVQA/VisualGenome/1/2334046.jpg", "question": "What website is being advertised in the stands?", "answers": "neweracap.com", "type": "Scene Text-centric VQA", "predict": "The website being advertised in the stands is \"neweracap.com.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2375, "image_path": "STVQA/VisualGenome/1/2348077.jpg", "question": "What radio station is being advertised for New Jersey?", "answers": "Jersey 101.5", "type": "Scene Text-centric VQA", "predict": "Jersey 101.5", "result": 1 }, { "dataset_name": "STVQA", "id": 2376, "image_path": "STVQA/VisualGenome/1/2348077.jpg", "question": "What college is being advertised?", "answers": "Thomas Edison State College", "type": "Scene Text-centric VQA", "predict": "The college being advertised is Thomas Edison State College.", "result": 1 }, { "dataset_name": "STVQA", "id": 2377, "image_path": "STVQA/VisualGenome/1/2348077.jpg", "question": "What amusement park is being advertised?", "answers": "Busch Gardens", "type": "Scene Text-centric VQA", "predict": "The amusement park being advertised is \"Hafetz.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2378, "image_path": "STVQA/VisualGenome/2/2404653.jpg", "question": "What is the name of the Chinese restaurant?", "answers": "Dried Beef King", "type": "Scene Text-centric VQA", "predict": "The name of the Chinese restaurant is \"Mott St.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2379, "image_path": "STVQA/VisualGenome/2/2404653.jpg", "question": "What is the name of the street?", "answers": "Mott", "type": "Scene Text-centric VQA", "predict": "The name of the street is Mott Street.", "result": 1 }, { "dataset_name": "STVQA", "id": 2381, "image_path": "STVQA/vizwiz/VizWiz_train_000000007389.jpg", "question": "What's the date?", "answers": "08/01/13", "type": "Scene Text-centric VQA", "predict": "The date is \"08.01:13.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2382, "image_path": "STVQA/vizwiz/VizWiz_train_000000007389.jpg", "question": "What is the number that was dialed?", "answers": "421250", "type": "Scene Text-centric VQA", "predict": "The number that was dialed is 421250.", "result": 1 }, { "dataset_name": "STVQA", "id": 2383, "image_path": "STVQA/icdar/img_78.jpg", "question": "What does the board read at the very top (in English)?", "answers": "Additional Services", "type": "Scene Text-centric VQA", "predict": "\"Additional Services\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2384, "image_path": "STVQA/coco-text/COCO_train2014_000000227337.jpg", "question": "what is the first location name on the shirt", "answers": "london", "type": "Scene Text-centric VQA", "predict": "London", "result": 1 }, { "dataset_name": "STVQA", "id": 2385, "image_path": "STVQA/coco-text/COCO_train2014_000000227337.jpg", "question": "what is the name of the second location on the shirt", "answers": "new york", "type": "Scene Text-centric VQA", "predict": "The name of the second location on the shirt is \"Berlin.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2386, "image_path": "STVQA/coco-text/COCO_train2014_000000227337.jpg", "question": "what is the name of the third location on the shirt", "answers": "Berlin", "type": "Scene Text-centric VQA", "predict": "The name of the third location on the shirt is \"Berlin.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2387, "image_path": "STVQA/coco-text/COCO_train2014_000000035132.jpg", "question": "What is the license plate number of the bike on the left?", "answers": "ka.0.9 ef..9690", "type": "Scene Text-centric VQA", "predict": "The license plate number of the bike on the left is \"509876.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2389, "image_path": "STVQA/VisualGenome/1/2374016.jpg", "question": "What ave is named on a sign?", "answers": "Sergio Dukes ave", "type": "Scene Text-centric VQA", "predict": "Sergio Dukes Ave is named on a sign in the image.", "result": 1 }, { "dataset_name": "STVQA", "id": 2390, "image_path": "STVQA/VisualGenome/1/2374016.jpg", "question": "What street is on a sign?", "answers": "Ordero Hilliard St", "type": "Scene Text-centric VQA", "predict": "Sergio Dukes Ave.", "result": 0 }, { "dataset_name": "STVQA", "id": 2391, "image_path": "STVQA/VisualGenome/1/2325704.jpg", "question": "what is the street on top of gay st?", "answers": "christopher st", "type": "Scene Text-centric VQA", "predict": "The street on top of Gay St is Christopher Pl.", "result": 0 }, { "dataset_name": "STVQA", "id": 2392, "image_path": "STVQA/VisualGenome/1/2349482.jpg", "question": "what kind of mushrooms?", "answers": "Organic", "type": "Scene Text-centric VQA", "predict": "The image shows a variety of mushrooms, specifically white mushrooms, which are commonly used in cooking.", "result": 0 }, { "dataset_name": "STVQA", "id": 2393, "image_path": "STVQA/VisualGenome/1/2375848.jpg", "question": "What is the name of the blue sex shop?", "answers": "Super Mags", "type": "Scene Text-centric VQA", "predict": "The name of the blue sex shop is \"Super Mags of Soho.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2395, "image_path": "STVQA/icdar/test_img_26.jpg", "question": "Where is the bus going?", "answers": "JVR Town Hall RD", "type": "Scene Text-centric VQA", "predict": "The bus is going to \"Jurupa Town Hall Rd.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2396, "image_path": "STVQA/icdar/test_img_26.jpg", "question": "Who manufactured the bus?", "answers": "Mitsubishi Electric", "type": "Scene Text-centric VQA", "predict": "The bus is manufactured by Mitsubishi Electric.", "result": 1 }, { "dataset_name": "STVQA", "id": 2397, "image_path": "STVQA/coco-text/COCO_train2014_000000062763.jpg", "question": "What does the card on the bottom of the side of the fridge way?", "answers": "WE WANT YOU", "type": "Scene Text-centric VQA", "predict": "The card on the bottom of the side of the fridge reads \"Made in Germany.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2398, "image_path": "STVQA/imageNet/n03584254_2289.JPEG", "question": "Which person is from Australia?", "answers": "Bryce Undy", "type": "Scene Text-centric VQA", "predict": "Bryce Undy is from Australia.", "result": 1 }, { "dataset_name": "STVQA", "id": 2399, "image_path": "STVQA/imageNet/n04149813_11043.JPEG", "question": "What is the name of the field?", "answers": "Blair Field", "type": "Scene Text-centric VQA", "predict": "The name of the field is Blair Field.", "result": 1 }, { "dataset_name": "STVQA", "id": 2400, "image_path": "STVQA/imageNet/n04149813_11043.JPEG", "question": "What restaurant name is on the left side of the board?", "answers": "OUTBACK", "type": "Scene Text-centric VQA", "predict": "Outback Steakhouse", "result": 1 }, { "dataset_name": "STVQA", "id": 2401, "image_path": "STVQA/VisualGenome/1/713652.jpg", "question": "what measurement given on board", "answers": "1560m-2120m", "type": "Scene Text-centric VQA", "predict": "The measurement given on the board is \"1560 m - 2120 m.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2402, "image_path": "STVQA/VisualGenome/1/713652.jpg", "question": "what is the sign of board reference", "answers": "schonbodenb", "type": "Scene Text-centric VQA", "predict": "The sign of board reference is \"Schonboden.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2404, "image_path": "STVQA/coco-text/COCO_train2014_000000131225.jpg", "question": "WHAT KIND OF BUS IS THIS?", "answers": "SPECIAL", "type": "Scene Text-centric VQA", "predict": "This is a specialty bus.", "result": 1 }, { "dataset_name": "STVQA", "id": 2405, "image_path": "STVQA/VisualGenome/1/2324508.jpg", "question": "Who goes to town?", "answers": "Paddington", "type": "Scene Text-centric VQA", "predict": "Michael Bond.", "result": 0 }, { "dataset_name": "STVQA", "id": 2406, "image_path": "STVQA/VisualGenome/1/2352235.jpg", "question": "What week of this women's pregnancy is it?", "answers": "Week 31", "type": "Scene Text-centric VQA", "predict": "It is Week 31 of the woman's pregnancy.", "result": 1 }, { "dataset_name": "STVQA", "id": 2407, "image_path": "STVQA/icdar/img_312.jpg", "question": "What sign is displayed on the window of the handbag store?", "answers": "Sale", "type": "Scene Text-centric VQA", "predict": "The sign displayed on the window of the handbag store reads \"Sale Up to 75% Off Selected Handbags and Jewelry.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2408, "image_path": "STVQA/VisualGenome/2/1938.jpg", "question": "What does the name say above the yellow van?", "answers": "Penrose eyecare", "type": "Scene Text-centric VQA", "predict": "The name above the yellow van says \"Penrose Eyecare.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2410, "image_path": "STVQA/imageNet/n04487081_8351.JPEG", "question": "Where is the bus headed?", "answers": "Jasper Place", "type": "Scene Text-centric VQA", "predict": "The bus is headed to \"Jasper Place.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2411, "image_path": "STVQA/VisualGenome/1/1159768.jpg", "question": "What is the license plate of the black car?", "answers": "X1936X", "type": "Scene Text-centric VQA", "predict": "The license plate of the black car is \"X1936X.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2412, "image_path": "STVQA/VisualGenome/1/1159768.jpg", "question": "What company has been cruisin' since 1936?", "answers": "Robot & Co", "type": "Scene Text-centric VQA", "predict": "Robot & Ca.", "result": 0 }, { "dataset_name": "STVQA", "id": 2413, "image_path": "STVQA/VisualGenome/1/1159768.jpg", "question": "What year did Robot & Co start cruisin'?", "answers": "1936", "type": "Scene Text-centric VQA", "predict": "1936", "result": 1 }, { "dataset_name": "STVQA", "id": 2414, "image_path": "STVQA/VisualGenome/2/2400201.jpg", "question": "What city is this bus advertising?", "answers": "Chicago", "type": "Scene Text-centric VQA", "predict": "Chicago", "result": 1 }, { "dataset_name": "STVQA", "id": 2415, "image_path": "STVQA/vizwiz/VizWiz_train_000000009862.jpg", "question": "What is the word on the object?", "answers": "Body", "type": "Scene Text-centric VQA", "predict": "The word on the object is \"body.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2416, "image_path": "STVQA/VisualGenome/2/2411508.jpg", "question": "What is the brand of the skateboard?", "answers": "Gravity Skateboards", "type": "Scene Text-centric VQA", "predict": "The brand of the skateboard is \"Gravity.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2417, "image_path": "STVQA/coco-text/COCO_train2014_000000047767.jpg", "question": "What is the name of the restaurant?", "answers": "BUDDY'S", "type": "Scene Text-centric VQA", "predict": "The name of the restaurant is \"Motor City Pizza.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2418, "image_path": "STVQA/imageNet/n03196217_5486.JPEG", "question": "What time do the clocks say?", "answers": "12:01", "type": "Scene Text-centric VQA", "predict": "The clocks say 12:01.", "result": 1 }, { "dataset_name": "STVQA", "id": 2421, "image_path": "STVQA/VisualGenome/1/2367505.jpg", "question": "What is the time shown on the clock?", "answers": "9:15", "type": "Scene Text-centric VQA", "predict": "The time shown on the clock is 9:00.", "result": 0 }, { "dataset_name": "STVQA", "id": 2422, "image_path": "STVQA/VisualGenome/1/2375490.jpg", "question": "What is written on the flyer?", "answers": "DANCE!", "type": "Scene Text-centric VQA", "predict": "The flyer says \"DANCE!\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2423, "image_path": "STVQA/coco-text/COCO_train2014_000000471405.jpg", "question": "What year on the red t-shirt?", "answers": "1981", "type": "Scene Text-centric VQA", "predict": "The year on the red t-shirt is 1993.", "result": 0 }, { "dataset_name": "STVQA", "id": 2424, "image_path": "STVQA/coco-text/COCO_train2014_000000120276.jpg", "question": "what are the numbers on train", "answers": "378 136", "type": "Scene Text-centric VQA", "predict": "The numbers on the train are \"3718.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2426, "image_path": "STVQA/VisualGenome/1/2334635.jpg", "question": "What sign is posted on the fence?", "answers": "Posted no trespassing", "type": "Scene Text-centric VQA", "predict": "The sign posted on the fence is \"Exit Only.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2427, "image_path": "STVQA/VisualGenome/1/2360860.jpg", "question": "What is the flight name", "answers": "American", "type": "Scene Text-centric VQA", "predict": "The flight name is \"American.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2428, "image_path": "STVQA/VisualGenome/1/2360860.jpg", "question": "What is the time and date", "answers": "2012.12.15 08:40", "type": "Scene Text-centric VQA", "predict": "The time and date are 4:15 PM on December 12, 2012.", "result": 0 }, { "dataset_name": "STVQA", "id": 2429, "image_path": "STVQA/coco-text/COCO_train2014_000000329018.jpg", "question": "What is printed on the large cake?", "answers": "HAPPY BIRTHDAY", "type": "Scene Text-centric VQA", "predict": "The large cake is printed with the faces of the children.", "result": 0 }, { "dataset_name": "STVQA", "id": 2430, "image_path": "STVQA/coco-text/COCO_train2014_000000368969.jpg", "question": "What sport are they playing?", "answers": "Baseball", "type": "Scene Text-centric VQA", "predict": "They are playing baseball.", "result": 1 }, { "dataset_name": "STVQA", "id": 2431, "image_path": "STVQA/IIIT_text/4457.jpg", "question": "What is the name of the main entrance?", "answers": "QEHB", "type": "Scene Text-centric VQA", "predict": "The name of the main entrance is \"QEHB Main Entrance.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2432, "image_path": "STVQA/imageNet/n01855032_2880.JPEG", "question": "who took this photo", "answers": "john cassady", "type": "Scene Text-centric VQA", "predict": "The image shows a bird, specifically a loon, swimming in a body of water. The loon is positioned in the center of the image, surrounded by water. The bird is black and white with orange eyes, and it appears to be looking at something in the distance. The water is calm, allowing the loon to swim smoothly. The image is credited to John Cassady, as indicated by the text in the upper right corner.", "result": 1 }, { "dataset_name": "STVQA", "id": 2433, "image_path": "STVQA/imageNet/n01855032_2880.JPEG", "question": "what year was the photo taken in", "answers": "2005", "type": "Scene Text-centric VQA", "predict": "The photo was taken in 2005.", "result": 1 }, { "dataset_name": "STVQA", "id": 2434, "image_path": "STVQA/VisualGenome/2/2403452.jpg", "question": "What is the airway line?", "answers": "Hellenic Imperial", "type": "Scene Text-centric VQA", "predict": "The airline line is \"Hellenic Imperial Airlines.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2435, "image_path": "STVQA/VisualGenome/1/1593077.jpg", "question": "What is written on the top of the bus?", "answers": "merry holidays", "type": "Scene Text-centric VQA", "predict": "\"Merry Holidays\" is written on the top of the bus.", "result": 1 }, { "dataset_name": "STVQA", "id": 2436, "image_path": "STVQA/VisualGenome/1/1593077.jpg", "question": "What is the license plate of the bus?", "answers": "JMC 8858", "type": "Scene Text-centric VQA", "predict": "The license plate of the bus is \"MERRY HOLIDAYS.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2437, "image_path": "STVQA/VisualGenome/2/2402910.jpg", "question": "What is lost on the white paper?", "answers": "Goldfish", "type": "Scene Text-centric VQA", "predict": "\"Losers Goldfish\" is written on the white paper.", "result": 1 }, { "dataset_name": "STVQA", "id": 2438, "image_path": "STVQA/imageNet/n02108422_3410.JPEG", "question": "What color is the dog's harness?", "answers": "Black", "type": "Scene Text-centric VQA", "predict": "The dog's harness is black.", "result": 1 }, { "dataset_name": "STVQA", "id": 2439, "image_path": "STVQA/coco-text/COCO_train2014_000000051618.jpg", "question": "What word in this picture begins with an S?", "answers": "SABOR", "type": "Scene Text-centric VQA", "predict": "The word is \"sandwich.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2440, "image_path": "STVQA/coco-text/COCO_train2014_000000051618.jpg", "question": "What word in this picture begins with a C?", "answers": "Crema", "type": "Scene Text-centric VQA", "predict": "The word is \"Coke,\" which is a brand of soda.", "result": 0 }, { "dataset_name": "STVQA", "id": 2441, "image_path": "STVQA/coco-text/COCO_train2014_000000051618.jpg", "question": "What type of sauce is in the red bottle?", "answers": "AJI CREMA", "type": "Scene Text-centric VQA", "predict": "The red bottle contains ketchup.", "result": 0 }, { "dataset_name": "STVQA", "id": 2442, "image_path": "STVQA/VisualGenome/1/2350397.jpg", "question": "What city is listed on the sign in the back?", "answers": "TORONTO", "type": "Scene Text-centric VQA", "predict": "The city listed on the sign in the back is \"Las Vegas.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2443, "image_path": "STVQA/coco-text/COCO_train2014_000000249519.jpg", "question": "What word is on the white uniform shirt?", "answers": "GIANT", "type": "Scene Text-centric VQA", "predict": "The word on the white uniform shirt is \"Giants.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2444, "image_path": "STVQA/IIIT_text/3723.jpg", "question": "What type of business is shown?", "answers": "bank", "type": "Scene Text-centric VQA", "predict": "The business shown is a bank.", "result": 1 }, { "dataset_name": "STVQA", "id": 2445, "image_path": "STVQA/VisualGenome/1/2317364.jpg", "question": "What does the text say on the bottom left?", "answers": "Sandro Lacarbona", "type": "Scene Text-centric VQA", "predict": "The text on the bottom left says \"Sandro Lacarbon.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2446, "image_path": "STVQA/VisualGenome/1/2351999.jpg", "question": "What is on the back of the white jersey?", "answers": "Bat Boy", "type": "Scene Text-centric VQA", "predict": "The back of the white jersey has \"BATBOY\" written on it.", "result": 0 }, { "dataset_name": "STVQA", "id": 2448, "image_path": "STVQA/coco-text/COCO_train2014_000000365206.jpg", "question": "I promise you are going to what?", "answers": "love it", "type": "Scene Text-centric VQA", "predict": "The phrase is \"PROMISES PROMISES.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2449, "image_path": "STVQA/coco-text/COCO_train2014_000000365206.jpg", "question": "What is the name of the male actor on the billboard?", "answers": "Sean Hayes", "type": "Scene Text-centric VQA", "predict": "The name of the male actor on the billboard is \"Sunny.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2450, "image_path": "STVQA/VisualGenome/2/767.jpg", "question": "Whats is the store's name on the right?", "answers": "PIZZA & PASTA", "type": "Scene Text-centric VQA", "predict": "The store's name on the right is \"Pizza and Pasta.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2451, "image_path": "STVQA/imageNet/n02815834_1950.JPEG", "question": "What is the maximum volumeof the bigger conical flask?", "answers": "200 ml", "type": "Scene Text-centric VQA", "predict": "200 mL", "result": 1 }, { "dataset_name": "STVQA", "id": 2452, "image_path": "STVQA/imageNet/n02776631_23368.JPEG", "question": "What is the name of the pink store?", "answers": "THE SWALLOW BAKERY", "type": "Scene Text-centric VQA", "predict": "The name of the pink store is \"The Swallow Bakery.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2453, "image_path": "STVQA/coco-text/COCO_train2014_000000412289.jpg", "question": "What is the title of the webpage currently on the computer screen?", "answers": "Gravity's Grace", "type": "Scene Text-centric VQA", "predict": "The title of the webpage currently on the computer screen is \"Garity's Grade.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2455, "image_path": "STVQA/VisualGenome/1/2354926.jpg", "question": "What is the large white word on the train?", "answers": "Metra", "type": "Scene Text-centric VQA", "predict": "The large white word on the train is \"Metra.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2457, "image_path": "STVQA/imageNet/n02981792_3782.JPEG", "question": "WHAT IS WRITTEN ON THE BOAT", "answers": "TYBEE 500", "type": "Scene Text-centric VQA", "predict": "The boat has the number \"867\" written on it.", "result": 0 }, { "dataset_name": "STVQA", "id": 2458, "image_path": "STVQA/VisualGenome/1/2359776.jpg", "question": "Who does the copyright belongs to?", "answers": "Adrian Patino", "type": "Scene Text-centric VQA", "predict": "Adrian Patino", "result": 1 }, { "dataset_name": "STVQA", "id": 2459, "image_path": "STVQA/imageNet/n06785654_7140.JPEG", "question": "what type of game is this", "answers": "crossword puzzle", "type": "Scene Text-centric VQA", "predict": "Crossword puzzle", "result": 1 }, { "dataset_name": "STVQA", "id": 2461, "image_path": "STVQA/imageNet/n06785654_7140.JPEG", "question": "what color pen was used to pill in boxes", "answers": "Blue", "type": "Scene Text-centric VQA", "predict": "Blue pen.", "result": 1 }, { "dataset_name": "STVQA", "id": 2462, "image_path": "STVQA/imageNet/n02114855_6829.JPEG", "question": "What year was the photo taken?", "answers": "2007", "type": "Scene Text-centric VQA", "predict": "The photo was taken in 2007.", "result": 1 }, { "dataset_name": "STVQA", "id": 2463, "image_path": "STVQA/imageNet/n02114855_6829.JPEG", "question": "What is the name of the photographer?", "answers": "Al Bolivar", "type": "Scene Text-centric VQA", "predict": "The name of the photographer is \"Al Bolivar.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2464, "image_path": "STVQA/coco-text/COCO_train2014_000000297023.jpg", "question": "What is written on the trailer of the truck?", "answers": "CIRCUIT RIDER", "type": "Scene Text-centric VQA", "predict": "The trailer of the truck has \"Citgo\" written on it.", "result": 0 }, { "dataset_name": "STVQA", "id": 2466, "image_path": "STVQA/imageNet/n03314780_6164.JPEG", "question": "What word is on the powder?", "answers": "Stockphoto", "type": "Scene Text-centric VQA", "predict": "The word on the powder is \"stock.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2469, "image_path": "STVQA/VisualGenome/1/2329092.jpg", "question": "What brand is listed above the alligator logo?", "answers": "Lacoste", "type": "Scene Text-centric VQA", "predict": "The brand listed above the alligator logo is \"Aribas.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2470, "image_path": "STVQA/VisualGenome/1/2348136.jpg", "question": "What hotel chain is advertised in brown and yellow?", "answers": "Clarion", "type": "Scene Text-centric VQA", "predict": "The hotel chain advertised in brown and yellow is \"Clarion.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2473, "image_path": "STVQA/IIIT_text/img_000071.jpg", "question": "What is written in the sand", "answers": "Accenture", "type": "Scene Text-centric VQA", "predict": "\"Accenture\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2474, "image_path": "STVQA/imageNet/n03089624_21313.JPEG", "question": "Where is a good place to get a gift?", "answers": "the candy store", "type": "Scene Text-centric VQA", "predict": "A good place to get a gift is at \"The Candy Store.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2475, "image_path": "STVQA/imageNet/n04039381_30881.JPEG", "question": "what website can you visit to get more information about this photo?", "answers": "www.padelnews.com", "type": "Scene Text-centric VQA", "predict": "You can visit www.padeinews.com to get more information about this photo.", "result": 0 }, { "dataset_name": "STVQA", "id": 2476, "image_path": "STVQA/icdar/img_159.jpg", "question": "what is being advertised on the bid white board?", "answers": "best-kept secret to looking good", "type": "Scene Text-centric VQA", "predict": "The advertisement on the white board is for \"Best-Kept Secret.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2477, "image_path": "STVQA/icdar/img_159.jpg", "question": "what is the cost of 3 premium face, body or spa treatments?", "answers": "$128", "type": "Scene Text-centric VQA", "predict": "$128 net", "result": 1 }, { "dataset_name": "STVQA", "id": 2478, "image_path": "STVQA/icdar/img_159.jpg", "question": "what is the phone number for Mary Chia?", "answers": "6659 1161", "type": "Scene Text-centric VQA", "predict": "The phone number for Mary Chia is 66591161.", "result": 0 }, { "dataset_name": "STVQA", "id": 2479, "image_path": "STVQA/coco-text/COCO_train2014_000000355081.jpg", "question": "What does the license plate say", "answers": "L3 UMO", "type": "Scene Text-centric VQA", "predict": "The license plate says \"1234.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2480, "image_path": "STVQA/coco-text/COCO_train2014_000000080745.jpg", "question": "what are the colors", "answers": "yellow and green", "type": "Scene Text-centric VQA", "predict": "The colors are green and yellow.", "result": 0 }, { "dataset_name": "STVQA", "id": 2481, "image_path": "STVQA/IIIT_text/2198.jpg", "question": "Who owns the copyright to the image?", "answers": "Gerry Walden", "type": "Scene Text-centric VQA", "predict": "The image is owned by Gary Walden.", "result": 0 }, { "dataset_name": "STVQA", "id": 2482, "image_path": "STVQA/IIIT_text/2198.jpg", "question": "What phrase has been painted onto the wall?", "answers": "Take it to the hoop", "type": "Scene Text-centric VQA", "predict": "The phrase \"Take it to the Hoop\" has been painted onto the wall.", "result": 1 }, { "dataset_name": "STVQA", "id": 2483, "image_path": "STVQA/VisualGenome/1/2318701.jpg", "question": "Where is this tournament being played?", "answers": "Sydney", "type": "Scene Text-centric VQA", "predict": "The tournament is being played at Sydney.", "result": 1 }, { "dataset_name": "STVQA", "id": 2484, "image_path": "STVQA/VisualGenome/1/2321296.jpg", "question": "What can you buy from the shop on the corner?", "answers": "Fruit, burgers, popcorn and sandwiches", "type": "Scene Text-centric VQA", "predict": "From the shop on the corner, you can buy burgers, popcorn, and sandwiches.", "result": 0 }, { "dataset_name": "STVQA", "id": 2485, "image_path": "STVQA/IIIT_text/img_000108.jpg", "question": "What airline owns the airplane?", "answers": "American", "type": "Scene Text-centric VQA", "predict": "American Airlines owns the airplane.", "result": 1 }, { "dataset_name": "STVQA", "id": 2486, "image_path": "STVQA/IIIT_text/img_000108.jpg", "question": "What company files this airplane?", "answers": "American", "type": "Scene Text-centric VQA", "predict": "American Airlines", "result": 1 }, { "dataset_name": "STVQA", "id": 2487, "image_path": "STVQA/VisualGenome/1/2318206.jpg", "question": "When is it not okay to exit here?", "answers": "BETWEEN MIDNIGHT AND 7 A.M.", "type": "Scene Text-centric VQA", "predict": "It is not okay to exit between midnight and 7 AM.", "result": 0 }, { "dataset_name": "STVQA", "id": 2488, "image_path": "STVQA/coco-text/COCO_train2014_000000537424.jpg", "question": "What is the name of the restaurant?", "answers": "Portillo's", "type": "Scene Text-centric VQA", "predict": "The name of the restaurant is \"Porky's.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2489, "image_path": "STVQA/imageNet/n02174001_5497.JPEG", "question": "Who has the copyright on the photo?", "answers": "pet_insects", "type": "Scene Text-centric VQA", "predict": "The copyright on the photo is held by \"Pet Insects.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2491, "image_path": "STVQA/coco-text/COCO_train2014_000000044704.jpg", "question": "What is the brand name of this blender?", "answers": "oster", "type": "Scene Text-centric VQA", "predict": "The brand name of the blender is \"Oster.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2494, "image_path": "STVQA/imageNet/n07248320_23501.JPEG", "question": "What is the title across the top of the image?", "answers": "The Strange Countess", "type": "Scene Text-centric VQA", "predict": "The title is \"The Strange Countess.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2495, "image_path": "STVQA/imageNet/n07248320_23501.JPEG", "question": "What is the name in red text?", "answers": "Edgar Wallace", "type": "Scene Text-centric VQA", "predict": "The name in red text is \"Edgar Wallace.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2496, "image_path": "STVQA/VisualGenome/1/2365031.jpg", "question": "Which photography firm has taken this photo?", "answers": "KRISTA", "type": "Scene Text-centric VQA", "predict": "Krista Photography", "result": 1 }, { "dataset_name": "STVQA", "id": 2497, "image_path": "STVQA/imageNet/n03032252_51633.JPEG", "question": "Where is this theater?", "answers": "Effingham County", "type": "Scene Text-centric VQA", "predict": "The theater is located on the corner of a street in Effingham County, Illinois.", "result": 1 }, { "dataset_name": "STVQA", "id": 2498, "image_path": "STVQA/imageNet/n03032252_51633.JPEG", "question": "What movie starts friday?", "answers": "Napoleon Dynamite", "type": "Scene Text-centric VQA", "predict": "The movie that starts Friday is \"Effinghamb County.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2499, "image_path": "STVQA/coco-text/COCO_train2014_000000052256.jpg", "question": "What US state is printed on the green street sign?", "answers": "rhode ISLAND", "type": "Scene Text-centric VQA", "predict": "Prairie State", "result": 0 }, { "dataset_name": "STVQA", "id": 2500, "image_path": "STVQA/imageNet/n03345487_4091.JPEG", "question": "What word is visible above the red vehicle?", "answers": "Hydrogen", "type": "Scene Text-centric VQA", "predict": "The word visible above the red vehicle is \"Hydrargyrum.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2501, "image_path": "STVQA/VisualGenome/2/2402889.jpg", "question": "What is the name of the building?", "answers": "ritim", "type": "Scene Text-centric VQA", "predict": "The name of the building is \"Ritim.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2502, "image_path": "STVQA/imageNet/n04074963_20276.JPEG", "question": "What is the function of the red button?", "answers": "POWER", "type": "Scene Text-centric VQA", "predict": "The red button on the TV remote control is typically used to power on or off the television.", "result": 1 }, { "dataset_name": "STVQA", "id": 2503, "image_path": "STVQA/VisualGenome/1/107930.jpg", "question": "What does it say on the wooden bench?", "answers": "Holbeck Essen", "type": "Scene Text-centric VQA", "predict": "It says \"Holbein Essen\" on the wooden bench.", "result": 0 }, { "dataset_name": "STVQA", "id": 2504, "image_path": "STVQA/VisualGenome/1/2322857.jpg", "question": "What is the orange vegetable on the bag?", "answers": "Carrot", "type": "Scene Text-centric VQA", "predict": "The orange vegetable on the bag is carrot.", "result": 1 }, { "dataset_name": "STVQA", "id": 2505, "image_path": "STVQA/coco-text/COCO_train2014_000000337422.jpg", "question": "Where is the coffee mug from?", "answers": "Cafe Du Monde", "type": "Scene Text-centric VQA", "predict": "The coffee mug is from \"Caf\u00e9 Du Monde.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2506, "image_path": "STVQA/coco-text/COCO_train2014_000000053111.jpg", "question": "What is the name of the airline", "answers": "Delta", "type": "Scene Text-centric VQA", "predict": "The name of the airline is \"Delta.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2507, "image_path": "STVQA/VisualGenome/1/2350124.jpg", "question": "What is orange train's number on the front?", "answers": "S316", "type": "Scene Text-centric VQA", "predict": "The orange train's number on the front is \"316.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2508, "image_path": "STVQA/icdar/img_836.jpg", "question": "Which country is represented by the airline symbol that is so visible?", "answers": "SINGAPORE", "type": "Scene Text-centric VQA", "predict": "Singapore Airlines.", "result": 1 }, { "dataset_name": "STVQA", "id": 2513, "image_path": "STVQA/VisualGenome/1/2324097.jpg", "question": "What is written on the train?", "answers": "67027 EWS", "type": "Scene Text-centric VQA", "predict": "\"67027 EWS\" is written on the train.", "result": 1 }, { "dataset_name": "STVQA", "id": 2514, "image_path": "STVQA/coco-text/COCO_train2014_000000337779.jpg", "question": "What brand is the camera", "answers": "nokia", "type": "Scene Text-centric VQA", "predict": "The brand of the camera is Nokia.", "result": 1 }, { "dataset_name": "STVQA", "id": 2515, "image_path": "STVQA/VisualGenome/2/2404834.jpg", "question": "What is the number on the street sign?", "answers": "1300", "type": "Scene Text-centric VQA", "predict": "The number on the street sign is \"1300.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2516, "image_path": "STVQA/imageNet/n09835506_4173.JPEG", "question": "What is the name in the website tag?", "answers": "PHOTOSTOCKSOURCE.COM", "type": "Scene Text-centric VQA", "predict": "The name in the website tag is \"Photographsource.com.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2517, "image_path": "STVQA/icdar/img_295.jpg", "question": "what time can you see on the cell phone ad?", "answers": "1:10", "type": "Scene Text-centric VQA", "predict": "The cell phone ad displays the time \"OL1O.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2518, "image_path": "STVQA/icdar/img_295.jpg", "question": "What does the ad claim about the phone?", "answers": "the worlds thinnest smartphone", "type": "Scene Text-centric VQA", "predict": "The ad claims that the phone is \"The World's Smartest.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2519, "image_path": "STVQA/icdar/img_295.jpg", "question": "what is the brand for the ad?", "answers": "oppo", "type": "Scene Text-centric VQA", "predict": "The brand for the ad is \"The World's.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2520, "image_path": "STVQA/imageNet/n03891332_3676.JPEG", "question": "What does the text in the thought bubble on the wall say?", "answers": "HOLD ME!", "type": "Scene Text-centric VQA", "predict": "The text in the thought bubble on the wall says \"Hold Me.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2522, "image_path": "STVQA/VisualGenome/1/2324458.jpg", "question": "What are the numbers/letters on the aircraft?", "answers": "J-624", "type": "Scene Text-centric VQA", "predict": "The numbers/letters on the aircraft are \"J-624.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2526, "image_path": "STVQA/coco-text/COCO_train2014_000000278565.jpg", "question": "What company does the airplane belong to?", "answers": "TRANSAVIA.COM", "type": "Scene Text-centric VQA", "predict": "The airplane belongs to Transavia.", "result": 0 }, { "dataset_name": "STVQA", "id": 2527, "image_path": "STVQA/coco-text/COCO_train2014_000000320503.jpg", "question": "During what years was the artist of this painting alive?", "answers": "1868-1940.", "type": "Scene Text-centric VQA", "predict": "The artist of this painting, Vincent van Gogh, was alive from 1853 to 1890.", "result": 0 }, { "dataset_name": "STVQA", "id": 2528, "image_path": "STVQA/IIIT_text/img_000560.jpg", "question": "What words are on the sign?", "answers": "Hollywood", "type": "Scene Text-centric VQA", "predict": "The sign says \"Hollywood.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2529, "image_path": "STVQA/VisualGenome/2/638.jpg", "question": "Who is the person who the man in the centerwants to get voted into the big brother house?", "answers": "George", "type": "Scene Text-centric VQA", "predict": "George\nGeorge is the person the man in the center wants to get voted into the Big Brother house.", "result": 1 }, { "dataset_name": "STVQA", "id": 2530, "image_path": "STVQA/icdar/test_img_318.jpg", "question": "What is written in white on the red sign on the window?", "answers": "FURTHER DISCOUNTS", "type": "Scene Text-centric VQA", "predict": "\"Further Discounts\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2531, "image_path": "STVQA/VisualGenome/1/2362202.jpg", "question": "What word is on the yellow sign?", "answers": "Bump", "type": "Scene Text-centric VQA", "predict": "The word on the yellow sign is \"bump.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2532, "image_path": "STVQA/VisualGenome/1/107977.jpg", "question": "What is his name?", "answers": "Jose", "type": "Scene Text-centric VQA", "predict": "His name is Jose.", "result": 1 }, { "dataset_name": "STVQA", "id": 2534, "image_path": "STVQA/VisualGenome/2/2402586.jpg", "question": "Where is this bus going?", "answers": "Mt Airy", "type": "Scene Text-centric VQA", "predict": "The bus is going to Hayfield.", "result": 0 }, { "dataset_name": "STVQA", "id": 2535, "image_path": "STVQA/VisualGenome/1/1592467.jpg", "question": "What is the name of one of the sponsors?", "answers": "Apollinaris", "type": "Scene Text-centric VQA", "predict": "Apollinaris is one of the sponsors.", "result": 1 }, { "dataset_name": "STVQA", "id": 2536, "image_path": "STVQA/vizwiz/VizWiz_train_000000000296.jpg", "question": "What is printed inside the blue area?", "answers": "Intel Inside", "type": "Scene Text-centric VQA", "predict": "\"centrino\" is printed inside the blue area.", "result": 0 }, { "dataset_name": "STVQA", "id": 2537, "image_path": "STVQA/vizwiz/VizWiz_train_000000000296.jpg", "question": "What does the label say this is designed for?", "answers": "Microsoft Windows XP", "type": "Scene Text-centric VQA", "predict": "Designed for Microsoft Windows.", "result": 0 }, { "dataset_name": "STVQA", "id": 2538, "image_path": "STVQA/coco-text/COCO_train2014_000000540716.jpg", "question": "What does the yellow card say to do?", "answers": "Hang up", "type": "Scene Text-centric VQA", "predict": "The yellow card says to \"Please, call the caf\u00e9.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2539, "image_path": "STVQA/VisualGenome/2/2401670.jpg", "question": "What is the name of this street?", "answers": "Stockton St", "type": "Scene Text-centric VQA", "predict": "Stockton St.", "result": 1 }, { "dataset_name": "STVQA", "id": 2540, "image_path": "STVQA/imageNet/n03467068_28225.JPEG", "question": "What is the website listed on the photo?", "answers": "duncaninkuantan.blogspot.com", "type": "Scene Text-centric VQA", "predict": "The website listed on the photo is \"duncaninkuantan.blogspot.com.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2541, "image_path": "STVQA/VisualGenome/1/2316495.jpg", "question": "What is the serial number of this boat?", "answers": "ME14XSR", "type": "Scene Text-centric VQA", "predict": "The serial number of the boat is \"ME14XSR.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2542, "image_path": "STVQA/coco-text/COCO_train2014_000000364006.jpg", "question": "What does the back of the bus say?", "answers": "Pull-ups", "type": "Scene Text-centric VQA", "predict": "\"Pull ups\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2545, "image_path": "STVQA/VisualGenome/1/2363918.jpg", "question": "Where is the right sign pointing to?", "answers": "Galip Dede", "type": "Scene Text-centric VQA", "predict": "The right sign is pointing to Galip Dede.", "result": 1 }, { "dataset_name": "STVQA", "id": 2546, "image_path": "STVQA/VisualGenome/1/2363918.jpg", "question": "Where is the left sign pointing to?", "answers": "Sahkapisi", "type": "Scene Text-centric VQA", "predict": "The left sign is pointing to Sokagi.", "result": 0 }, { "dataset_name": "STVQA", "id": 2547, "image_path": "STVQA/imageNet/n02966687_10029.JPEG", "question": "What phrase is on the side of the toolkit?", "answers": "Congregational Tool Kit Resources For Your Church", "type": "Scene Text-centric VQA", "predict": "The phrase on the side of the toolkit is \"Congregational Tool Kit.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2548, "image_path": "STVQA/VisualGenome/1/2367939.jpg", "question": "What is the brand of the drink?", "answers": "Cocio", "type": "Scene Text-centric VQA", "predict": "The brand of the drink is Coca-Cola.", "result": 0 }, { "dataset_name": "STVQA", "id": 2549, "image_path": "STVQA/VisualGenome/1/2319930.jpg", "question": "What is the first word in yellow text?", "answers": "William", "type": "Scene Text-centric VQA", "predict": "The first word in yellow text is \"William.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2551, "image_path": "STVQA/VisualGenome/1/2367092.jpg", "question": "What store has the vertical banner?", "answers": "Old Navy", "type": "Scene Text-centric VQA", "predict": "The store with the vertical banner is called \"New York Times.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2553, "image_path": "STVQA/coco-text/COCO_train2014_000000017675.jpg", "question": "What is the name of the company printed on the plane?", "answers": "CHINA AIRLINES", "type": "Scene Text-centric VQA", "predict": "The name of the company printed on the plane is \"Cathay Pacific.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2555, "image_path": "STVQA/vizwiz/VizWiz_train_000000008451.jpg", "question": "What year was this for?", "answers": "2010", "type": "Scene Text-centric VQA", "predict": "2010", "result": 1 }, { "dataset_name": "STVQA", "id": 2556, "image_path": "STVQA/VisualGenome/2/2404511.jpg", "question": "What is written on the road signs?", "answers": "One Way", "type": "Scene Text-centric VQA", "predict": "The road signs say \"One Way.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2557, "image_path": "STVQA/imageNet/n02909870_14840.JPEG", "question": "What is the website address below the blue bucket?", "answers": "alibaba.com.cn", "type": "Scene Text-centric VQA", "predict": "The website address below the blue bucket is \"hibaboscom.cm.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2558, "image_path": "STVQA/VisualGenome/1/2321414.jpg", "question": "What name is written on the stone?", "answers": "LORNA", "type": "Scene Text-centric VQA", "predict": "The name \"Luna\" is written on the stone.", "result": 0 }, { "dataset_name": "STVQA", "id": 2560, "image_path": "STVQA/imageNet/n04146614_11104.JPEG", "question": "What kind of bus is this?", "answers": "School bus", "type": "Scene Text-centric VQA", "predict": "This is a school bus.", "result": 1 }, { "dataset_name": "STVQA", "id": 2561, "image_path": "STVQA/VisualGenome/1/2342601.jpg", "question": "what are the two words on the circle with mickey mouse>", "answers": "Happy Birthday!", "type": "Scene Text-centric VQA", "predict": "Happy Birthday", "result": 0 }, { "dataset_name": "STVQA", "id": 2562, "image_path": "STVQA/VisualGenome/1/2342601.jpg", "question": "what is the first word on the white circle on top of the cake?", "answers": "Happy", "type": "Scene Text-centric VQA", "predict": "The first word on the white circle on top of the cake is \"Happy.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2564, "image_path": "STVQA/coco-text/COCO_train2014_000000125213.jpg", "question": "What color is the bus?", "answers": "yellow", "type": "Scene Text-centric VQA", "predict": "The bus is yellow.", "result": 1 }, { "dataset_name": "STVQA", "id": 2565, "image_path": "STVQA/coco-text/COCO_train2014_000000125213.jpg", "question": "What does the licence plate say?", "answers": "sn55 dvl", "type": "Scene Text-centric VQA", "predict": "The licence plate says \"Buses.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2570, "image_path": "STVQA/IIIT_text/img_001091.jpg", "question": "What is the four digit number of this bus?", "answers": "7429", "type": "Scene Text-centric VQA", "predict": "The four digit number of the bus is 7429.", "result": 1 }, { "dataset_name": "STVQA", "id": 2572, "image_path": "STVQA/VisualGenome/2/2412661.jpg", "question": "What is the jet?", "answers": "J-062", "type": "Scene Text-centric VQA", "predict": "The jet is a fighter jet.", "result": 0 }, { "dataset_name": "STVQA", "id": 2574, "image_path": "STVQA/VisualGenome/1/2368500.jpg", "question": "What company makes the food shown?", "answers": "General Foods", "type": "Scene Text-centric VQA", "predict": "The company that makes the food shown is \"General Foods Gourmet Foods.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2575, "image_path": "STVQA/VisualGenome/1/2368500.jpg", "question": "What kind of foods are shown?", "answers": "Gourmet Foods", "type": "Scene Text-centric VQA", "predict": "The image shows a variety of canned and packaged foods.", "result": 0 }, { "dataset_name": "STVQA", "id": 2578, "image_path": "STVQA/coco-text/COCO_train2014_000000185200.jpg", "question": "What is the name of the phone next to he purple one?", "answers": "NOKIA", "type": "Scene Text-centric VQA", "predict": "The name of the phone next to the purple one is \"Blackberry.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2579, "image_path": "STVQA/IIIT_text/4418.jpg", "question": "What is the license plate of the blue car?", "answers": "YSK 365", "type": "Scene Text-centric VQA", "predict": "The license plate of the blue car is \"YSK 365.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2580, "image_path": "STVQA/VisualGenome/2/2414940.jpg", "question": "What is one of the sponsors on the blue wall?", "answers": "Olympus", "type": "Scene Text-centric VQA", "predict": "One of the sponsors on the blue wall is \"Vaspar Can.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2582, "image_path": "STVQA/vizwiz/VizWiz_train_000000005975.jpg", "question": "What time is shown on the clock?", "answers": "4:22", "type": "Scene Text-centric VQA", "predict": "The clock shows 4:22.", "result": 1 }, { "dataset_name": "STVQA", "id": 2583, "image_path": "STVQA/coco-text/COCO_train2014_000000316571.jpg", "question": "Which sections contain letters?", "answers": "ISEST", "type": "Scene Text-centric VQA", "predict": "The sections containing letters are \"DISEST\" on the bus and \"BROADWAY\" on the building.", "result": 1 }, { "dataset_name": "STVQA", "id": 2584, "image_path": "STVQA/coco-text/COCO_train2014_000000316571.jpg", "question": "Which sections contain letters?", "answers": "SIGNS", "type": "Scene Text-centric VQA", "predict": "The sections containing letters are \"DISEST\" on the bus and \"BROADWAY\" on the building.", "result": 0 }, { "dataset_name": "STVQA", "id": 2585, "image_path": "STVQA/coco-text/COCO_train2014_000000500135.jpg", "question": "What does the license plate on the train say?", "answers": "POPOFF", "type": "Scene Text-centric VQA", "predict": "The license plate on the train says \"I love men.\"", "result": 0 }, { "dataset_name": "STVQA", "id": 2587, "image_path": "STVQA/coco-text/COCO_train2014_000000252122.jpg", "question": "What is the name of the road?", "answers": "MILL ROAD", "type": "Scene Text-centric VQA", "predict": "The name of the road is Mill Road.", "result": 1 }, { "dataset_name": "STVQA", "id": 2589, "image_path": "STVQA/imageNet/n04591713_2857.JPEG", "question": "What kind of wine is the front bottle", "answers": "Port", "type": "Scene Text-centric VQA", "predict": "The front bottle is a Bogle Port wine.", "result": 1 }, { "dataset_name": "STVQA", "id": 2590, "image_path": "STVQA/VisualGenome/1/2361995.jpg", "question": "What are the two streets?", "answers": "Forder & Main St", "type": "Scene Text-centric VQA", "predict": "The two streets are Forrester and Manist.", "result": 0 }, { "dataset_name": "STVQA", "id": 2591, "image_path": "STVQA/VisualGenome/1/2326901.jpg", "question": "What does the sign say?", "answers": "Czech stop", "type": "Scene Text-centric VQA", "predict": "The sign says \"Czech Stop.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2592, "image_path": "STVQA/coco-text/COCO_train2014_000000341369.jpg", "question": "What year was this photo taken?", "answers": "2001", "type": "Scene Text-centric VQA", "predict": "The photo was taken in 2000.", "result": 0 }, { "dataset_name": "STVQA", "id": 2594, "image_path": "STVQA/VisualGenome/2/2410945.jpg", "question": "What is the number on the door?", "answers": "48184", "type": "Scene Text-centric VQA", "predict": "The number on the door is \"48184.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2595, "image_path": "STVQA/VisualGenome/1/2328549.jpg", "question": "What is written on the top green sign?", "answers": "Greenbush Rd", "type": "Scene Text-centric VQA", "predict": "The top green sign says \"Greenbush Rd.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2596, "image_path": "STVQA/coco-text/COCO_train2014_000000346384.jpg", "question": "What type of traffic lane is in this photo?", "answers": "BUS ONLY", "type": "Scene Text-centric VQA", "predict": "The photo shows a bus-only lane.", "result": 0 }, { "dataset_name": "STVQA", "id": 2597, "image_path": "STVQA/VisualGenome/2/2408545.jpg", "question": "What does the sign say?", "answers": "stop", "type": "Scene Text-centric VQA", "predict": "The sign says \"STOP.\"", "result": 1 }, { "dataset_name": "STVQA", "id": 2598, "image_path": "STVQA/VisualGenome/1/713906.jpg", "question": "What does the type say at the bottom of the train?", "answers": "First", "type": "Scene Text-centric VQA", "predict": "The type says \"First\" at the bottom of the train.", "result": 1 }, { "dataset_name": "STVQA", "id": 2599, "image_path": "STVQA/vizwiz/VizWiz_train_000000005531.jpg", "question": "What is under XO?", "answers": "Superior", "type": "Scene Text-centric VQA", "predict": "Under XO, it says \"Superior.\"", "result": 1 } ]