@inproceedings{luo-etal-2021-just,
title = "{\textquoteleft}Just because you are right, doesn't mean {I} am wrong': Overcoming a bottleneck in development and evaluation of Open-Ended {VQA} tasks",
author = "Luo, Man and
Sampat, Shailaja Keyur and
Tallman, Riley and
Zeng, Yankai and
Vancha, Manuha and
Sajja, Akarshan and
Baral, Chitta",
editor = "Merlo, Paola and
Tiedemann, Jorg and
Tsarfaty, Reut",
booktitle = "Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume",
month = apr,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.eacl-main.240/",
doi = "10.18653/v1/2021.eacl-main.240",
pages = "2766--2771",
abstract = "GQA (CITATION) is a dataset for real-world visual reasoning and compositional question answering. We found that many answers predicted by the best vision-language models on the GQA dataset do not match the ground-truth answer but still are semantically meaningful and correct in the given context. In fact, this is the case with most existing visual question answering (VQA) datasets where they assume only one ground-truth answer for each question. We propose Alternative Answer Sets (AAS) of ground-truth answers to address this limitation, which is created automatically using off-the-shelf NLP tools. We introduce a semantic metric based on AAS and modify top VQA solvers to support multiple plausible answers for a question. We implement this approach on the GQA dataset and show the performance improvements."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="luo-etal-2021-just">
<titleInfo>
<title>‘Just because you are right, doesn’t mean I am wrong’: Overcoming a bottleneck in development and evaluation of Open-Ended VQA tasks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Man</namePart>
<namePart type="family">Luo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shailaja</namePart>
<namePart type="given">Keyur</namePart>
<namePart type="family">Sampat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Riley</namePart>
<namePart type="family">Tallman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yankai</namePart>
<namePart type="family">Zeng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manuha</namePart>
<namePart type="family">Vancha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Akarshan</namePart>
<namePart type="family">Sajja</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chitta</namePart>
<namePart type="family">Baral</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume</title>
</titleInfo>
<name type="personal">
<namePart type="given">Paola</namePart>
<namePart type="family">Merlo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jorg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reut</namePart>
<namePart type="family">Tsarfaty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>GQA (CITATION) is a dataset for real-world visual reasoning and compositional question answering. We found that many answers predicted by the best vision-language models on the GQA dataset do not match the ground-truth answer but still are semantically meaningful and correct in the given context. In fact, this is the case with most existing visual question answering (VQA) datasets where they assume only one ground-truth answer for each question. We propose Alternative Answer Sets (AAS) of ground-truth answers to address this limitation, which is created automatically using off-the-shelf NLP tools. We introduce a semantic metric based on AAS and modify top VQA solvers to support multiple plausible answers for a question. We implement this approach on the GQA dataset and show the performance improvements.</abstract>
<identifier type="citekey">luo-etal-2021-just</identifier>
<identifier type="doi">10.18653/v1/2021.eacl-main.240</identifier>
<location>
<url>https://aclanthology.org/2021.eacl-main.240/</url>
</location>
<part>
<date>2021-04</date>
<extent unit="page">
<start>2766</start>
<end>2771</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ‘Just because you are right, doesn’t mean I am wrong’: Overcoming a bottleneck in development and evaluation of Open-Ended VQA tasks
%A Luo, Man
%A Sampat, Shailaja Keyur
%A Tallman, Riley
%A Zeng, Yankai
%A Vancha, Manuha
%A Sajja, Akarshan
%A Baral, Chitta
%Y Merlo, Paola
%Y Tiedemann, Jorg
%Y Tsarfaty, Reut
%S Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume
%D 2021
%8 April
%I Association for Computational Linguistics
%C Online
%F luo-etal-2021-just
%X GQA (CITATION) is a dataset for real-world visual reasoning and compositional question answering. We found that many answers predicted by the best vision-language models on the GQA dataset do not match the ground-truth answer but still are semantically meaningful and correct in the given context. In fact, this is the case with most existing visual question answering (VQA) datasets where they assume only one ground-truth answer for each question. We propose Alternative Answer Sets (AAS) of ground-truth answers to address this limitation, which is created automatically using off-the-shelf NLP tools. We introduce a semantic metric based on AAS and modify top VQA solvers to support multiple plausible answers for a question. We implement this approach on the GQA dataset and show the performance improvements.
%R 10.18653/v1/2021.eacl-main.240
%U https://aclanthology.org/2021.eacl-main.240/
%U https://doi.org/10.18653/v1/2021.eacl-main.240
%P 2766-2771
Markdown (Informal)
[‘Just because you are right, doesn’t mean I am wrong’: Overcoming a bottleneck in development and evaluation of Open-Ended VQA tasks](https://aclanthology.org/2021.eacl-main.240/) (Luo et al., EACL 2021)
ACL