% Conference paper entry (ACL 2023, Volume 1: Long Papers). All string values are brace-delimited;
% the acronym in the title carries its own braces so that styles keep its capitalisation.
@inproceedings{malaviya-etal-2023-quest,
  title     = {{QUEST}: A Retrieval Dataset of Entity-Seeking Queries with Implicit Set Operations},
  author    = {Malaviya, Chaitanya and
               Shaw, Peter and
               Chang, Ming-Wei and
               Lee, Kenton and
               Toutanova, Kristina},
  editor    = {Rogers, Anna and
               Boyd-Graber, Jordan and
               Okazaki, Naoaki},
  booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.acl-long.784},
  doi       = {10.18653/v1/2023.acl-long.784},
  pages     = {14032--14047},
  abstract  = {Formulating selective information needs results in queries that implicitly specify set operations, such as intersection, union, and difference. For instance, one might search for {``}shorebirds that are not sandpipers{''} or {``}science-fiction films shot in England{''}. To study the ability of retrieval systems to meet such information needs, we construct QUEST, a dataset of 3357 natural language queries with implicit set operations, that map to a set of entities corresponding to Wikipedia documents. The dataset challenges models to match multiple constraints mentioned in queries with corresponding evidence in documents and correctly perform various set operations. The dataset is constructed semi-automatically using Wikipedia category names. Queries are automatically composed from individual categories, then paraphrased and further validated for naturalness and fluency by crowdworkers. Crowdworkers also assess the relevance of entities based on their documents and highlight attribution of query constraints to spans of document text. We analyze several modern retrieval systems, finding that they often struggle on such queries. Queries involving negation and conjunction are particularly challenging and systems are further challenged with combinations of these operations.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS collection holding a single record (ID malaviya-etal-2023-quest). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="malaviya-etal-2023-quest">
    <!-- Title of the paper itself. -->
    <titleInfo>
      <title>QUEST: A Retrieval Dataset of Entity-Seeking Queries with Implicit Set Operations</title>
    </titleInfo>
    <!-- Authors: one <name> element per person, each with the "author" role. -->
    <name type="personal">
      <namePart type="given">Chaitanya</namePart>
      <namePart type="family">Malaviya</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Peter</namePart>
      <namePart type="family">Shaw</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ming-Wei</namePart>
      <namePart type="family">Chang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kenton</namePart>
      <namePart type="family">Lee</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kristina</namePart>
      <namePart type="family">Toutanova</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Anna</namePart>
        <namePart type="family">Rogers</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jordan</namePart>
        <namePart type="family">Boyd-Graber</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Naoaki</namePart>
        <namePart type="family">Okazaki</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Toronto, Canada</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Formulating selective information needs results in queries that implicitly specify set operations, such as intersection, union, and difference. For instance, one might search for “shorebirds that are not sandpipers” or “science-fiction films shot in England”. To study the ability of retrieval systems to meet such information needs, we construct QUEST, a dataset of 3357 natural language queries with implicit set operations, that map to a set of entities corresponding to Wikipedia documents. The dataset challenges models to match multiple constraints mentioned in queries with corresponding evidence in documents and correctly perform various set operations. The dataset is constructed semi-automatically using Wikipedia category names. Queries are automatically composed from individual categories, then paraphrased and further validated for naturalness and fluency by crowdworkers. Crowdworkers also assess the relevance of entities based on their documents and highlight attribution of query constraints to spans of document text. We analyze several modern retrieval systems, finding that they often struggle on such queries. Queries involving negation and conjunction are particularly challenging and systems are further challenged with combinations of these operations.</abstract>
    <!-- Identifiers and landing-page URL for the paper. -->
    <identifier type="citekey">malaviya-etal-2023-quest</identifier>
    <identifier type="doi">10.18653/v1/2023.acl-long.784</identifier>
    <location>
      <url>https://aclanthology.org/2023.acl-long.784</url>
    </location>
    <!-- Date and page extent recorded for this part. -->
    <part>
      <date>2023-07</date>
      <extent unit="page">
        <start>14032</start>
        <end>14047</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T QUEST: A Retrieval Dataset of Entity-Seeking Queries with Implicit Set Operations
%A Malaviya, Chaitanya
%A Shaw, Peter
%A Chang, Ming-Wei
%A Lee, Kenton
%A Toutanova, Kristina
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F malaviya-etal-2023-quest
%X Formulating selective information needs results in queries that implicitly specify set operations, such as intersection, union, and difference. For instance, one might search for “shorebirds that are not sandpipers” or “science-fiction films shot in England”. To study the ability of retrieval systems to meet such information needs, we construct QUEST, a dataset of 3357 natural language queries with implicit set operations, that map to a set of entities corresponding to Wikipedia documents. The dataset challenges models to match multiple constraints mentioned in queries with corresponding evidence in documents and correctly perform various set operations. The dataset is constructed semi-automatically using Wikipedia category names. Queries are automatically composed from individual categories, then paraphrased and further validated for naturalness and fluency by crowdworkers. Crowdworkers also assess the relevance of entities based on their documents and highlight attribution of query constraints to spans of document text. We analyze several modern retrieval systems, finding that they often struggle on such queries. Queries involving negation and conjunction are particularly challenging and systems are further challenged with combinations of these operations.
%R 10.18653/v1/2023.acl-long.784
%U https://aclanthology.org/2023.acl-long.784
%U https://doi.org/10.18653/v1/2023.acl-long.784
%P 14032-14047
Markdown (Informal)
[QUEST: A Retrieval Dataset of Entity-Seeking Queries with Implicit Set Operations](https://aclanthology.org/2023.acl-long.784) (Malaviya et al., ACL 2023)
ACL