BibTeX
@inproceedings{chakravarthy-etal-2022-systematicity,
title = "Systematicity Emerges in Transformers when Abstract Grammatical Roles Guide Attention",
author = "Chakravarthy, Ayush K and
Russin, Jacob Labe and
O{'}Reilly, Randall",
editor = "Ippolito, Daphne and
Li, Liunian Harold and
Pacheco, Maria Leonor and
Chen, Danqi and
Xue, Nianwen",
booktitle = "Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Student Research Workshop",
month = jul,
year = "2022",
address = "Hybrid: Seattle, Washington + Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.naacl-srw.1",
doi = "10.18653/v1/2022.naacl-srw.1",
pages = "1--8",
abstract = "Systematicity is thought to be a key inductive bias possessed by humans that is lacking in standard natural language processing systems such as those utilizing transformers. In this work, we investigate the extent to which the failure of transformers on systematic generalization tests can be attributed to a lack of linguistic abstraction in its attention mechanism. We develop a novel modification to the transformer by implementing two separate input streams: a role stream controls the attention distributions (i.e., queries and keys) at each layer, and a filler stream determines the values. Our results show that when abstract role labels are assigned to input sequences and provided to the role stream, systematic generalization is improved.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chakravarthy-etal-2022-systematicity">
<titleInfo>
<title>Systematicity Emerges in Transformers when Abstract Grammatical Roles Guide Attention</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ayush</namePart>
<namePart type="given">K</namePart>
<namePart type="family">Chakravarthy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jacob</namePart>
<namePart type="given">Labe</namePart>
<namePart type="family">Russin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Randall</namePart>
<namePart type="family">O’Reilly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Student Research Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Daphne</namePart>
<namePart type="family">Ippolito</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liunian</namePart>
<namePart type="given">Harold</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Leonor</namePart>
<namePart type="family">Pacheco</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Danqi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hybrid: Seattle, Washington + Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Systematicity is thought to be a key inductive bias possessed by humans that is lacking in standard natural language processing systems such as those utilizing transformers. In this work, we investigate the extent to which the failure of transformers on systematic generalization tests can be attributed to a lack of linguistic abstraction in their attention mechanism. We develop a novel modification to the transformer by implementing two separate input streams: a role stream controls the attention distributions (i.e., queries and keys) at each layer, and a filler stream determines the values. Our results show that when abstract role labels are assigned to input sequences and provided to the role stream, systematic generalization is improved.</abstract>
<identifier type="citekey">chakravarthy-etal-2022-systematicity</identifier>
<identifier type="doi">10.18653/v1/2022.naacl-srw.1</identifier>
<location>
<url>https://aclanthology.org/2022.naacl-srw.1</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>1</start>
<end>8</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Systematicity Emerges in Transformers when Abstract Grammatical Roles Guide Attention
%A Chakravarthy, Ayush K.
%A Russin, Jacob Labe
%A O’Reilly, Randall
%Y Ippolito, Daphne
%Y Li, Liunian Harold
%Y Pacheco, Maria Leonor
%Y Chen, Danqi
%Y Xue, Nianwen
%S Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Student Research Workshop
%D 2022
%8 July
%I Association for Computational Linguistics
%C Hybrid: Seattle, Washington + Online
%F chakravarthy-etal-2022-systematicity
%X Systematicity is thought to be a key inductive bias possessed by humans that is lacking in standard natural language processing systems such as those utilizing transformers. In this work, we investigate the extent to which the failure of transformers on systematic generalization tests can be attributed to a lack of linguistic abstraction in their attention mechanism. We develop a novel modification to the transformer by implementing two separate input streams: a role stream controls the attention distributions (i.e., queries and keys) at each layer, and a filler stream determines the values. Our results show that when abstract role labels are assigned to input sequences and provided to the role stream, systematic generalization is improved.
%R 10.18653/v1/2022.naacl-srw.1
%U https://aclanthology.org/2022.naacl-srw.1
%U https://doi.org/10.18653/v1/2022.naacl-srw.1
%P 1-8
Markdown (Informal)
[Systematicity Emerges in Transformers when Abstract Grammatical Roles Guide Attention](https://aclanthology.org/2022.naacl-srw.1) (Chakravarthy et al., NAACL 2022)
ACL
Ayush K. Chakravarthy, Jacob Labe Russin, and Randall O’Reilly. 2022. Systematicity Emerges in Transformers when Abstract Grammatical Roles Guide Attention. In Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Student Research Workshop, pages 1–8, Hybrid: Seattle, Washington + Online. Association for Computational Linguistics.
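
The abstract describes a two-stream modification of transformer attention: a role stream that supplies queries and keys (so abstract grammatical roles decide where attention goes) and a filler stream that supplies values (the content that gets retrieved). The following is a minimal single-head sketch of that idea, not the authors' released code; the class and variable names (`RoleFillerAttention`, `roles`, `fillers`) are illustrative assumptions, and the role embeddings are assumed to be provided externally, as the abstract states.

```python
# Hypothetical sketch of role/filler two-stream attention (assumption:
# single head, no masking), following the description in the abstract.
import torch
import torch.nn as nn
import torch.nn.functional as F

class RoleFillerAttention(nn.Module):
    def __init__(self, d_model: int):
        super().__init__()
        self.q_proj = nn.Linear(d_model, d_model)  # queries from the role stream
        self.k_proj = nn.Linear(d_model, d_model)  # keys from the role stream
        self.v_proj = nn.Linear(d_model, d_model)  # values from the filler stream
        self.scale = d_model ** -0.5

    def forward(self, roles: torch.Tensor, fillers: torch.Tensor) -> torch.Tensor:
        # roles, fillers: (batch, seq_len, d_model)
        q = self.q_proj(roles)    # attention distribution is driven entirely
        k = self.k_proj(roles)    # by the abstract role labels ...
        v = self.v_proj(fillers)  # ... while content comes from the fillers
        attn = F.softmax((q @ k.transpose(-2, -1)) * self.scale, dim=-1)
        return attn @ v

# Usage: embed role labels and surface tokens separately, then let roles
# guide where attention looks while fillers supply what is retrieved.
roles = torch.randn(2, 5, 64)    # e.g., embeddings of grammatical role labels
fillers = torch.randn(2, 5, 64)  # e.g., embeddings of the input tokens
out = RoleFillerAttention(64)(roles, fillers)
print(out.shape)  # torch.Size([2, 5, 64])
```

Separating the two streams this way means the attention pattern cannot depend on token identity, only on role structure, which is one plausible reading of how the paper's modification encourages systematic generalization.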