@inproceedings{park-etal-2020-scale,
title = "Scale down Transformer by Grouping Features for a Lightweight Character-level Language Model",
author = "Park, Sungrae and
Kim, Geewook and
Lee, Junyeop and
Cha, Junbum and
Kim, Ji-Hoon and
Lee, Hwalsuk",
editor = "Scott, Donia and
Bel, Nuria and
Zong, Chengqing",
booktitle = "Proceedings of the 28th International Conference on Computational Linguistics",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2020.coling-main.607/",
doi = "10.18653/v1/2020.coling-main.607",
pages = "6883--6893",
abstract = "This paper introduces a method that efficiently reduces the computational cost and parameter size of Transformer. The proposed model, refer to as Group-Transformer, splits feature space into multiple groups, factorizes the calculation paths, and reduces computations for the group interaction. Extensive experiments on two benchmark tasks, enwik8 and text8, prove our model`s effectiveness and efficiency in small-scale Transformers. To the best of our knowledge, Group-Transformer is the first attempt to design Transformer with the group strategy, widely used for efficient CNN architectures."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="park-etal-2020-scale">
<titleInfo>
<title>Scale down Transformer by Grouping Features for a Lightweight Character-level Language Model</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sungrae</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Geewook</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junyeop</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junbum</namePart>
<namePart type="family">Cha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ji-Hoon</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hwalsuk</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 28th International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Donia</namePart>
<namePart type="family">Scott</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nuria</namePart>
<namePart type="family">Bel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chengqing</namePart>
<namePart type="family">Zong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper introduces a method that efficiently reduces the computational cost and parameter size of Transformer. The proposed model, referred to as Group-Transformer, splits the feature space into multiple groups, factorizes the calculation paths, and reduces computations for the group interaction. Extensive experiments on two benchmark tasks, enwik8 and text8, prove our model's effectiveness and efficiency in small-scale Transformers. To the best of our knowledge, Group-Transformer is the first attempt to design Transformer with the group strategy, widely used for efficient CNN architectures.</abstract>
<identifier type="citekey">park-etal-2020-scale</identifier>
<identifier type="doi">10.18653/v1/2020.coling-main.607</identifier>
<location>
<url>https://aclanthology.org/2020.coling-main.607/</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>6883</start>
<end>6893</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Scale down Transformer by Grouping Features for a Lightweight Character-level Language Model
%A Park, Sungrae
%A Kim, Geewook
%A Lee, Junyeop
%A Cha, Junbum
%A Kim, Ji-Hoon
%A Lee, Hwalsuk
%Y Scott, Donia
%Y Bel, Nuria
%Y Zong, Chengqing
%S Proceedings of the 28th International Conference on Computational Linguistics
%D 2020
%8 December
%I International Committee on Computational Linguistics
%C Barcelona, Spain (Online)
%F park-etal-2020-scale
%X This paper introduces a method that efficiently reduces the computational cost and parameter size of Transformer. The proposed model, referred to as Group-Transformer, splits the feature space into multiple groups, factorizes the calculation paths, and reduces computations for the group interaction. Extensive experiments on two benchmark tasks, enwik8 and text8, prove our model's effectiveness and efficiency in small-scale Transformers. To the best of our knowledge, Group-Transformer is the first attempt to design Transformer with the group strategy, widely used for efficient CNN architectures.
%R 10.18653/v1/2020.coling-main.607
%U https://aclanthology.org/2020.coling-main.607/
%U https://doi.org/10.18653/v1/2020.coling-main.607
%P 6883-6893
Markdown (Informal)
[Scale down Transformer by Grouping Features for a Lightweight Character-level Language Model](https://aclanthology.org/2020.coling-main.607/) (Park et al., COLING 2020)
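The abstract above describes splitting the Transformer's feature space into groups, each processed along its own calculation path, with a reduced-cost interaction step between groups. The sketch below is a minimal illustration of that general feature-grouping idea only, not the authors' Group-Transformer implementation; it assumes PyTorch, and the module and parameter names (GroupedFeedForward, n_groups, group_mix) are hypothetical.

```python
# Illustrative sketch of feature grouping (NOT the paper's implementation):
# the model dimension is split into G groups, each group gets its own smaller
# linear projection, and a lightweight G x G mixing step lets groups interact.
import torch
import torch.nn as nn


class GroupedFeedForward(nn.Module):
    def __init__(self, d_model: int, n_groups: int):
        super().__init__()
        assert d_model % n_groups == 0, "d_model must be divisible by n_groups"
        self.n_groups = n_groups
        d_group = d_model // n_groups
        # One independent projection per feature group: roughly n_groups times
        # fewer parameters than a single d_model x d_model projection.
        self.group_proj = nn.ModuleList(
            nn.Linear(d_group, d_group) for _ in range(n_groups)
        )
        # Cheap inter-group interaction: mix the G group representations
        # with a small G x G matrix applied position-wise.
        self.group_mix = nn.Linear(n_groups, n_groups, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: (batch, seq_len, d_model)
        chunks = x.chunk(self.n_groups, dim=-1)        # G tensors, each (B, T, d_group)
        per_group = [proj(c) for proj, c in zip(self.group_proj, chunks)]
        h = torch.stack(per_group, dim=-1)             # (B, T, d_group, G)
        h = self.group_mix(h)                          # mix across the group axis
        return h.permute(0, 1, 3, 2).reshape(x.shape)  # back to (B, T, d_model)


if __name__ == "__main__":
    layer = GroupedFeedForward(d_model=256, n_groups=4)
    out = layer(torch.randn(2, 16, 256))
    print(out.shape)  # torch.Size([2, 16, 256])
```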