@inproceedings{wang-zhang-2022-addressing,
title = "Addressing Asymmetry in Multilingual Neural Machine Translation with Fuzzy Task Clustering",
author = "Wang, Qian and
Zhang, Jiajun",
editor = "Calzolari, Nicoletta and
Huang, Chu-Ren and
Kim, Hansaem and
Pustejovsky, James and
Wanner, Leo and
Choi, Key-Sun and
Ryu, Pum-Mo and
Chen, Hsin-Hsi and
Donatelli, Lucia and
Ji, Heng and
Kurohashi, Sadao and
Paggio, Patrizia and
Xue, Nianwen and
Kim, Seokhwan and
Hahm, Younggyun and
He, Zhong and
Lee, Tony Kyungil and
Santus, Enrico and
Bond, Francis and
Na, Seung-Hoon",
booktitle = "Proceedings of the 29th International Conference on Computational Linguistics",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2022.coling-1.455",
pages = "5129--5141",
abstract = "Multilingual neural machine translation (NMT) enables positive knowledge transfer among multiple translation tasks with a shared underlying model, but a unified multilingual model usually suffers from capacity bottleneck when tens or hundreds of languages are involved. A possible solution is to cluster languages and train individual model for each cluster. However, the existing clustering methods based on language similarity cannot handle the asymmetric problem in multilingual NMT, i.e., one translation task A can benefit from another translation task B but task B will be harmed by task A. To address this problem, we propose a fuzzy task clustering method for multilingual NMT. Specifically, we employ task affinity, defined as the loss change of one translation task caused by the training of another, as the clustering criterion. Next, we cluster the translation tasks based on the task affinity, such that tasks from the same cluster can benefit each other. For each cluster, we further find out a set of auxiliary translation tasks that benefit the tasks in this cluster. In this way, the model for each cluster is trained not only on the tasks in the cluster but also on the auxiliary tasks. We conduct extensive experiments for one-to-many, manyto-one, and many-to-many translation scenarios to verify the effectiveness of our method.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-zhang-2022-addressing">
<titleInfo>
<title>Addressing Asymmetry in Multilingual Neural Machine Translation with Fuzzy Task Clustering</title>
</titleInfo>
<name type="personal">
<namePart type="given">Qian</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 29th International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chu-Ren</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hansaem</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Pustejovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Key-Sun</namePart>
<namePart type="family">Choi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pum-Mo</namePart>
<namePart type="family">Ryu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hsin-Hsi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Donatelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sadao</namePart>
<namePart type="family">Kurohashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patrizia</namePart>
<namePart type="family">Paggio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seokhwan</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Younggyun</namePart>
<namePart type="family">Hahm</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhong</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tony</namePart>
<namePart type="given">Kyungil</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Enrico</namePart>
<namePart type="family">Santus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francis</namePart>
<namePart type="family">Bond</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seung-Hoon</namePart>
<namePart type="family">Na</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Multilingual neural machine translation (NMT) enables positive knowledge transfer among multiple translation tasks with a shared underlying model, but a unified multilingual model usually suffers from capacity bottleneck when tens or hundreds of languages are involved. A possible solution is to cluster languages and train individual model for each cluster. However, the existing clustering methods based on language similarity cannot handle the asymmetric problem in multilingual NMT, i.e., one translation task A can benefit from another translation task B but task B will be harmed by task A. To address this problem, we propose a fuzzy task clustering method for multilingual NMT. Specifically, we employ task affinity, defined as the loss change of one translation task caused by the training of another, as the clustering criterion. Next, we cluster the translation tasks based on the task affinity, such that tasks from the same cluster can benefit each other. For each cluster, we further find out a set of auxiliary translation tasks that benefit the tasks in this cluster. In this way, the model for each cluster is trained not only on the tasks in the cluster but also on the auxiliary tasks. We conduct extensive experiments for one-to-many, manyto-one, and many-to-many translation scenarios to verify the effectiveness of our method.</abstract>
<identifier type="citekey">wang-zhang-2022-addressing</identifier>
<location>
<url>https://aclanthology.org/2022.coling-1.455</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>5129</start>
<end>5141</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Addressing Asymmetry in Multilingual Neural Machine Translation with Fuzzy Task Clustering
%A Wang, Qian
%A Zhang, Jiajun
%Y Calzolari, Nicoletta
%Y Huang, Chu-Ren
%Y Kim, Hansaem
%Y Pustejovsky, James
%Y Wanner, Leo
%Y Choi, Key-Sun
%Y Ryu, Pum-Mo
%Y Chen, Hsin-Hsi
%Y Donatelli, Lucia
%Y Ji, Heng
%Y Kurohashi, Sadao
%Y Paggio, Patrizia
%Y Xue, Nianwen
%Y Kim, Seokhwan
%Y Hahm, Younggyun
%Y He, Zhong
%Y Lee, Tony Kyungil
%Y Santus, Enrico
%Y Bond, Francis
%Y Na, Seung-Hoon
%S Proceedings of the 29th International Conference on Computational Linguistics
%D 2022
%8 October
%I International Committee on Computational Linguistics
%C Gyeongju, Republic of Korea
%F wang-zhang-2022-addressing
%X Multilingual neural machine translation (NMT) enables positive knowledge transfer among multiple translation tasks with a shared underlying model, but a unified multilingual model usually suffers from capacity bottleneck when tens or hundreds of languages are involved. A possible solution is to cluster languages and train individual model for each cluster. However, the existing clustering methods based on language similarity cannot handle the asymmetric problem in multilingual NMT, i.e., one translation task A can benefit from another translation task B but task B will be harmed by task A. To address this problem, we propose a fuzzy task clustering method for multilingual NMT. Specifically, we employ task affinity, defined as the loss change of one translation task caused by the training of another, as the clustering criterion. Next, we cluster the translation tasks based on the task affinity, such that tasks from the same cluster can benefit each other. For each cluster, we further find out a set of auxiliary translation tasks that benefit the tasks in this cluster. In this way, the model for each cluster is trained not only on the tasks in the cluster but also on the auxiliary tasks. We conduct extensive experiments for one-to-many, manyto-one, and many-to-many translation scenarios to verify the effectiveness of our method.
%U https://aclanthology.org/2022.coling-1.455
%P 5129-5141
Markdown (Informal)
[Addressing Asymmetry in Multilingual Neural Machine Translation with Fuzzy Task Clustering](https://aclanthology.org/2022.coling-1.455) (Wang & Zhang, COLING 2022)
ACL