@inproceedings{manakhimova-etal-2023-linguistically,
title = "Linguistically Motivated Evaluation of the 2023 State-of-the-art Machine Translation: Can {C}hat{GPT} Outperform {NMT}?",
author = {Manakhimova, Shushen and
Avramidis, Eleftherios and
Macketanz, Vivien and
Lapshinova-Koltunski, Ekaterina and
Bagdasarov, Sergei and
M{\"o}ller, Sebastian},
editor = "Koehn, Philipp and
Haddow, Barry and
Kocmi, Tom and
Monz, Christof",
booktitle = "Proceedings of the Eighth Conference on Machine Translation",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.wmt-1.23",
doi = "10.18653/v1/2023.wmt-1.23",
pages = "224--245",
abstract = "This paper offers a fine-grained analysis of the machine translation outputs in the context of the Shared Task at the 8th Conference of Machine Translation (WMT23). Building on the foundation of previous test suite efforts, our analysis includes Large Language Models and an updated test set featuring new linguistic phenomena. To our knowledge, this is the first fine-grained linguistic analysis for the GPT-4 translation outputs. Our evaluation spans German-English, English-German, and English-Russian language directions. Some of the phenomena with the lowest accuracies for German-English are idioms and resultative predicates. For English-German, these include mediopassive voice, and noun formation(er). As for English-Russian, these included idioms and semantic roles. GPT-4 performs equally or comparably to the best systems in German-English and English-German but falls in the second significance cluster for English-Russian.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="manakhimova-etal-2023-linguistically">
<titleInfo>
<title>Linguistically Motivated Evaluation of the 2023 State-of-the-art Machine Translation: Can ChatGPT Outperform NMT?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shushen</namePart>
<namePart type="family">Manakhimova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eleftherios</namePart>
<namePart type="family">Avramidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivien</namePart>
<namePart type="family">Macketanz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Lapshinova-Koltunski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sergei</namePart>
<namePart type="family">Bagdasarov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Möller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth Conference on Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Koehn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barry</namePart>
<namePart type="family">Haddow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Kocmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christof</namePart>
<namePart type="family">Monz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper offers a fine-grained analysis of the machine translation outputs in the context of the Shared Task at the 8th Conference of Machine Translation (WMT23). Building on the foundation of previous test suite efforts, our analysis includes Large Language Models and an updated test set featuring new linguistic phenomena. To our knowledge, this is the first fine-grained linguistic analysis for the GPT-4 translation outputs. Our evaluation spans German-English, English-German, and English-Russian language directions. Some of the phenomena with the lowest accuracies for German-English are idioms and resultative predicates. For English-German, these include mediopassive voice, and noun formation(er). As for English-Russian, these included idioms and semantic roles. GPT-4 performs equally or comparably to the best systems in German-English and English-German but falls in the second significance cluster for English-Russian.</abstract>
<identifier type="citekey">manakhimova-etal-2023-linguistically</identifier>
<identifier type="doi">10.18653/v1/2023.wmt-1.23</identifier>
<location>
<url>https://aclanthology.org/2023.wmt-1.23</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>224</start>
<end>245</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Linguistically Motivated Evaluation of the 2023 State-of-the-art Machine Translation: Can ChatGPT Outperform NMT?
%A Manakhimova, Shushen
%A Avramidis, Eleftherios
%A Macketanz, Vivien
%A Lapshinova-Koltunski, Ekaterina
%A Bagdasarov, Sergei
%A Möller, Sebastian
%Y Koehn, Philipp
%Y Haddow, Barry
%Y Kocmi, Tom
%Y Monz, Christof
%S Proceedings of the Eighth Conference on Machine Translation
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F manakhimova-etal-2023-linguistically
%X This paper offers a fine-grained analysis of the machine translation outputs in the context of the Shared Task at the 8th Conference of Machine Translation (WMT23). Building on the foundation of previous test suite efforts, our analysis includes Large Language Models and an updated test set featuring new linguistic phenomena. To our knowledge, this is the first fine-grained linguistic analysis for the GPT-4 translation outputs. Our evaluation spans German-English, English-German, and English-Russian language directions. Some of the phenomena with the lowest accuracies for German-English are idioms and resultative predicates. For English-German, these include mediopassive voice, and noun formation(er). As for English-Russian, these included idioms and semantic roles. GPT-4 performs equally or comparably to the best systems in German-English and English-German but falls in the second significance cluster for English-Russian.
%R 10.18653/v1/2023.wmt-1.23
%U https://aclanthology.org/2023.wmt-1.23
%U https://doi.org/10.18653/v1/2023.wmt-1.23
%P 224-245
Markdown (Informal)
[Linguistically Motivated Evaluation of the 2023 State-of-the-art Machine Translation: Can ChatGPT Outperform NMT?](https://aclanthology.org/2023.wmt-1.23) (Manakhimova et al., WMT 2023)
ACL