@inproceedings{xie-2023-bigai,
title = "The {BIGAI} Offline Speech Translation Systems for {IWSLT} 2023 Evaluation",
author = "Xie, Zhihang",
editor = "Salesky, Elizabeth and
Federico, Marcello and
Carpuat, Marine",
booktitle = "Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)",
month = jul,
year = "2023",
address = "Toronto, Canada (in-person and online)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.iwslt-1.7",
doi = "10.18653/v1/2023.iwslt-1.7",
pages = "123--129",
abstract = "This paper describes the BIGAI{'}s submission to IWSLT 2023 Offline Speech Translation task on three language tracks from English to Chinese, German and Japanese. The end-to-end systems are built upon a Wav2Vec2 model for speech recognition and mBART50 models for machine translation. An adapter module is applied to bridge the speech module and the translation module. The CTC loss between speech features and source token sequence is incorporated during training. Experiments show that the systems can generate reasonable translations on three languages. The proposed models achieve BLEU scores of 22.3 for en→de, 10.7 for en→ja and 33.0 for en→zh on tst2023 TED datasets. However, the performance is decreased by a significant margin on complex scenarios like persentations and interview.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="xie-2023-bigai">
<titleInfo>
<title>The BIGAI Offline Speech Translation Systems for IWSLT 2023 Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhihang</namePart>
<namePart type="family">Xie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Salesky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marine</namePart>
<namePart type="family">Carpuat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada (in-person and online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
    <abstract>This paper describes BIGAI’s submission to the IWSLT 2023 Offline Speech Translation task on three language tracks from English to Chinese, German and Japanese. The end-to-end systems are built upon a Wav2Vec2 model for speech recognition and mBART50 models for machine translation. An adapter module is applied to bridge the speech module and the translation module. A CTC loss between the speech features and the source token sequence is incorporated during training. Experiments show that the systems generate reasonable translations in all three languages. The proposed models achieve BLEU scores of 22.3 for en→de, 10.7 for en→ja and 33.0 for en→zh on the tst2023 TED datasets. However, performance drops by a significant margin in more complex scenarios such as presentations and interviews.</abstract>
<identifier type="citekey">xie-2023-bigai</identifier>
<identifier type="doi">10.18653/v1/2023.iwslt-1.7</identifier>
<location>
<url>https://aclanthology.org/2023.iwslt-1.7</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>123</start>
<end>129</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The BIGAI Offline Speech Translation Systems for IWSLT 2023 Evaluation
%A Xie, Zhihang
%Y Salesky, Elizabeth
%Y Federico, Marcello
%Y Carpuat, Marine
%S Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada (in-person and online)
%F xie-2023-bigai
%X This paper describes BIGAI’s submission to the IWSLT 2023 Offline Speech Translation task on three language tracks from English to Chinese, German and Japanese. The end-to-end systems are built upon a Wav2Vec2 model for speech recognition and mBART50 models for machine translation. An adapter module is applied to bridge the speech module and the translation module. A CTC loss between the speech features and the source token sequence is incorporated during training. Experiments show that the systems generate reasonable translations in all three languages. The proposed models achieve BLEU scores of 22.3 for en→de, 10.7 for en→ja and 33.0 for en→zh on the tst2023 TED datasets. However, performance drops by a significant margin in more complex scenarios such as presentations and interviews.
%R 10.18653/v1/2023.iwslt-1.7
%U https://aclanthology.org/2023.iwslt-1.7
%U https://doi.org/10.18653/v1/2023.iwslt-1.7
%P 123-129
Markdown (Informal)
[The BIGAI Offline Speech Translation Systems for IWSLT 2023 Evaluation](https://aclanthology.org/2023.iwslt-1.7) (Xie, IWSLT 2023)
ACL
Zhihang Xie. 2023. The BIGAI Offline Speech Translation Systems for IWSLT 2023 Evaluation. In Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023), pages 123–129, Toronto, Canada (in-person and online). Association for Computational Linguistics.
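
For orientation only: the abstract describes an end-to-end pipeline in which a Wav2Vec2 speech encoder feeds an adapter that bridges into an mBART50 translation module, with an auxiliary CTC loss against the source token sequence. The minimal PyTorch sketch below is not the authors' code; it uses stand-in modules (a GRU in place of Wav2Vec2, a vanilla Transformer in place of mBART50) and assumed names and dimensions purely to illustrate that wiring.

```python
import torch
import torch.nn as nn

class SpeechTranslationSketch(nn.Module):
    """Toy stand-in for the described pipeline: speech encoder -> adapter -> translator,
    with an auxiliary CTC head on the encoder output (hypothetical names and sizes)."""

    def __init__(self, feat_dim=80, speech_dim=256, text_dim=256, vocab_size=1000):
        super().__init__()
        # Placeholders for the pretrained Wav2Vec2 encoder and mBART50 translation model.
        self.speech_encoder = nn.GRU(feat_dim, speech_dim, batch_first=True)
        self.adapter = nn.Sequential(          # bridges the speech module to the translation module
            nn.Linear(speech_dim, text_dim),
            nn.ReLU(),
            nn.Linear(text_dim, text_dim),
        )
        self.ctc_head = nn.Linear(speech_dim, vocab_size)   # predicts source tokens for the CTC loss
        self.translator = nn.Transformer(d_model=text_dim, batch_first=True)
        self.output_proj = nn.Linear(text_dim, vocab_size)
        self.ctc_loss = nn.CTCLoss(blank=0)

    def forward(self, speech_feats, src_tokens, tgt_embeds):
        enc, _ = self.speech_encoder(speech_feats)                       # (B, T, speech_dim)
        # Auxiliary CTC loss between the speech features and the source token sequence.
        log_probs = self.ctc_head(enc).log_softmax(-1).transpose(0, 1)   # (T, B, V)
        in_lens = torch.full((enc.size(0),), enc.size(1), dtype=torch.long)
        out_lens = torch.full((src_tokens.size(0),), src_tokens.size(1), dtype=torch.long)
        ctc = self.ctc_loss(log_probs, src_tokens, in_lens, out_lens)
        # The adapter output feeds the translation module in place of text embeddings.
        dec_out = self.translator(self.adapter(enc), tgt_embeds)
        return self.output_proj(dec_out), ctc

model = SpeechTranslationSketch()
speech = torch.randn(2, 50, 80)            # e.g. 80-dim filterbank frames
src = torch.randint(1, 1000, (2, 10))      # source-language (English) token ids
tgt = torch.randn(2, 12, 256)              # stand-in target-side embeddings
logits, ctc = model(speech, src, tgt)      # train on translation cross-entropy plus the CTC term
print(logits.shape, ctc.item())
```

In the actual systems the pretrained Wav2Vec2 and mBART50 checkpoints would replace the stand-ins, and training would combine the translation cross-entropy with the auxiliary CTC loss as the abstract describes.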