@inproceedings{aji-etal-2022-one,
title = "One Country, 700+ Languages: {NLP} Challenges for Underrepresented Languages and Dialects in {I}ndonesia",
author = "Aji, Alham Fikri and
Winata, Genta Indra and
Koto, Fajri and
Cahyawijaya, Samuel and
Romadhony, Ade and
Mahendra, Rahmad and
Kurniawan, Kemal and
Moeljadi, David and
Prasojo, Radityo Eko and
Baldwin, Timothy and
Lau, Jey Han and
Ruder, Sebastian",
editor = "Muresan, Smaranda and
Nakov, Preslav and
Villavicencio, Aline",
booktitle = "Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.acl-long.500",
doi = "10.18653/v1/2022.acl-long.500",
pages = "7226--7249",
abstract = "NLP research is impeded by a lack of resources and awareness of the challenges presented by underrepresented languages and dialects. Focusing on the languages spoken in Indonesia, the second most linguistically diverse and the fourth most populous nation of the world, we provide an overview of the current state of NLP research for Indonesia{'}s 700+ languages. We highlight challenges in Indonesian NLP and how these affect the performance of current NLP systems. Finally, we provide general recommendations to help develop NLP technology not only for languages of Indonesia but also other underrepresented languages.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="aji-etal-2022-one">
<titleInfo>
<title>One Country, 700+ Languages: NLP Challenges for Underrepresented Languages and Dialects in Indonesia</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alham</namePart>
<namePart type="given">Fikri</namePart>
<namePart type="family">Aji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Genta</namePart>
<namePart type="given">Indra</namePart>
<namePart type="family">Winata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fajri</namePart>
<namePart type="family">Koto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samuel</namePart>
<namePart type="family">Cahyawijaya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ade</namePart>
<namePart type="family">Romadhony</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rahmad</namePart>
<namePart type="family">Mahendra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kemal</namePart>
<namePart type="family">Kurniawan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Moeljadi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Radityo</namePart>
<namePart type="given">Eko</namePart>
<namePart type="family">Prasojo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Timothy</namePart>
<namePart type="family">Baldwin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jey</namePart>
<namePart type="given">Han</namePart>
<namePart type="family">Lau</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Ruder</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Smaranda</namePart>
<namePart type="family">Muresan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aline</namePart>
<namePart type="family">Villavicencio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>NLP research is impeded by a lack of resources and awareness of the challenges presented by underrepresented languages and dialects. Focusing on the languages spoken in Indonesia, the second most linguistically diverse and the fourth most populous nation of the world, we provide an overview of the current state of NLP research for Indonesia’s 700+ languages. We highlight challenges in Indonesian NLP and how these affect the performance of current NLP systems. Finally, we provide general recommendations to help develop NLP technology not only for languages of Indonesia but also other underrepresented languages.</abstract>
<identifier type="citekey">aji-etal-2022-one</identifier>
<identifier type="doi">10.18653/v1/2022.acl-long.500</identifier>
<location>
<url>https://aclanthology.org/2022.acl-long.500</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>7226</start>
<end>7249</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T One Country, 700+ Languages: NLP Challenges for Underrepresented Languages and Dialects in Indonesia
%A Aji, Alham Fikri
%A Winata, Genta Indra
%A Koto, Fajri
%A Cahyawijaya, Samuel
%A Romadhony, Ade
%A Mahendra, Rahmad
%A Kurniawan, Kemal
%A Moeljadi, David
%A Prasojo, Radityo Eko
%A Baldwin, Timothy
%A Lau, Jey Han
%A Ruder, Sebastian
%Y Muresan, Smaranda
%Y Nakov, Preslav
%Y Villavicencio, Aline
%S Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F aji-etal-2022-one
%X NLP research is impeded by a lack of resources and awareness of the challenges presented by underrepresented languages and dialects. Focusing on the languages spoken in Indonesia, the second most linguistically diverse and the fourth most populous nation of the world, we provide an overview of the current state of NLP research for Indonesia’s 700+ languages. We highlight challenges in Indonesian NLP and how these affect the performance of current NLP systems. Finally, we provide general recommendations to help develop NLP technology not only for languages of Indonesia but also other underrepresented languages.
%R 10.18653/v1/2022.acl-long.500
%U https://aclanthology.org/2022.acl-long.500
%U https://doi.org/10.18653/v1/2022.acl-long.500
%P 7226-7249
Markdown (Informal)
[One Country, 700+ Languages: NLP Challenges for Underrepresented Languages and Dialects in Indonesia](https://aclanthology.org/2022.acl-long.500) (Aji et al., ACL 2022)
ACL
- Alham Fikri Aji, Genta Indra Winata, Fajri Koto, Samuel Cahyawijaya, Ade Romadhony, Rahmad Mahendra, Kemal Kurniawan, David Moeljadi, Radityo Eko Prasojo, Timothy Baldwin, Jey Han Lau, and Sebastian Ruder. 2022. One Country, 700+ Languages: NLP Challenges for Underrepresented Languages and Dialects in Indonesia. In Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 7226–7249, Dublin, Ireland. Association for Computational Linguistics.