@inproceedings{fu-etal-2020-design,
title = "Design Challenges in Low-resource Cross-lingual Entity Linking",
author = "Fu, Xingyu and
Shi, Weijia and
Yu, Xiaodong and
Zhao, Zian and
Roth, Dan",
editor = "Webber, Bonnie and
Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.emnlp-main.521",
doi = "10.18653/v1/2020.emnlp-main.521",
pages = "6418--6432",
abstract = "Cross-lingual Entity Linking (XEL), the problem of grounding mentions of entities in a foreign language text into an English knowledge base such as Wikipedia, has seen a lot of research in recent years, with a range of promising techniques. However, current techniques do not rise to the challenges introduced by text in low-resource languages (LRL) and, surprisingly, fail to generalize to text not taken from Wikipedia, on which they are usually trained. This paper provides a thorough analysis of low-resource XEL techniques, focusing on the key step of identifying candidate English Wikipedia titles that correspond to a given foreign language mention. Our analysis indicates that current methods are limited by their reliance on Wikipedia{'}s interlanguage links and thus suffer when the foreign language{'}s Wikipedia is small. We conclude that the LRL setting requires the use of outside-Wikipedia cross-lingual resources and present a simple yet effective zero-shot XEL system, QuEL, that utilizes search engines query logs. With experiments on 25 languages, QuEL shows an average increase of 25{\%} in gold candidate recall and of 13{\%} in end-to-end linking accuracy over state-of-the-art baselines.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fu-etal-2020-design">
<titleInfo>
<title>Design Challenges in Low-resource Cross-lingual Entity Linking</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xingyu</namePart>
<namePart type="family">Fu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Weijia</namePart>
<namePart type="family">Shi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaodong</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zian</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Roth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bonnie</namePart>
<namePart type="family">Webber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Trevor</namePart>
<namePart type="family">Cohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Cross-lingual Entity Linking (XEL), the problem of grounding mentions of entities in a foreign language text into an English knowledge base such as Wikipedia, has seen a lot of research in recent years, with a range of promising techniques. However, current techniques do not rise to the challenges introduced by text in low-resource languages (LRL) and, surprisingly, fail to generalize to text not taken from Wikipedia, on which they are usually trained. This paper provides a thorough analysis of low-resource XEL techniques, focusing on the key step of identifying candidate English Wikipedia titles that correspond to a given foreign language mention. Our analysis indicates that current methods are limited by their reliance on Wikipedia’s interlanguage links and thus suffer when the foreign language’s Wikipedia is small. We conclude that the LRL setting requires the use of outside-Wikipedia cross-lingual resources and present a simple yet effective zero-shot XEL system, QuEL, that utilizes search engines query logs. With experiments on 25 languages, QuEL shows an average increase of 25% in gold candidate recall and of 13% in end-to-end linking accuracy over state-of-the-art baselines.</abstract>
<identifier type="citekey">fu-etal-2020-design</identifier>
<identifier type="doi">10.18653/v1/2020.emnlp-main.521</identifier>
<location>
<url>https://aclanthology.org/2020.emnlp-main.521</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>6418</start>
<end>6432</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Design Challenges in Low-resource Cross-lingual Entity Linking
%A Fu, Xingyu
%A Shi, Weijia
%A Yu, Xiaodong
%A Zhao, Zian
%A Roth, Dan
%Y Webber, Bonnie
%Y Cohn, Trevor
%Y He, Yulan
%Y Liu, Yang
%S Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F fu-etal-2020-design
%X Cross-lingual Entity Linking (XEL), the problem of grounding mentions of entities in a foreign language text into an English knowledge base such as Wikipedia, has seen a lot of research in recent years, with a range of promising techniques. However, current techniques do not rise to the challenges introduced by text in low-resource languages (LRL) and, surprisingly, fail to generalize to text not taken from Wikipedia, on which they are usually trained. This paper provides a thorough analysis of low-resource XEL techniques, focusing on the key step of identifying candidate English Wikipedia titles that correspond to a given foreign language mention. Our analysis indicates that current methods are limited by their reliance on Wikipedia’s interlanguage links and thus suffer when the foreign language’s Wikipedia is small. We conclude that the LRL setting requires the use of outside-Wikipedia cross-lingual resources and present a simple yet effective zero-shot XEL system, QuEL, that utilizes search engines query logs. With experiments on 25 languages, QuEL shows an average increase of 25% in gold candidate recall and of 13% in end-to-end linking accuracy over state-of-the-art baselines.
%R 10.18653/v1/2020.emnlp-main.521
%U https://aclanthology.org/2020.emnlp-main.521
%U https://doi.org/10.18653/v1/2020.emnlp-main.521
%P 6418-6432
Markdown (Informal)
[Design Challenges in Low-resource Cross-lingual Entity Linking](https://aclanthology.org/2020.emnlp-main.521) (Fu et al., EMNLP 2020)
ACL