@inproceedings{li-etal-2024-bordirlines,
title = "{B}ord{IR}lines: A Dataset for Evaluating Cross-lingual Retrieval Augmented Generation",
author = "Li, Bryan and
Haider, Samar and
Luo, Fiona and
Agashe, Adwait and
Callison-Burch, Chris",
editor = "Lucie-Aim{\'e}e, Lucie and
Fan, Angela and
Gwadabe, Tajuddeen and
Johnson, Isaac and
Petroni, Fabio and
van Strien, Daniel",
booktitle = "Proceedings of the First Workshop on Advancing Natural Language Processing for Wikipedia",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.wikinlp-1.3",
doi = "10.18653/v1/2024.wikinlp-1.3",
pages = "1--13",
abstract = "Large language models excel at creative generation but continue to struggle with the issues of hallucination and bias. While retrieval-augmented generation (RAG) provides a framework for grounding LLMs{'} responses in accurate and up-to-date information, it still raises the question of bias: which sources should be selected for inclusion in the context? And how should their importance be weighted? In this paper, we study the challenge of cross-lingual RAG and present a dataset to investigate the robustness of existing systems at answering queries about geopolitical disputes, which exist at the intersection of linguistic, cultural, and political boundaries. Our dataset is sourced from Wikipedia pages containing information relevant to the given queries and we investigate the impact of including additional context, as well as the composition of this context in terms of language and source, on an LLM{'}s response. Our results show that existing RAG systems continue to be challenged by cross-lingual use cases and suffer from a lack of consistency when they are provided with competing information in multiple languages. We present case studies to illustrate these issues and outline steps for future research to address these challenges.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="li-etal-2024-bordirlines">
<titleInfo>
<title>BordIRlines: A Dataset for Evaluating Cross-lingual Retrieval Augmented Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bryan</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samar</namePart>
<namePart type="family">Haider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fiona</namePart>
<namePart type="family">Luo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adwait</namePart>
<namePart type="family">Agashe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Callison-Burch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Advancing Natural Language Processing for Wikipedia</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lucie</namePart>
<namePart type="family">Lucie-Aimée</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Angela</namePart>
<namePart type="family">Fan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tajuddeen</namePart>
<namePart type="family">Gwadabe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isaac</namePart>
<namePart type="family">Johnson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fabio</namePart>
<namePart type="family">Petroni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">van Strien</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large language models excel at creative generation but continue to struggle with the issues of hallucination and bias. While retrieval-augmented generation (RAG) provides a framework for grounding LLMs’ responses in accurate and up-to-date information, it still raises the question of bias: which sources should be selected for inclusion in the context? And how should their importance be weighted? In this paper, we study the challenge of cross-lingual RAG and present a dataset to investigate the robustness of existing systems at answering queries about geopolitical disputes, which exist at the intersection of linguistic, cultural, and political boundaries. Our dataset is sourced from Wikipedia pages containing information relevant to the given queries and we investigate the impact of including additional context, as well as the composition of this context in terms of language and source, on an LLM’s response. Our results show that existing RAG systems continue to be challenged by cross-lingual use cases and suffer from a lack of consistency when they are provided with competing information in multiple languages. We present case studies to illustrate these issues and outline steps for future research to address these challenges.</abstract>
<identifier type="citekey">li-etal-2024-bordirlines</identifier>
<identifier type="doi">10.18653/v1/2024.wikinlp-1.3</identifier>
<location>
<url>https://aclanthology.org/2024.wikinlp-1.3</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>1</start>
<end>13</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BordIRlines: A Dataset for Evaluating Cross-lingual Retrieval Augmented Generation
%A Li, Bryan
%A Haider, Samar
%A Luo, Fiona
%A Agashe, Adwait
%A Callison-Burch, Chris
%Y Lucie-Aimée, Lucie
%Y Fan, Angela
%Y Gwadabe, Tajuddeen
%Y Johnson, Isaac
%Y Petroni, Fabio
%Y van Strien, Daniel
%S Proceedings of the First Workshop on Advancing Natural Language Processing for Wikipedia
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F li-etal-2024-bordirlines
%X Large language models excel at creative generation but continue to struggle with the issues of hallucination and bias. While retrieval-augmented generation (RAG) provides a framework for grounding LLMs’ responses in accurate and up-to-date information, it still raises the question of bias: which sources should be selected for inclusion in the context? And how should their importance be weighted? In this paper, we study the challenge of cross-lingual RAG and present a dataset to investigate the robustness of existing systems at answering queries about geopolitical disputes, which exist at the intersection of linguistic, cultural, and political boundaries. Our dataset is sourced from Wikipedia pages containing information relevant to the given queries and we investigate the impact of including additional context, as well as the composition of this context in terms of language and source, on an LLM’s response. Our results show that existing RAG systems continue to be challenged by cross-lingual use cases and suffer from a lack of consistency when they are provided with competing information in multiple languages. We present case studies to illustrate these issues and outline steps for future research to address these challenges.
%R 10.18653/v1/2024.wikinlp-1.3
%U https://aclanthology.org/2024.wikinlp-1.3
%U https://doi.org/10.18653/v1/2024.wikinlp-1.3
%P 1-13
Markdown (Informal)
[BordIRlines: A Dataset for Evaluating Cross-lingual Retrieval Augmented Generation](https://aclanthology.org/2024.wikinlp-1.3) (Li et al., WikiNLP 2024)
ACL