@inproceedings{luo-etal-2024-large,
title = "Large Language Models as Foundations for Next-Gen Dense Retrieval: A Comprehensive Empirical Assessment",
author = "Luo, Kun and
Qin, Minghao and
Liu, Zheng and
Xiao, Shitao and
Zhao, Jun and
Liu, Kang",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.80",
doi = "10.18653/v1/2024.emnlp-main.80",
pages = "1354--1365",
abstract = "Pre-trained language models like BERT and T5 serve as crucial backbone encoders for dense retrieval. However, these models often exhibit limited generalization capabilities and face challenges in improving in-domain accuracy. Recent research has explored using large language models (LLMs) as retrievers, achieving state-of-the-art performance across various tasks. Despite these advancements, the specific benefits of LLMs over traditional retrievers and the impact of different LLM configurations{---}such as parameter sizes, pre-training duration, and alignment processes{---}on retrieval tasks remain unclear. In this work, we conduct a comprehensive empirical study on a wide range of retrieval tasks, including in-domain accuracy, data efficiency, zero-shot generalization, lengthy retrieval, instruction-based retrieval, and multi-task learning. We evaluate over 15 different backbone LLMs and non-LLMs. Our findings reveal that larger models and extensive pre-training consistently enhance in-domain accuracy and data efficiency. Additionally, larger models demonstrate significant potential in zero-shot generalization, lengthy retrieval, instruction-based retrieval, and multi-task learning. These results underscore the advantages of LLMs as versatile and effective backbone encoders in dense retrieval, providing valuable insights for future research and development in this field.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="luo-etal-2024-large">
<titleInfo>
<title>Large Language Models as Foundations for Next-Gen Dense Retrieval: A Comprehensive Empirical Assessment</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kun</namePart>
<namePart type="family">Luo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Minghao</namePart>
<namePart type="family">Qin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zheng</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shitao</namePart>
<namePart type="family">Xiao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jun</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Pre-trained language models like BERT and T5 serve as crucial backbone encoders for dense retrieval. However, these models often exhibit limited generalization capabilities and face challenges in improving in-domain accuracy. Recent research has explored using large language models (LLMs) as retrievers, achieving state-of-the-art performance across various tasks. Despite these advancements, the specific benefits of LLMs over traditional retrievers and the impact of different LLM configurations—such as parameter sizes, pre-training duration, and alignment processes—on retrieval tasks remain unclear. In this work, we conduct a comprehensive empirical study on a wide range of retrieval tasks, including in-domain accuracy, data efficiency, zero-shot generalization, lengthy retrieval, instruction-based retrieval, and multi-task learning. We evaluate over 15 different backbone LLMs and non-LLMs. Our findings reveal that larger models and extensive pre-training consistently enhance in-domain accuracy and data efficiency. Additionally, larger models demonstrate significant potential in zero-shot generalization, lengthy retrieval, instruction-based retrieval, and multi-task learning. These results underscore the advantages of LLMs as versatile and effective backbone encoders in dense retrieval, providing valuable insights for future research and development in this field.</abstract>
<identifier type="citekey">luo-etal-2024-large</identifier>
<identifier type="doi">10.18653/v1/2024.emnlp-main.80</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.80</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>1354</start>
<end>1365</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Large Language Models as Foundations for Next-Gen Dense Retrieval: A Comprehensive Empirical Assessment
%A Luo, Kun
%A Qin, Minghao
%A Liu, Zheng
%A Xiao, Shitao
%A Zhao, Jun
%A Liu, Kang
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F luo-etal-2024-large
%X Pre-trained language models like BERT and T5 serve as crucial backbone encoders for dense retrieval. However, these models often exhibit limited generalization capabilities and face challenges in improving in-domain accuracy. Recent research has explored using large language models (LLMs) as retrievers, achieving state-of-the-art performance across various tasks. Despite these advancements, the specific benefits of LLMs over traditional retrievers and the impact of different LLM configurations—such as parameter sizes, pre-training duration, and alignment processes—on retrieval tasks remain unclear. In this work, we conduct a comprehensive empirical study on a wide range of retrieval tasks, including in-domain accuracy, data efficiency, zero-shot generalization, lengthy retrieval, instruction-based retrieval, and multi-task learning. We evaluate over 15 different backbone LLMs and non-LLMs. Our findings reveal that larger models and extensive pre-training consistently enhance in-domain accuracy and data efficiency. Additionally, larger models demonstrate significant potential in zero-shot generalization, lengthy retrieval, instruction-based retrieval, and multi-task learning. These results underscore the advantages of LLMs as versatile and effective backbone encoders in dense retrieval, providing valuable insights for future research and development in this field.
%R 10.18653/v1/2024.emnlp-main.80
%U https://aclanthology.org/2024.emnlp-main.80
%U https://doi.org/10.18653/v1/2024.emnlp-main.80
%P 1354-1365
Markdown (Informal)
[Large Language Models as Foundations for Next-Gen Dense Retrieval: A Comprehensive Empirical Assessment](https://aclanthology.org/2024.emnlp-main.80) (Luo et al., EMNLP 2024)
ACL