@inproceedings{wang-etal-2024-soft-knowledge,
title = "Soft Knowledge Prompt: Help External Knowledge Become a Better Teacher to Instruct {LLM} in Knowledge-based {VQA}",
author = "Wang, Qunbo and
Ji, Ruyi and
Peng, Tianhao and
Wu, Wenjun and
Li, Zechao and
Liu, Jing",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.acl-long.332",
doi = "10.18653/v1/2024.acl-long.332",
pages = "6132--6143",
abstract = "LLM has achieved impressive performance on multi-modal tasks, which have received ever-increasing research attention. Recent research focuses on improving prediction performance and reliability (e.g., addressing the hallucination problem). They often prepend relevant external knowledge to the input text as an extra prompt. However, these methods would be affected by the noise in the knowledge and the context length limitation of LLM. In our work, we focus on making better use of external knowledge and propose a method to actively extract valuable information in the knowledge to produce the latent vector as a soft prompt, which is then fused with the image embedding to form a knowledge-enhanced context to instruct LLM. The experimental results on knowledge-based VQA benchmarks show that the proposed method enjoys better utilization of external knowledge and helps the model achieve better performance.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-etal-2024-soft-knowledge">
<titleInfo>
<title>Soft Knowledge Prompt: Help External Knowledge Become a Better Teacher to Instruct LLM in Knowledge-based VQA</title>
</titleInfo>
<name type="personal">
<namePart type="given">Qunbo</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruyi</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tianhao</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wenjun</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zechao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jing</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>LLM has achieved impressive performance on multi-modal tasks, which have received ever-increasing research attention. Recent research focuses on improving prediction performance and reliability (e.g., addressing the hallucination problem). They often prepend relevant external knowledge to the input text as an extra prompt. However, these methods would be affected by the noise in the knowledge and the context length limitation of LLM. In our work, we focus on making better use of external knowledge and propose a method to actively extract valuable information in the knowledge to produce the latent vector as a soft prompt, which is then fused with the image embedding to form a knowledge-enhanced context to instruct LLM. The experimental results on knowledge-based VQA benchmarks show that the proposed method enjoys better utilization of external knowledge and helps the model achieve better performance.</abstract>
<identifier type="citekey">wang-etal-2024-soft-knowledge</identifier>
<identifier type="doi">10.18653/v1/2024.acl-long.332</identifier>
<location>
<url>https://aclanthology.org/2024.acl-long.332</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>6132</start>
<end>6143</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Soft Knowledge Prompt: Help External Knowledge Become a Better Teacher to Instruct LLM in Knowledge-based VQA
%A Wang, Qunbo
%A Ji, Ruyi
%A Peng, Tianhao
%A Wu, Wenjun
%A Li, Zechao
%A Liu, Jing
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F wang-etal-2024-soft-knowledge
%X LLM has achieved impressive performance on multi-modal tasks, which have received ever-increasing research attention. Recent research focuses on improving prediction performance and reliability (e.g., addressing the hallucination problem). They often prepend relevant external knowledge to the input text as an extra prompt. However, these methods would be affected by the noise in the knowledge and the context length limitation of LLM. In our work, we focus on making better use of external knowledge and propose a method to actively extract valuable information in the knowledge to produce the latent vector as a soft prompt, which is then fused with the image embedding to form a knowledge-enhanced context to instruct LLM. The experimental results on knowledge-based VQA benchmarks show that the proposed method enjoys better utilization of external knowledge and helps the model achieve better performance.
%R 10.18653/v1/2024.acl-long.332
%U https://aclanthology.org/2024.acl-long.332
%U https://doi.org/10.18653/v1/2024.acl-long.332
%P 6132-6143
Markdown (Informal)
[Soft Knowledge Prompt: Help External Knowledge Become a Better Teacher to Instruct LLM in Knowledge-based VQA](https://aclanthology.org/2024.acl-long.332) (Wang et al., ACL 2024)
ACL