@inproceedings{paul-etal-2024-refiner,
title = "{REFINER}: Reasoning Feedback on Intermediate Representations",
author = "Paul, Debjit and
Ismayilzada, Mete and
Peyrard, Maxime and
Borges, Beatriz and
Bosselut, Antoine and
West, Robert and
Faltings, Boi",
editor = "Graham, Yvette and
Purver, Matthew",
booktitle = "Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = mar,
year = "2024",
address = "St. Julian{'}s, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.eacl-long.67",
pages = "1100--1126",
abstract = "Language models (LMs) have recently shown remarkable performance on reasoning tasks by explicitly generating intermediate inferences,e.g., chain-of-thought prompting. However, these intermediate inference steps may be inappropriate deductions from the initial contextand lead to incorrect final predictions. Here we introduce REFINER, a framework for finetuning LMs to explicitly generate intermediate reasoning steps while interacting with a critic model that provides automated feedback on the reasoning. Specifically, the critic provides structured feedback that the reasoning LM uses to iteratively improve its intermediate arguments. Empirical evaluations of REFINER on three diverse reasoning tasks show significant improvements over baseline LMs of comparable scale. Furthermore, when using GPT-3.5 or ChatGPT as the reasoner, the trained critic significantly improves reasoning without finetuning the reasoner. Finally, our critic model is trained without expensive human-in-the-loop data but can be substituted with humans at inference time.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="paul-etal-2024-refiner">
<titleInfo>
<title>REFINER: Reasoning Feedback on Intermediate Representations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Debjit</namePart>
<namePart type="family">Paul</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mete</namePart>
<namePart type="family">Ismayilzada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maxime</namePart>
<namePart type="family">Peyrard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Beatriz</namePart>
<namePart type="family">Borges</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antoine</namePart>
<namePart type="family">Bosselut</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="family">West</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Boi</namePart>
<namePart type="family">Faltings</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yvette</namePart>
<namePart type="family">Graham</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="family">Purver</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julian’s, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Language models (LMs) have recently shown remarkable performance on reasoning tasks by explicitly generating intermediate inferences,e.g., chain-of-thought prompting. However, these intermediate inference steps may be inappropriate deductions from the initial contextand lead to incorrect final predictions. Here we introduce REFINER, a framework for finetuning LMs to explicitly generate intermediate reasoning steps while interacting with a critic model that provides automated feedback on the reasoning. Specifically, the critic provides structured feedback that the reasoning LM uses to iteratively improve its intermediate arguments. Empirical evaluations of REFINER on three diverse reasoning tasks show significant improvements over baseline LMs of comparable scale. Furthermore, when using GPT-3.5 or ChatGPT as the reasoner, the trained critic significantly improves reasoning without finetuning the reasoner. Finally, our critic model is trained without expensive human-in-the-loop data but can be substituted with humans at inference time.</abstract>
<identifier type="citekey">paul-etal-2024-refiner</identifier>
<location>
<url>https://aclanthology.org/2024.eacl-long.67</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>1100</start>
<end>1126</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T REFINER: Reasoning Feedback on Intermediate Representations
%A Paul, Debjit
%A Ismayilzada, Mete
%A Peyrard, Maxime
%A Borges, Beatriz
%A Bosselut, Antoine
%A West, Robert
%A Faltings, Boi
%Y Graham, Yvette
%Y Purver, Matthew
%S Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julian’s, Malta
%F paul-etal-2024-refiner
%X Language models (LMs) have recently shown remarkable performance on reasoning tasks by explicitly generating intermediate inferences,e.g., chain-of-thought prompting. However, these intermediate inference steps may be inappropriate deductions from the initial contextand lead to incorrect final predictions. Here we introduce REFINER, a framework for finetuning LMs to explicitly generate intermediate reasoning steps while interacting with a critic model that provides automated feedback on the reasoning. Specifically, the critic provides structured feedback that the reasoning LM uses to iteratively improve its intermediate arguments. Empirical evaluations of REFINER on three diverse reasoning tasks show significant improvements over baseline LMs of comparable scale. Furthermore, when using GPT-3.5 or ChatGPT as the reasoner, the trained critic significantly improves reasoning without finetuning the reasoner. Finally, our critic model is trained without expensive human-in-the-loop data but can be substituted with humans at inference time.
%U https://aclanthology.org/2024.eacl-long.67
%P 1100-1126
Markdown (Informal)
[REFINER: Reasoning Feedback on Intermediate Representations](https://aclanthology.org/2024.eacl-long.67) (Paul et al., EACL 2024)
ACL
- Debjit Paul, Mete Ismayilzada, Maxime Peyrard, Beatriz Borges, Antoine Bosselut, Robert West, and Boi Faltings. 2024. REFINER: Reasoning Feedback on Intermediate Representations. In Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers), pages 1100–1126, St. Julian’s, Malta. Association for Computational Linguistics.