@inproceedings{singh-2024-jigsaw,
title = "Jigsaw Pieces of Meaning: Modeling Discourse Coherence with Informed Negative Sample Synthesis",
author = "Singh, Shubhankar",
editor = "Graham, Yvette and
Purver, Matthew",
booktitle = "Findings of the Association for Computational Linguistics: EACL 2024",
month = mar,
year = "2024",
address = "St. Julian{'}s, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-eacl.128",
pages = "1895--1908",
abstract = "Coherence in discourse is fundamental for comprehension and perception. Much research on coherence modeling has focused on better model architectures and training setups optimizing on the permuted document task, where random permutations of a coherent document are considered incoherent. However, there{'}s very limited work on creating {``}informed{''} synthetic incoherent samples that better represent or mimic incoherence. We source a diverse positive corpus for local coherence and propose six rule-based methods leveraging information from Constituency trees, Part-of-speech, semantic overlap and more, for {``}informed{''} negative sample synthesis for better representation of incoherence. We keep a straightforward training setup for local coherence modeling by fine-tuning popular transformer models, and aggregate local scores for global coherence. We evaluate on a battery of independent downstream tasks to assess the impact of improved negative sample quality. We assert that a step towards optimality for coherence modeling requires better negative sample synthesis in tandem with model improvements.",
}
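The abstract above frames negative sample synthesis as the key lever for coherence modeling. For orientation, here is a minimal sketch (plain Python, illustrative only, not code from the paper) of the standard permuted-document baseline the abstract contrasts against: a coherent document's sentence order is randomly shuffled to produce a synthetic incoherent negative. The paper's six "informed" rule-based methods, which draw on constituency trees, part-of-speech tags, and semantic overlap, are not reproduced here.

```python
import random

def permuted_negative(sentences: list[str], seed: int = 0) -> list[str]:
    """Permuted-document baseline: return a random reordering of a
    coherent document's sentences to serve as an incoherent negative.
    Retries until the order actually differs from the original."""
    rng = random.Random(seed)
    negative = sentences[:]
    while negative == sentences and len(sentences) > 1:
        rng.shuffle(negative)
    return negative

# Hypothetical usage: pair each positive (coherent) document with its
# shuffled negative to train a binary local-coherence classifier, then
# aggregate local scores over the document for a global coherence score,
# as the abstract describes.
doc = [
    "The committee met on Tuesday.",
    "It reviewed the budget proposal.",
    "A final vote is scheduled for Friday.",
]
print(permuted_negative(doc, seed=42))
```

The paper's contribution, per the abstract, is to replace this uniform shuffling with linguistically informed edits so that negatives better mimic genuine incoherence.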