@inproceedings{kang-etal-2020-document,
title = "Document-Level Definition Detection in Scholarly Documents: Existing Models, Error Analyses, and Future Directions",
author = "Kang, Dongyeop and
Head, Andrew and
Sidhu, Risham and
Lo, Kyle and
Weld, Daniel and
Hearst, Marti A.",
editor = "Chandrasekaran, Muthu Kumar and
de Waard, Anita and
Feigenblat, Guy and
Freitag, Dayne and
Ghosal, Tirthankar and
Hovy, Eduard and
Knoth, Petr and
Konopnicki, David and
Mayr, Philipp and
Patton, Robert M. and
Shmueli-Scheuer, Michal",
booktitle = "Proceedings of the First Workshop on Scholarly Document Processing",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.sdp-1.22",
doi = "10.18653/v1/2020.sdp-1.22",
pages = "196--206",
abstract = "The task of definition detection is important for scholarly papers, because papers often make use of technical terminology that may be unfamiliar to readers. Despite prior work on definition detection, current approaches are far from being accurate enough to use in realworld applications. In this paper, we first perform in-depth error analysis of the current best performing definition detection system and discover major causes of errors. Based on this analysis, we develop a new definition detection system, HEDDEx, that utilizes syntactic features, transformer encoders, and heuristic filters, and evaluate it on a standard sentence-level benchmark. Because current benchmarks evaluate randomly sampled sentences, we propose an alternative evaluation that assesses every sentence within a document. This allows for evaluating recall in addition to precision. HEDDEx outperforms the leading system on both the sentence-level and the document-level tasks, by 12.7 F1 points and 14.4 F1 points, respectively. We note that performance on the high-recall document-level task is much lower than in the standard evaluation approach, due to the necessity of incorporation of document structure as features. We discuss remaining challenges in document-level definition detection, ideas for improvements, and potential issues for the development of reading aid applications.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kang-etal-2020-document">
<titleInfo>
<title>Document-Level Definition Detection in Scholarly Documents: Existing Models, Error Analyses, and Future Directions</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dongyeop</namePart>
<namePart type="family">Kang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">Head</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Risham</namePart>
<namePart type="family">Sidhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kyle</namePart>
<namePart type="family">Lo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Weld</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marti</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Hearst</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Scholarly Document Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Muthu</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Chandrasekaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anita</namePart>
<namePart type="family">de Waard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guy</namePart>
<namePart type="family">Feigenblat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dayne</namePart>
<namePart type="family">Freitag</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tirthankar</namePart>
<namePart type="family">Ghosal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eduard</namePart>
<namePart type="family">Hovy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Petr</namePart>
<namePart type="family">Knoth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Konopnicki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Mayr</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Patton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michal</namePart>
<namePart type="family">Shmueli-Scheuer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The task of definition detection is important for scholarly papers, because papers often make use of technical terminology that may be unfamiliar to readers. Despite prior work on definition detection, current approaches are far from being accurate enough to use in realworld applications. In this paper, we first perform in-depth error analysis of the current best performing definition detection system and discover major causes of errors. Based on this analysis, we develop a new definition detection system, HEDDEx, that utilizes syntactic features, transformer encoders, and heuristic filters, and evaluate it on a standard sentence-level benchmark. Because current benchmarks evaluate randomly sampled sentences, we propose an alternative evaluation that assesses every sentence within a document. This allows for evaluating recall in addition to precision. HEDDEx outperforms the leading system on both the sentence-level and the document-level tasks, by 12.7 F1 points and 14.4 F1 points, respectively. We note that performance on the high-recall document-level task is much lower than in the standard evaluation approach, due to the necessity of incorporation of document structure as features. We discuss remaining challenges in document-level definition detection, ideas for improvements, and potential issues for the development of reading aid applications.</abstract>
<identifier type="citekey">kang-etal-2020-document</identifier>
<identifier type="doi">10.18653/v1/2020.sdp-1.22</identifier>
<location>
<url>https://aclanthology.org/2020.sdp-1.22</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>196</start>
<end>206</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Document-Level Definition Detection in Scholarly Documents: Existing Models, Error Analyses, and Future Directions
%A Kang, Dongyeop
%A Head, Andrew
%A Sidhu, Risham
%A Lo, Kyle
%A Weld, Daniel
%A Hearst, Marti A.
%Y Chandrasekaran, Muthu Kumar
%Y de Waard, Anita
%Y Feigenblat, Guy
%Y Freitag, Dayne
%Y Ghosal, Tirthankar
%Y Hovy, Eduard
%Y Knoth, Petr
%Y Konopnicki, David
%Y Mayr, Philipp
%Y Patton, Robert M.
%Y Shmueli-Scheuer, Michal
%S Proceedings of the First Workshop on Scholarly Document Processing
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F kang-etal-2020-document
%X The task of definition detection is important for scholarly papers, because papers often make use of technical terminology that may be unfamiliar to readers. Despite prior work on definition detection, current approaches are far from being accurate enough to use in realworld applications. In this paper, we first perform in-depth error analysis of the current best performing definition detection system and discover major causes of errors. Based on this analysis, we develop a new definition detection system, HEDDEx, that utilizes syntactic features, transformer encoders, and heuristic filters, and evaluate it on a standard sentence-level benchmark. Because current benchmarks evaluate randomly sampled sentences, we propose an alternative evaluation that assesses every sentence within a document. This allows for evaluating recall in addition to precision. HEDDEx outperforms the leading system on both the sentence-level and the document-level tasks, by 12.7 F1 points and 14.4 F1 points, respectively. We note that performance on the high-recall document-level task is much lower than in the standard evaluation approach, due to the necessity of incorporation of document structure as features. We discuss remaining challenges in document-level definition detection, ideas for improvements, and potential issues for the development of reading aid applications.
%R 10.18653/v1/2020.sdp-1.22
%U https://aclanthology.org/2020.sdp-1.22
%U https://doi.org/10.18653/v1/2020.sdp-1.22
%P 196-206
Markdown (Informal)
[Document-Level Definition Detection in Scholarly Documents: Existing Models, Error Analyses, and Future Directions](https://aclanthology.org/2020.sdp-1.22) (Kang et al., sdp 2020)
ACL