@inproceedings{cohen-etal-2022-mcphrasy,
title = "{M}c{P}hra{S}y: Multi-Context Phrase Similarity and Clustering",
author = "Cohen, Amir and
Gonen, Hila and
Shapira, Ori and
Levy, Ran and
Goldberg, Yoav",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-emnlp.259",
doi = "10.18653/v1/2022.findings-emnlp.259",
pages = "3538--3550",
abstract = "Phrase similarity is a key component of many NLP applications. Current phrase similarity methods focus on embedding the phrase itself and use the phrase context only during training of the pretrained model. To better leverage the information in the context, we propose McPhraSy (Multi-context Phrase Similarity), a novel algorithm for estimating the similarity of phrases based on multiple contexts. At inference time, McPhraSy represents each phrase by considering multiple contexts in which it appears and computes the similarity of two phrases by aggregating the pairwise similarities between the contexts of the phrases. Incorporating context during inference enables McPhraSy to outperform current state-of-the-art models on two phrase similarity datasets by up to 13.3{\%}. Finally, we also present a new downstream task that relies on phrase similarity {--} keyphrase clustering {--} and create a new benchmark for it in the product reviews domain. We show that McPhraSy surpasses all other baselines for this task.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cohen-etal-2022-mcphrasy">
<titleInfo>
<title>McPhraSy: Multi-Context Phrase Similarity and Clustering</title>
</titleInfo>
<name type="personal">
<namePart type="given">Amir</namePart>
<namePart type="family">Cohen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hila</namePart>
<namePart type="family">Gonen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ori</namePart>
<namePart type="family">Shapira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ran</namePart>
<namePart type="family">Levy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Phrase similarity is a key component of many NLP applications. Current phrase similarity methods focus on embedding the phrase itself and use the phrase context only during training of the pretrained model. To better leverage the information in the context, we propose McPhraSy (Multi-context Phrase Similarity), a novel algorithm for estimating the similarity of phrases based on multiple contexts. At inference time, McPhraSy represents each phrase by considering multiple contexts in which it appears and computes the similarity of two phrases by aggregating the pairwise similarities between the contexts of the phrases. Incorporating context during inference enables McPhraSy to outperform current state-of-the-art models on two phrase similarity datasets by up to 13.3%. Finally, we also present a new downstream task that relies on phrase similarity – keyphrase clustering – and create a new benchmark for it in the product reviews domain. We show that McPhraSy surpasses all other baselines for this task.</abstract>
<identifier type="citekey">cohen-etal-2022-mcphrasy</identifier>
<identifier type="doi">10.18653/v1/2022.findings-emnlp.259</identifier>
<location>
<url>https://aclanthology.org/2022.findings-emnlp.259</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>3538</start>
<end>3550</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T McPhraSy: Multi-Context Phrase Similarity and Clustering
%A Cohen, Amir
%A Gonen, Hila
%A Shapira, Ori
%A Levy, Ran
%A Goldberg, Yoav
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Findings of the Association for Computational Linguistics: EMNLP 2022
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F cohen-etal-2022-mcphrasy
%X Phrase similarity is a key component of many NLP applications. Current phrase similarity methods focus on embedding the phrase itself and use the phrase context only during training of the pretrained model. To better leverage the information in the context, we propose McPhraSy (Multi-context Phrase Similarity), a novel algorithm for estimating the similarity of phrases based on multiple contexts. At inference time, McPhraSy represents each phrase by considering multiple contexts in which it appears and computes the similarity of two phrases by aggregating the pairwise similarities between the contexts of the phrases. Incorporating context during inference enables McPhraSy to outperform current state-of-the-art models on two phrase similarity datasets by up to 13.3%. Finally, we also present a new downstream task that relies on phrase similarity – keyphrase clustering – and create a new benchmark for it in the product reviews domain. We show that McPhraSy surpasses all other baselines for this task.
%R 10.18653/v1/2022.findings-emnlp.259
%U https://aclanthology.org/2022.findings-emnlp.259
%U https://doi.org/10.18653/v1/2022.findings-emnlp.259
%P 3538-3550
Markdown (Informal)
[McPhraSy: Multi-Context Phrase Similarity and Clustering](https://aclanthology.org/2022.findings-emnlp.259) (Cohen et al., Findings 2022)
ACL
- Amir Cohen, Hila Gonen, Ori Shapira, Ran Levy, and Yoav Goldberg. 2022. McPhraSy: Multi-Context Phrase Similarity and Clustering. In Findings of the Association for Computational Linguistics: EMNLP 2022, pages 3538–3550, Abu Dhabi, United Arab Emirates. Association for Computational Linguistics.