@inproceedings{hosseini-etal-2020-deezymatch,
title = "{D}eezy{M}atch: A Flexible Deep Learning Approach to Fuzzy String Matching",
author = "Hosseini, Kasra and
Nanni, Federico and
Coll Ardanuy, Mariona",
editor = "Liu, Qun and
Schlangen, David",
booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations",
month = oct,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.emnlp-demos.9/",
doi = "10.18653/v1/2020.emnlp-demos.9",
pages = "62--69",
abstract = "We present DeezyMatch, a free, open-source software library written in Python for fuzzy string matching and candidate ranking. Its pair classifier supports various deep neural network architectures for training new classifiers and for fine-tuning a pretrained model, which paves the way for transfer learning in fuzzy string matching. This approach is especially useful where only limited training examples are available. The learned DeezyMatch models can be used to generate rich vector representations from string inputs. The candidate ranker component in DeezyMatch uses these vector representations to find, for a given query, the best matching candidates in a knowledge base. It uses an adaptive searching algorithm applicable to large knowledge bases and query sets. We describe DeezyMatch's functionality, design and implementation, accompanied by a use case in toponym matching and candidate ranking in realistic noisy datasets."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hosseini-etal-2020-deezymatch">
<titleInfo>
<title>DeezyMatch: A Flexible Deep Learning Approach to Fuzzy String Matching</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kasra</namePart>
<namePart type="family">Hosseini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Federico</namePart>
<namePart type="family">Nanni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mariona</namePart>
<namePart type="family">Coll Ardanuy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Qun</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Schlangen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present DeezyMatch, a free, open-source software library written in Python for fuzzy string matching and candidate ranking. Its pair classifier supports various deep neural network architectures for training new classifiers and for fine-tuning a pretrained model, which paves the way for transfer learning in fuzzy string matching. This approach is especially useful where only limited training examples are available. The learned DeezyMatch models can be used to generate rich vector representations from string inputs. The candidate ranker component in DeezyMatch uses these vector representations to find, for a given query, the best matching candidates in a knowledge base. It uses an adaptive searching algorithm applicable to large knowledge bases and query sets. We describe DeezyMatch’s functionality, design and implementation, accompanied by a use case in toponym matching and candidate ranking in realistic noisy datasets.</abstract>
<identifier type="citekey">hosseini-etal-2020-deezymatch</identifier>
<identifier type="doi">10.18653/v1/2020.emnlp-demos.9</identifier>
<location>
<url>https://aclanthology.org/2020.emnlp-demos.9/</url>
</location>
<part>
<date>2020-10</date>
<extent unit="page">
<start>62</start>
<end>69</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DeezyMatch: A Flexible Deep Learning Approach to Fuzzy String Matching
%A Hosseini, Kasra
%A Nanni, Federico
%A Coll Ardanuy, Mariona
%Y Liu, Qun
%Y Schlangen, David
%S Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations
%D 2020
%8 October
%I Association for Computational Linguistics
%C Online
%F hosseini-etal-2020-deezymatch
%X We present DeezyMatch, a free, open-source software library written in Python for fuzzy string matching and candidate ranking. Its pair classifier supports various deep neural network architectures for training new classifiers and for fine-tuning a pretrained model, which paves the way for transfer learning in fuzzy string matching. This approach is especially useful where only limited training examples are available. The learned DeezyMatch models can be used to generate rich vector representations from string inputs. The candidate ranker component in DeezyMatch uses these vector representations to find, for a given query, the best matching candidates in a knowledge base. It uses an adaptive searching algorithm applicable to large knowledge bases and query sets. We describe DeezyMatch’s functionality, design and implementation, accompanied by a use case in toponym matching and candidate ranking in realistic noisy datasets.
%R 10.18653/v1/2020.emnlp-demos.9
%U https://aclanthology.org/2020.emnlp-demos.9/
%U https://doi.org/10.18653/v1/2020.emnlp-demos.9
%P 62-69
Markdown (Informal)
[DeezyMatch: A Flexible Deep Learning Approach to Fuzzy String Matching](https://aclanthology.org/2020.emnlp-demos.9/) (Hosseini et al., EMNLP 2020)
ACL