@inproceedings{wenjie-etal-2020-bcth,
title = "{BCTH}: A Novel Text Hashing Approach via {B}ayesian Clustering",
author = "Wenjie, Ying and
Le, Yuquan and
Xiong, Hantao",
editor = "Wong, Kam-Fai and
Knight, Kevin and
Wu, Hua",
booktitle = "Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing",
month = dec,
year = "2020",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.aacl-main.7",
doi = "10.18653/v1/2020.aacl-main.7",
pages = "54--62",
abstract = "Similarity search is to find the most similar items for a certain target item. The ability of similarity search at large scale plays a significant role in many information retrieval applications, and thus has received much attention. Text hashing is a promising strategy, which utilizes binary encoding to represent documents, obtaining attractive performance. This paper makes the first attempt to utilize Bayesian Clustering for Text Hashing, dubbed as BCTH. Specifically, BCTH is able to map documents to binary codes by utilizing multiple Bayesian Clusterings in parallel, where each Bayesian Clustering is responsible for one bit. Our approach employs the bit-balanced constraint to maximize the amount of information in each bit. Meanwhile, the bit-uncorrelated constraint is adopted to keep the independence among all bits. The time complexity of BCTH is linear, where the hash codes and hash function are jointly learned. The experimental results, based on four widely-used datasets, demonstrate that BCTH is competitive, compared with currently competitive baselines in the perspective of both precision and training speed.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wenjie-etal-2020-bcth">
<titleInfo>
<title>BCTH: A Novel Text Hashing Approach via Bayesian Clustering</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ying</namePart>
<namePart type="family">Wenjie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuquan</namePart>
<namePart type="family">Le</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hantao</namePart>
<namePart type="family">Xiong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kam-Fai</namePart>
<namePart type="family">Wong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Knight</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hua</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Similarity search is to find the most similar items for a certain target item. The ability of similarity search at large scale plays a significant role in many information retrieval applications, and thus has received much attention. Text hashing is a promising strategy, which utilizes binary encoding to represent documents, obtaining attractive performance. This paper makes the first attempt to utilize Bayesian Clustering for Text Hashing, dubbed as BCTH. Specifically, BCTH is able to map documents to binary codes by utilizing multiple Bayesian Clusterings in parallel, where each Bayesian Clustering is responsible for one bit. Our approach employs the bit-balanced constraint to maximize the amount of information in each bit. Meanwhile, the bit-uncorrelated constraint is adopted to keep the independence among all bits. The time complexity of BCTH is linear, where the hash codes and hash function are jointly learned. The experimental results, based on four widely-used datasets, demonstrate that BCTH is competitive, compared with currently competitive baselines in the perspective of both precision and training speed.</abstract>
<identifier type="citekey">wenjie-etal-2020-bcth</identifier>
<identifier type="doi">10.18653/v1/2020.aacl-main.7</identifier>
<location>
<url>https://aclanthology.org/2020.aacl-main.7</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>54</start>
<end>62</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BCTH: A Novel Text Hashing Approach via Bayesian Clustering
%A Wenjie, Ying
%A Le, Yuquan
%A Xiong, Hantao
%Y Wong, Kam-Fai
%Y Knight, Kevin
%Y Wu, Hua
%S Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing
%D 2020
%8 December
%I Association for Computational Linguistics
%C Suzhou, China
%F wenjie-etal-2020-bcth
%X Similarity search is to find the most similar items for a certain target item. The ability of similarity search at large scale plays a significant role in many information retrieval applications, and thus has received much attention. Text hashing is a promising strategy, which utilizes binary encoding to represent documents, obtaining attractive performance. This paper makes the first attempt to utilize Bayesian Clustering for Text Hashing, dubbed as BCTH. Specifically, BCTH is able to map documents to binary codes by utilizing multiple Bayesian Clusterings in parallel, where each Bayesian Clustering is responsible for one bit. Our approach employs the bit-balanced constraint to maximize the amount of information in each bit. Meanwhile, the bit-uncorrelated constraint is adopted to keep the independence among all bits. The time complexity of BCTH is linear, where the hash codes and hash function are jointly learned. The experimental results, based on four widely-used datasets, demonstrate that BCTH is competitive, compared with currently competitive baselines in the perspective of both precision and training speed.
%R 10.18653/v1/2020.aacl-main.7
%U https://aclanthology.org/2020.aacl-main.7
%U https://doi.org/10.18653/v1/2020.aacl-main.7
%P 54-62
Markdown (Informal)
[BCTH: A Novel Text Hashing Approach via Bayesian Clustering](https://aclanthology.org/2020.aacl-main.7) (Wenjie et al., AACL 2020)
ACL
- Ying Wenjie, Yuquan Le, and Hantao Xiong. 2020. BCTH: A Novel Text Hashing Approach via Bayesian Clustering. In Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing, pages 54–62, Suzhou, China. Association for Computational Linguistics.