% Workshop paper; ACL Anthology record 2022.wassa-1.17.
@inproceedings{kerz-etal-2022-pushing,
  title     = {Pushing on Personality Detection from Verbal Behavior: A Transformer Meets Text Contours of Psycholinguistic Features},
  author    = {Kerz, Elma and
               Qiao, Yu and
               Zanwar, Sourabh and
               Wiechmann, Daniel},
  editor    = {Barnes, Jeremy and
               De Clercq, Orph{\'e}e and
               Barriere, Valentin and
               Tafreshi, Shabnam and
               Alqahtani, Sawsan and
               Sedoc, Jo{\~a}o and
               Klinger, Roman and
               Balahur, Alexandra},
  booktitle = {Proceedings of the 12th Workshop on Computational Approaches to Subjectivity, Sentiment {\&} Social Media Analysis},
  month     = may,
  year      = {2022},
  address   = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.wassa-1.17},
  doi       = {10.18653/v1/2022.wassa-1.17},
  pages     = {182--194},
  abstract  = {Research at the intersection of personality psychology, computer science, and linguistics has recently focused increasingly on modeling and predicting personality from language use. We report two major improvements in predicting personality traits from text data: (1) to our knowledge, the most comprehensive set of theory-based psycholinguistic features and (2) hybrid models that integrate a pre-trained Transformer Language Model BERT and Bidirectional Long Short-Term Memory (BLSTM) networks trained on within-text distributions ({`}text contours{'}) of psycholinguistic features. We experiment with BLSTM models (with and without Attention) and with two techniques for applying pre-trained language representations from the transformer model - {`}feature-based{'} and {`}fine-tuning{'}. We evaluate the performance of the models we built on two benchmark datasets that target the two dominant theoretical models of personality: the Big Five Essay dataset (Pennebaker and King, 1999) and the MBTI Kaggle dataset (Li et al., 2018). Our results are encouraging as our models outperform existing work on the same datasets. More specifically, our models achieve improvement in classification accuracy by 2.9{\%} on the Essay dataset and 8.28{\%} on the Kaggle MBTI dataset. In addition, we perform ablation experiments to quantify the impact of different categories of psycholinguistic features in the respective personality prediction models.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for the paper with citekey kerz-etal-2022-pushing. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="kerz-etal-2022-pushing">
    <titleInfo>
      <title>Pushing on Personality Detection from Verbal Behavior: A Transformer Meets Text Contours of Psycholinguistic Features</title>
    </titleInfo>
    <!-- Authors of the paper. -->
    <name type="personal">
      <namePart type="given">Elma</namePart>
      <namePart type="family">Kerz</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yu</namePart>
      <namePart type="family">Qiao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sourabh</namePart>
      <namePart type="family">Zanwar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Daniel</namePart>
      <namePart type="family">Wiechmann</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host publication: the proceedings volume, its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <!-- A literal ampersand in character data must be written as the &amp; entity. -->
        <title>Proceedings of the 12th Workshop on Computational Approaches to Subjectivity, Sentiment &amp; Social Media Analysis</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Jeremy</namePart>
        <namePart type="family">Barnes</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Orphée</namePart>
        <namePart type="family">De Clercq</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Valentin</namePart>
        <namePart type="family">Barriere</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shabnam</namePart>
        <namePart type="family">Tafreshi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sawsan</namePart>
        <namePart type="family">Alqahtani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">João</namePart>
        <namePart type="family">Sedoc</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Roman</namePart>
        <namePart type="family">Klinger</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alexandra</namePart>
        <namePart type="family">Balahur</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Dublin, Ireland</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Research at the intersection of personality psychology, computer science, and linguistics has recently focused increasingly on modeling and predicting personality from language use. We report two major improvements in predicting personality traits from text data: (1) to our knowledge, the most comprehensive set of theory-based psycholinguistic features and (2) hybrid models that integrate a pre-trained Transformer Language Model BERT and Bidirectional Long Short-Term Memory (BLSTM) networks trained on within-text distributions (‘text contours’) of psycholinguistic features. We experiment with BLSTM models (with and without Attention) and with two techniques for applying pre-trained language representations from the transformer model - ‘feature-based’ and ‘fine-tuning’. We evaluate the performance of the models we built on two benchmark datasets that target the two dominant theoretical models of personality: the Big Five Essay dataset (Pennebaker and King, 1999) and the MBTI Kaggle dataset (Li et al., 2018). Our results are encouraging as our models outperform existing work on the same datasets. More specifically, our models achieve improvement in classification accuracy by 2.9% on the Essay dataset and 8.28% on the Kaggle MBTI dataset. In addition, we perform ablation experiments to quantify the impact of different categories of psycholinguistic features in the respective personality prediction models.</abstract>
    <!-- Identifiers and canonical location of the record. -->
    <identifier type="citekey">kerz-etal-2022-pushing</identifier>
    <identifier type="doi">10.18653/v1/2022.wassa-1.17</identifier>
    <location>
      <url>https://aclanthology.org/2022.wassa-1.17</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2022-05</date>
      <extent unit="page">
        <start>182</start>
        <end>194</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Pushing on Personality Detection from Verbal Behavior: A Transformer Meets Text Contours of Psycholinguistic Features
%A Kerz, Elma
%A Qiao, Yu
%A Zanwar, Sourabh
%A Wiechmann, Daniel
%Y Barnes, Jeremy
%Y De Clercq, Orphée
%Y Barriere, Valentin
%Y Tafreshi, Shabnam
%Y Alqahtani, Sawsan
%Y Sedoc, João
%Y Klinger, Roman
%Y Balahur, Alexandra
%S Proceedings of the 12th Workshop on Computational Approaches to Subjectivity, Sentiment & Social Media Analysis
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F kerz-etal-2022-pushing
%X Research at the intersection of personality psychology, computer science, and linguistics has recently focused increasingly on modeling and predicting personality from language use. We report two major improvements in predicting personality traits from text data: (1) to our knowledge, the most comprehensive set of theory-based psycholinguistic features and (2) hybrid models that integrate a pre-trained Transformer Language Model BERT and Bidirectional Long Short-Term Memory (BLSTM) networks trained on within-text distributions (‘text contours’) of psycholinguistic features. We experiment with BLSTM models (with and without Attention) and with two techniques for applying pre-trained language representations from the transformer model - ‘feature-based’ and ‘fine-tuning’. We evaluate the performance of the models we built on two benchmark datasets that target the two dominant theoretical models of personality: the Big Five Essay dataset (Pennebaker and King, 1999) and the MBTI Kaggle dataset (Li et al., 2018). Our results are encouraging as our models outperform existing work on the same datasets. More specifically, our models achieve improvement in classification accuracy by 2.9% on the Essay dataset and 8.28% on the Kaggle MBTI dataset. In addition, we perform ablation experiments to quantify the impact of different categories of psycholinguistic features in the respective personality prediction models.
%R 10.18653/v1/2022.wassa-1.17
%U https://aclanthology.org/2022.wassa-1.17
%U https://doi.org/10.18653/v1/2022.wassa-1.17
%P 182-194
Markdown (Informal)
[Pushing on Personality Detection from Verbal Behavior: A Transformer Meets Text Contours of Psycholinguistic Features](https://aclanthology.org/2022.wassa-1.17) (Kerz et al., WASSA 2022)
ACL