@inproceedings{demus-etal-2022-comprehensive,
title = "DeTox: A Comprehensive Dataset for {G}erman Offensive Language and Conversation Analysis",
author = {Demus, Christoph and
Pitz, Jonas and
Sch{\"u}tz, Mina and
Probol, Nadine and
Siegel, Melanie and
Labudde, Dirk},
editor = "Narang, Kanika and
Mostafazadeh Davani, Aida and
Mathias, Lambert and
Vidgen, Bertie and
Talat, Zeerak",
booktitle = "Proceedings of the Sixth Workshop on Online Abuse and Harms (WOAH)",
month = jul,
year = "2022",
address = "Seattle, Washington (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.woah-1.14/",
doi = "10.18653/v1/2022.woah-1.14",
pages = "143--153",
abstract = "In this work, we present a new publicly available offensive language dataset of 10.278 German social media comments collected in the first half of 2021 that were annotated by in total six annotators. With twelve different annotation categories, it is far more comprehensive than other datasets, and goes beyond just hate speech detection. The labels aim in particular also at toxicity, criminal relevance and discrimination types of comments. Furthermore, about half of the comments are from coherent parts of conversations, which opens the possibility to consider the comments' contexts and do conversation analyses in order to research the contagion of offensive language in conversations."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="demus-etal-2022-comprehensive">
<titleInfo>
<title>DeTox: A Comprehensive Dataset for German Offensive Language and Conversation Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christoph</namePart>
<namePart type="family">Demus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonas</namePart>
<namePart type="family">Pitz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mina</namePart>
<namePart type="family">Schütz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nadine</namePart>
<namePart type="family">Probol</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Melanie</namePart>
<namePart type="family">Siegel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dirk</namePart>
<namePart type="family">Labudde</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth Workshop on Online Abuse and Harms (WOAH)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kanika</namePart>
<namePart type="family">Narang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aida</namePart>
<namePart type="family">Mostafazadeh Davani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lambert</namePart>
<namePart type="family">Mathias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bertie</namePart>
<namePart type="family">Vidgen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zeerak</namePart>
<namePart type="family">Talat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, Washington (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this work, we present a new publicly available offensive language dataset of 10.278 German social media comments collected in the first half of 2021 that were annotated by in total six annotators. With twelve different annotation categories, it is far more comprehensive than other datasets, and goes beyond just hate speech detection. The labels aim in particular also at toxicity, criminal relevance and discrimination types of comments. Furthermore, about half of the comments are from coherent parts of conversations, which opens the possibility to consider the comments’ contexts and do conversation analyses in order to research the contagion of offensive language in conversations.</abstract>
<identifier type="citekey">demus-etal-2022-comprehensive</identifier>
<identifier type="doi">10.18653/v1/2022.woah-1.14</identifier>
<location>
<url>https://aclanthology.org/2022.woah-1.14/</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>143</start>
<end>153</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DeTox: A Comprehensive Dataset for German Offensive Language and Conversation Analysis
%A Demus, Christoph
%A Pitz, Jonas
%A Schütz, Mina
%A Probol, Nadine
%A Siegel, Melanie
%A Labudde, Dirk
%Y Narang, Kanika
%Y Mostafazadeh Davani, Aida
%Y Mathias, Lambert
%Y Vidgen, Bertie
%Y Talat, Zeerak
%S Proceedings of the Sixth Workshop on Online Abuse and Harms (WOAH)
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, Washington (Hybrid)
%F demus-etal-2022-comprehensive
%X In this work, we present a new publicly available offensive language dataset of 10.278 German social media comments collected in the first half of 2021 that were annotated by in total six annotators. With twelve different annotation categories, it is far more comprehensive than other datasets, and goes beyond just hate speech detection. The labels aim in particular also at toxicity, criminal relevance and discrimination types of comments. Furthermore, about half of the comments are from coherent parts of conversations, which opens the possibility to consider the comments’ contexts and do conversation analyses in order to research the contagion of offensive language in conversations.
%R 10.18653/v1/2022.woah-1.14
%U https://aclanthology.org/2022.woah-1.14/
%U https://doi.org/10.18653/v1/2022.woah-1.14
%P 143-153
Markdown (Informal)
[DeTox: A Comprehensive Dataset for German Offensive Language and Conversation Analysis](https://aclanthology.org/2022.woah-1.14/) (Demus et al., WOAH 2022)
ACL