From a2e1548ee666c4f4b0d625ca788935f32fcc2dc9 Mon Sep 17 00:00:00 2001 From: Lyudmila Vaseva <vaseva@mi.fu-berlin.de> Date: Sun, 24 Feb 2019 12:53:51 +0100 Subject: [PATCH] Deepen potential harassment investigation --- src/explore.ipynb | 159 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 140 insertions(+), 19 deletions(-) diff --git a/src/explore.ipynb b/src/explore.ipynb index d1e1e2f..58fd1af 100644 --- a/src/explore.ipynb +++ b/src/explore.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -715,8 +715,144 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Potential harassment\n", + "## Potential harassment" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " af_id af_hidden af_public_comments \\\n", + "14 189 0 BLP vandalism or libel \n", + "16 380 1 Multiple obscenities \n", + "23 686 0 IP adding possibly unreferenced material to BLP \n", + "42 247 1 Adding emails in articles \n", + "45 11 0 You/He/She/It sucks \n", + "53 339 0 Claims of homosexuality, bisexuality, or trans... \n", + "72 9 0 Personal attacks by unregistered or new user \n", + "74 466 1 Userspace & talk page spamming \n", + "93 460 0 Feedback: Foul words \n", + "136 478 1 Talk page abuse \n", + "148 97 0 Personal attacks by new user \n", + "150 294 1 Personal attacks \n", + "154 463 1 Feedback: Adding email addresses \n", + "156 874 1 Long term abuse username / impersonation creat... \n", + "168 497 0 Feedback: Common Vandalism 5 \n", + "173 494 0 Feedback: Common Vandalism 2 \n", + "187 495 0 Feedback: Common Vandalism 3 \n", + "190 496 0 Feedback: Common Vandalism 4 \n", + "199 233 1 Targeted user talk vandalism \n", + "202 472 0 Feedback: Addition of bad words \n", + "209 273 1 Repeated obscenities \n", + "212 667 1 Wikipedia:Long-term abuse/Best known for IP \n", + "215 179 1 Ongoing attacks \n", + "226 921 0 Suspicious claims of nazism \n", + "227 475 0 Feedback: Vandalism or libel \n", + "236 474 0 Feedback: Comment with only obscenities \n", + "245 755 1 LTA trolling \n", + "247 834 1 Possible attacks on other users \n", + "250 341 1 Persistent talk page abuse from IP ranges \n", + "255 859 1 LTA #859 \n", + "258 739 1 User talk vandal \n", + "286 445 1 Disruptive user threatening self harm \n", + "309 742 1 BT UserTalkPage stalker \n", + "349 461 0 Feedback: Vandalism in all caps \n", + "360 389 1 Attacks on user talk pages \n", + "445 513 1 Persistent talk page abuse from IP ranges II \n", + "457 769 1 AfD nomination harassment \n", + "465 288 1 IP reverting/wikistalking by banned user \n", + "485 140 1 Long-term harassment case \n", + "506 567 1 Disruptive CSD nominations \n", + "512 660 1 Possibly JarlaxleArtemis (LTA) \n", + "522 330 1 Attacks on editors \n", + "524 293 1 J.delanoy attacks \n", + "527 263 1 Serafin - talk page abuse \n", + "544 480 1 Swamilive \n", + "554 945 1 proxy talk page abuse \n", + "571 792 1 Harassment \n", + "594 938 1 Targeted abuse \n", + "651 319 1 Attacks on User:Rodhullandemu \n", + "664 884 1 Nsmutte \n", + "668 462 1 Serial harassment of MuZemike \n", + "671 556 1 Impersonation usernames \n", + "752 318 1 School attacks \n", + "833 537 0 Transient harassment sites filter \n", + "947 754 1 Harassment by IP hopper \n", + "\n", + " manual_tags \n", + "14 vandalism, harassment \n", + "16 vandalism, harassment \n", + "23 vandalism, harassment, biased_pov \n", + "42 misc?, harassment? \n", + "45 vandalism, harassment \n", + "53 vandalism, harassment \n", + "72 harassment \n", + "74 vandalism?, harassment? \n", + "93 vandalism, harassment \n", + "136 vandalism?, harassment? \n", + "148 harassment \n", + "150 harassment? \n", + "154 harassment \n", + "156 harassment \n", + "168 vandalism, harassment? \n", + "173 vandalism?, harassment? \n", + "187 vandalism, harassment? \n", + "190 vandalism, harassment? \n", + "199 vandalism?, harassment? \n", + "202 vandalism, harassment? \n", + "209 vandalism?, harassment? \n", + "212 vandalism?, harassment? \n", + "215 vandalism?, harassment? \n", + "226 vandalism?, harassment? \n", + "227 vandalism, harassment \n", + "236 harassment \n", + "245 vandalism, harassment? \n", + "247 vandalism?, harassment? \n", + "250 harassment?, vandalism? \n", + "255 vandalism?, harassment? \n", + "258 vandalism?, harassment? \n", + "286 vandalism?, harassment? \n", + "309 harassment? \n", + "349 vandalism, harassment? \n", + "360 vandalism, harassment \n", + "445 harassment?, vandalism?, abuse? \n", + "457 harassment \n", + "465 harassment \n", + "485 harassment, long_term_abuse \n", + "506 vandalism?, harassment? \n", + "512 long_term_abuse, vandalism, religious_vandalis... \n", + "522 harassment \n", + "524 harassment? \n", + "527 abuse?, harassment? \n", + "544 vandalism, sockpuppetry, harassment \n", + "554 abuse?, harassment? \n", + "571 harassment \n", + "594 abuse?, harassment? \n", + "651 harassment \n", + "664 sockpuppetry, long_term_abuse, personal_attack... \n", + "668 harassment \n", + "671 vandalism, harassment? \n", + "752 harassment? \n", + "833 harassment? \n", + "947 harassment \n" + ] + } + ], + "source": [ + "df_harassment_tagged = df[df['manual_tags'].fillna('').str.contains('harassment')]\n", "\n", + "print(df_harassment_tagged[['af_id', 'af_hidden', 'af_public_comments', 'manual_tags']])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "Another idea would be to classify filters according to the namespaces they cover. A filter targeting the talk/user name spaces may be indicative of dealing with personal attacks or harassment." ] }, @@ -807,44 +943,29 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Following filters seem to be potentially targeting harassment:\n", + "Following filters seem to be potentially targeting harassment: (manually kick out all that obviously do not have anything to do with harassment)\n", "\n", " af_id af_public_comments\n", - " 37 320 \"Your mom\" Vandalism\n", " 67 803 Prevent new users from editing other's user pages\n", " 101 602 Arbitration discretionary sanctions alerts\n", " 109 733 New user creating a page in someone else's use...\n", - " 134 878 New user removing COI template\n", " 160 5 User self-renaming or moving user talk pages i...\n", - " 177 850 New user moving page to project space\n", " 193 840 Indexing user pages\n", " 239 930 Prevent indexing userspaces by newer users\n", " 244 134 Template deletion notifications transcluded in...\n", " 274 99 Edits to an other user's userspace\n", " 285 123 New users moving other users' pages\n", - " 296 369 New user editing AFD discussions\n", " 302 828 Redirecting talk page\n", " 329 212 New user placing comments without a header on ...\n", - " 353 192 Direct use of stub categories in articles\n", " 391 928 Transclusion of userpages\n", " 424 168 Non-admins responding to unblock requests\n", - " 427 185 Cross-namespace move\n", " 441 848 Large contributions test filter\n", " 448 144 Hiding content of pages\n", - " 499 694 Moves to or from the Module namespace\n", " 516 910 Maureen Wroblewitz spammer\n", - " 520 635 OTRS template added by non-OTRS member\n", " 578 437 Title blacklist for TITLES with more than 9 caps\n", " 619 6 Users editing editnotices of other users\n", " 643 15 Discussion page vandalism\n", - " 653 897 Weird spambot\n", - " 657 616 disruption of music articles\n", - " 669 219 Arbcom requested filter (FOFF)\n", " 730 207 Non-admins reviewing unblock requests\n", - " 746 658 Disallow moving articles to the Topic namespace\n", - " 754 127 Helpme tag in content namespace\n", - " 784 118 The final selection of TFA's is made by Raul65...\n", - " 785 379 username registrations by trademarkia\n", " 863 67 Sockpuppetry at AfD discussions\n", " 866 329 SPI disruption\n", " 921 427 Possible Emergency Reponse Needed" -- GitLab