diff --git a/src/20190106115600_filters-sorted-by-hits-manual-tags.csv b/src/20190106115600_filters-sorted-by-hits-manual-tags.csv index 0da98d9e865e55c5b324ed80ac1026bdd03c40da..5eed980ef0675796c6985ed50516b5ac2df7bc56 100644 --- a/src/20190106115600_filters-sorted-by-hits-manual-tags.csv +++ b/src/20190106115600_filters-sorted-by-hits-manual-tags.csv @@ -449,7 +449,7 @@ 446 250 1 0 0 1 0 default 20110310110912 disallow 122 Combined pagemove/edit summary vandalism vandalism 447 44 1 0 0 1 0 default 20090807112546 121 MAM vandalism vandalism 448 144 0 0 0 1 0 default 20100106173410 120 Hiding content of pages vandalism?, biased_pov? deleted; “5076 Only catches a few bad-faith edits per month. Disabling. -Wknight94” “Filter to (eventually) disallow attempts to hide the content of a page with HTML tags. Will trip for new and unregistered users. - Hersfold “ -449 838 0 0 1 0 0 default 20181113040819 warn 119 Main page hello worlds vandalism?, maintenance? “Multiple sources seem to have published a link to the english wikipedia to write via the api sandbox. -- Xaosflux” +449 838 0 0 1 0 0 default 20181113040819 warn 119 Main page hello worlds vandalism?, maintenance? “Multiple sources seem to have published a link to the english wikipedia to write via the api sandbox. -- Xaosflux” 450 453 1 0 0 1 0 default 20130404081115 warn,disallow 119 Temporary block on making specific kinds of template edits vandalism? hidden 451 596 1 0 0 0 0 default 20160812035607 disallow 117 Elevator vandal vandalism 452 281 1 0 0 1 0 default 20100108215922 115 Unwatched BLPs unknown hidden @@ -512,7 +512,7 @@ 509 312 1 0 0 1 0 default 20150212173501 62 External link removal vandalism?, biased_pov? hidden 510 621 1 0 0 1 0 default 20160614035403 disallow 62 Spambot Filter spam? hidden 511 328 1 0 0 1 0 default 20130404080744 61 Prolific socker III sockpuppetry -512 660 1 0 0 0 0 default 20150128141314 61 Possibly JarlaxleArtemis (LTA) long_term_abuse, vandalism, religious, politically_motivated, harassment https://en.wikipedia.org/wiki/Wikipedia:Long-term_abuse/JarlaxleArtemis; “Cross-wiki harassment and threats of death and violence; criticism of liberals, the far-left and SJWs; criticism of anything not involving two clearly defined genders; criticism of Islam and anything not supporting Israel and Judaism; and usage of open proxies.” +512 660 1 0 0 0 0 default 20150128141314 61 Possibly JarlaxleArtemis (LTA) long_term_abuse, vandalism, religious_vandalism, politically_motivated, harassment https://en.wikipedia.org/wiki/Wikipedia:Long-term_abuse/JarlaxleArtemis; “Cross-wiki harassment and threats of death and violence; criticism of liberals, the far-left and SJWs; criticism of anything not involving two clearly defined genders; criticism of Islam and anything not supporting Israel and Judaism; and usage of open proxies.” 513 668 1 0 0 0 0 default 20150429015714 disallow 61 GP disruption vandalism? hidden 514 226 1 0 0 1 0 default 20091003121354 61 Johnali123 sockpuppets sockpuppetry 515 4 1 0 0 1 0 default 20160812192142 60 Copernicus vandal vandalism @@ -670,12 +670,12 @@ Marked as deleted. - Ruslik""," 666 929 0 0 1 0 0 default 20180826201614 12 LTA Hydro long_term_abuse, bad_style? “long term editor attempting to use WP as a soap box for a domain name dispute” 667 683 1 0 0 1 0 default 20160614040802 12 New users removing sections from the Reference Desk vandalism? hidden 668 462 1 0 0 1 0 default 20120820155230 disallow 12 Serial harassment of MuZemike harassment hidden -669 219 0 0 0 1 0 default 20090803161738 12 Arbcom requested filter (FOFF) silly_vandalism, personal_attack? filters “fuck you” and “fuck off” +669 219 0 0 0 1 0 default 20090803161738 12 Arbcom requested filter (FOFF) silly_vandalism, personal_attacks? filters “fuck you” and “fuck off” 670 780 0 0 0 0 0 default 20160812030457 11 Addition of interlanguage links maintenance, vandalism? hidden; “Reason: Interwikis are now handled by Wikidata.”; “Disabling. Too infrequent and for the good-faith edits I'm not too happy with even throwing a warning ~MA 2016.08.11” 671 556 1 0 0 1 0 default 20161012202550 disallow 11 Impersonation usernames vandalism, harassment? hidden 672 332 1 0 0 1 0 default 20100522071643 11 J.delanoy vandal vandalism hidden 673 598 1 0 0 1 0 default 20140102175720 warn,disallow 11 Template redirect protection vandalism? hidden -674 158 0 0 0 1 0 default 20090617081158 disallow 11 Bill Posey = alligator child silly_vandalism?, personal_attack? deleted +674 158 0 0 0 1 0 default 20090617081158 disallow 11 Bill Posey = alligator child silly_vandalism?, personal_attacks? deleted 675 451 1 0 0 1 0 default 20130404081107 disallow 11 IP attacks vandalism? hidden 676 727 0 0 0 0 0 default 20160126000441 11 Samwalton9 test filter test hidden 677 500 1 0 0 1 0 default 20160929181057 disallow 11 Sock sockpuppetry hidden diff --git a/src/explore.ipynb b/src/explore.ipynb index 20476dbeb2156817b3139c6f488ac60dc7e2c0a7..c449caf302b82158267272e9cfa7f72411c1f67f 100644 --- a/src/explore.ipynb +++ b/src/explore.ipynb @@ -29,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 38, "metadata": {}, "outputs": [], "source": [ @@ -66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -76,7 +76,8 @@ "201\n", "753\n", "600\n", - "110\n" + "110\n", + "0\n" ] } ], @@ -91,7 +92,10 @@ "print (len(df.query('af_deleted==1')))\n", "\n", "# Active public filters\n", - "print (len(df.query('af_hidden==0 and af_enabled==1')))" + "print (len(df.query('af_hidden==0 and af_enabled==1')))\n", + "\n", + "# Deleted and enabled\n", + "print (len(df.query('af_deleted==1 and af_enabled==1')))" ] }, { @@ -220,7 +224,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -302,14 +306,14 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 39, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[('vandalism', 261), ('vandalism?', 161), ('unknown', 71), ('good_faith?', 63), ('misc', 59), ('sockpuppetry', 59), ('good_faith', 48), ('test', 43), ('long_term_abuse', 35), ('sockpuppetry?', 35), ('harassment?', 31), ('harassment', 24), ('spam? ', 21), ('abuse?', 21), ('spam?', 19), ('biased_pov', 17), ('spam', 17), ('biased_pov?', 15), ('bad_style', 13), ('', 13), ('bad_style?', 11), ('bug?', 10), ('wiki_policy?', 9), ('misc?', 8), ('seo', 8), ('unclear', 8), ('maintenance', 7), ('trolling?', 7), ('long_term_abuse?', 6), ('personal_attacks', 6), ('bug', 5), ('not sure', 5), ('page_move_vandalism', 5), ('lazyness', 4), ('seo?', 4), ('test?', 4), ('politically_motivated?', 4), ('hoaxing?', 4), ('silly_vandalism', 4), ('long_term_abuse? ', 3), ('maintenance?', 3), ('copyright', 3), ('image_vandalism', 3), ('page_move_vandalism?', 3), ('conflict_of_interest', 3), ('stockbrocker_vandalism', 3), ('edit_warring?', 2), ('religious_vandalism?', 2), ('self_promotion?', 2), ('politically motivated?', 2), ('template_spam', 2), ('personal_attack?', 2), ('doxxing?', 2), ('talk_page_vandalism', 2), ('personal_attacks?', 2), ('bot_vandalism', 2), ('possible vandalism', 1), ('copyright?', 1), ('misc? vandalism', 1), ('not_polite', 1), ('test_bot?', 1), ('copyright_violation?', 1), ('political controversy?', 1), ('edit_war?', 1), ('maintenance? ', 1), ('religious', 1), ('political vandalism', 1), ('self_promotion', 1), ('political vandalism?', 1), ('abuse', 1), ('can’t decide', 1), ('vandalism (template vandalism)?', 1), ('spam? Link vandalism?', 1), ('politically motivated', 1), ('abuse_of_tags_vandalism?', 1), ('avoidant_vandalism', 1), ('guideline_vio?', 1), ('vandalbot', 1), ('username_vandalism?', 1), ('hoaxing', 1), ('unencyclopedic', 1), ('stupid_vandalism', 1), ('stupid_vandalism?', 1), ('silly_vandalism?', 1), ('take_page_vandalism', 1), ('unknown?', 1), ('phishing?', 1), ('avoidant_vandalism?', 1), ('malware?', 1), ('malware', 1), ('conflict_of_interest?', 1), ('bad_style? (information not confirmed?)', 1), ('maintainance?', 1), ('bot..?', 1), ('bot_vandalism?', 1), ('hoax', 1), ('impersonation', 1), ('prank', 1), ('vandalbots', 1)]\n" + "[('vandalism', 263), ('vandalism?', 162), ('unknown', 71), ('good_faith?', 63), ('misc', 59), ('sockpuppetry', 59), ('good_faith', 48), ('test', 43), ('spam?', 41), ('long_term_abuse', 35), ('sockpuppetry?', 35), ('harassment?', 31), ('harassment', 24), ('abuse?', 21), ('biased_pov', 17), ('spam', 17), ('biased_pov?', 15), ('unclear', 14), ('bad_style', 13), ('bad_style?', 12), ('bug?', 10), ('wiki_policy?', 9), ('long_term_abuse?', 9), ('misc?', 8), ('seo', 8), ('politically_motivated?', 8), ('maintenance', 7), ('trolling?', 7), ('maintenance?', 6), ('personal_attacks', 6), ('bug', 5), ('vandalbot', 5), ('page_move_vandalism', 5), ('silly_vandalism', 5), ('lazyness', 4), ('seo?', 4), ('test?', 4), ('hoaxing?', 4), ('personal_attacks?', 4), ('edit_warring?', 3), ('copyright', 3), ('image_vandalism', 3), ('talk_page_vandalism', 3), ('page_move_vandalism?', 3), ('conflict_of_interest', 3), ('stockbrocker_vandalism', 3), ('copyright?', 2), ('vandalbot?', 2), ('religious_vandalism?', 2), ('politically_motivated', 2), ('self_promotion?', 2), ('template_spam', 2), ('hoaxing', 2), ('silly_vandalism?', 2), ('doxxing?', 2), ('not_polite', 1), ('template_vandalism', 1), ('religious_vandalism', 1), ('self_promotion', 1), ('abuse', 1), ('template_vandalism?', 1), ('link_vandalism?', 1), ('abuse_of_tags_vandalism?', 1), ('avoidant_vandalism', 1), ('guideline_vio?', 1), ('username_vandalism?', 1), ('phishing?', 1), ('avoidant_vandalism?', 1), ('malware?', 1), ('malware', 1), ('conflict_of_interest?', 1), ('impersonation', 1), ('prank', 1)]\n" ] } ], @@ -321,6 +325,100 @@ "print(collections.Counter(all_tags).most_common())" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "('vandalism', 263),\n", + "('vandalism?', 162),\n", + " ('spam?', 41),\n", + " ('spam', 17),\n", + " ('vandalbot', 5),\n", + " ('vandalbot?', 2),\n", + " ('page_move_vandalism', 5),\n", + " ('page_move_vandalism?', 3),\n", + " ('silly_vandalism', 5),\n", + " ('silly_vandalism?', 2),\n", + " ('trolling?', 7),\n", + " ('hoaxing?', 4),\n", + " ('hoaxing', 2),\n", + " ('copyright', 3),\n", + " ('copyright?', 2),\n", + " ('image_vandalism', 3),\n", + " ('talk_page_vandalism', 3),\n", + " ('template_vandalism?', 1),\n", + " ('template_vandalism', 1),\n", + " ('template_spam', 2),\n", + " ('link_vandalism?', 1),\n", + " ('abuse_of_tags_vandalism?', 1),\n", + " ('avoidant_vandalism', 1),\n", + " ('avoidant_vandalism?', 1),\n", + " ('username_vandalism?', 1),\n", + "\n", + "('prank', 1)\n", + "\n", + "('phishing?', 1),\n", + "('malware?', 1),\n", + "('malware', 1),\n", + "\n", + "('guideline_vio?', 1),\n", + "\n", + "('religious_vandalism?', 3),\n", + "('politically_motivated?', 8),\n", + "('politically_motivated', 2),\n", + "\n", + "('sockpuppetry', 59),\n", + "('sockpuppetry?', 35),\n", + "('long_term_abuse', 35),\n", + "('long_term_abuse?', 9),\n", + "('abuse', 1),\n", + "('abuse?', 21),\n", + "('harassment?', 31),\n", + "('harassment', 24),\n", + "('doxxing?', 2),\n", + "('personal_attacks', 6),\n", + "('personal_attacks?', 4),\n", + "('impersonation', 1),\n", + "('not_polite', 1),\n", + "\n", + "('biased_pov', 17),\n", + "('biased_pov?', 15),\n", + "\n", + "('conflict_of_interest', 3),\n", + "('stockbrocker_vandalism', 3),\n", + "('self_promotion?', 2),\n", + "('conflict_of_interest?', 1),\n", + "('self_promotion', 1),\n", + "\n", + "('seo', 8),\n", + "('seo?', 4),\n", + "\n", + "('bad_style', 13),\n", + "('bad_style?', 12),\n", + "('edit_warring?', 3),\n", + "\n", + "('good_faith?', 63),\n", + "('good_faith', 48),\n", + "\n", + "('lazyness', 4),\n", + "\n", + "('maintenance', 7),\n", + "('maintenance?', 5),\n", + "('maintenance? ', 1),\n", + "\n", + "('bug', 5),\n", + "('bug?', 10),\n", + "('wiki_policy?', 9),\n", + "\n", + "('test', 43),\n", + "('test?', 4),\n", + "\n", + "('unknown', 71),\n", + "('misc', 59),\n", + "('misc?', 8),\n", + "('unclear', 14)," + ] + }, { "cell_type": "markdown", "metadata": {},