diff --git a/src/explore.ipynb b/src/explore.ipynb index 58fd1af91f32297343c66e66590c8cb998a15fe3..352f123f1743e0bdfdcd4fbace27cba0c079ef3c 100644 --- a/src/explore.ipynb +++ b/src/explore.ipynb @@ -204,7 +204,8 @@ "print (len(df.query('af_group==\"default\"')))\n", "print (df.query('af_group!=\"default\"'))\n", "\n", - "# --> so available groups are \"default\" and \"feedback\"" + "# --> so available groups are \"default\" and \"feedback\"\n", + "# TODO: question: what do they mean?" ] }, { @@ -272,7 +273,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -289,6 +290,625 @@ "print(collections.Counter(list(active_public['af_actions'].fillna(''))).most_common())" ] }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Unnamed: 0</th>\n", + " <th>af_id</th>\n", + " <th>af_hidden</th>\n", + " <th>af_global</th>\n", + " <th>af_enabled</th>\n", + " <th>af_deleted</th>\n", + " <th>af_throttled</th>\n", + " <th>af_group</th>\n", + " <th>af_timestamp</th>\n", + " <th>af_actions</th>\n", + " <th>af_hit_count</th>\n", + " <th>af_public_comments</th>\n", + " <th>manual_tags</th>\n", + " <th>notes</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>3</td>\n", + " <td>384</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20181018171832</td>\n", + " <td>disallow</td>\n", + " <td>1159239</td>\n", + " 
<td>Addition of bad words or other vandalism</td>\n", + " <td>vandalism</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>12</td>\n", + " <td>225</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20180807154519</td>\n", + " <td>disallow</td>\n", + " <td>482872</td>\n", + " <td>Vandalism in all caps</td>\n", + " <td>vandalism</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>19</td>\n", + " <td>46</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20181023193500</td>\n", + " <td>disallow</td>\n", + " <td>356945</td>\n", + " <td>\"Poop\" vandalism</td>\n", + " <td>vandalism</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>22</th>\n", + " <td>22</td>\n", + " <td>260</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20181130204255</td>\n", + " <td>disallow</td>\n", + " <td>286852</td>\n", + " <td>Common vandal phrases</td>\n", + " <td>vandalism</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>37</th>\n", + " <td>37</td>\n", + " <td>320</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20181023193636</td>\n", + " <td>disallow</td>\n", + " <td>152994</td>\n", + " <td>\"Your mom\" Vandalism</td>\n", + " <td>vandalism</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>41</th>\n", + " <td>41</td>\n", + " <td>12</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20170606215509</td>\n", + " <td>disallow</td>\n", + " <td>122756</td>\n", + " <td>Replacing a page with obscenities</td>\n", + " <td>vandalism</td>\n", + " <td>NaN</td>\n", + " 
</tr>\n", + " <tr>\n", + " <th>47</th>\n", + " <td>47</td>\n", + " <td>680</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20181023195329</td>\n", + " <td>disallow</td>\n", + " <td>95242</td>\n", + " <td>Adding emoji unicode characters</td>\n", + " <td>good_faith</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>54</th>\n", + " <td>54</td>\n", + " <td>365</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20181023193820</td>\n", + " <td>disallow</td>\n", + " <td>85470</td>\n", + " <td>Unusual changes to featured or good content</td>\n", + " <td>vandalism</td>\n", + " <td>Unusual == unusually big (edit_delta > 15000 |...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>67</th>\n", + " <td>67</td>\n", + " <td>803</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20181023223916</td>\n", + " <td>disallow</td>\n", + " <td>46756</td>\n", + " <td>Prevent new users from editing other's user pages</td>\n", + " <td>vandalism, good_faith</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>99</th>\n", + " <td>99</td>\n", + " <td>782</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20181023223745</td>\n", + " <td>disallow</td>\n", + " <td>16702</td>\n", + " <td>Content Translation Edits</td>\n", + " <td>misc</td>\n", + " <td>why is this not allowed? 
Seems to be translati...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>107</th>\n", + " <td>107</td>\n", + " <td>420</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20181211033934</td>\n", + " <td>throttle,disallow</td>\n", + " <td>13874</td>\n", + " <td>Large removal of talk page content by IP</td>\n", + " <td>vandalism?</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>110</th>\n", + " <td>110</td>\n", + " <td>554</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20181023194722</td>\n", + " <td>disallow</td>\n", + " <td>13394</td>\n", + " <td>top100 blog charts</td>\n", + " <td>seo?, vandalism?, spam?</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>125</th>\n", + " <td>125</td>\n", + " <td>890</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20181207113011</td>\n", + " <td>warn,disallow</td>\n", + " <td>9932</td>\n", + " <td>Random typing in username</td>\n", + " <td>vandalism?</td>\n", + " <td>“This is fascinating: the lack of output from ...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>130</th>\n", + " <td>130</td>\n", + " <td>784</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20170419165539</td>\n", + " <td>disallow</td>\n", + " <td>9265</td>\n", + " <td>Harambe vandalism</td>\n", + " <td>vandalism</td>\n", + " <td>https://en.wikipedia.org/wiki/Killing_of_Harambe</td>\n", + " </tr>\n", + " <tr>\n", + " <th>158</th>\n", + " <td>158</td>\n", + " <td>887</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20181105103817</td>\n", + " <td>warn,disallow</td>\n", + " <td>4589</td>\n", + " <td>Excessive 
repetition in usernames</td>\n", + " <td>vandalism?, spam?, sockpuppetry?</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>171</th>\n", + " <td>171</td>\n", + " <td>860</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20181112215348</td>\n", + " <td>disallow</td>\n", + " <td>3451</td>\n", + " <td>Ryan Ross vandalism</td>\n", + " <td>vandalism</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>234</th>\n", + " <td>234</td>\n", + " <td>892</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20181023224029</td>\n", + " <td>disallow</td>\n", + " <td>1245</td>\n", + " <td>RS linked through proxy</td>\n", + " <td>bad_style?, misc?</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>239</th>\n", + " <td>239</td>\n", + " <td>930</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20181023224118</td>\n", + " <td>disallow</td>\n", + " <td>1165</td>\n", + " <td>Prevent indexing userspaces by newer users</td>\n", + " <td>wiki_policy?</td>\n", + " <td>https://en.wikipedia.org/wiki/Wikipedia:Contro...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>268</th>\n", + " <td>268</td>\n", + " <td>812</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20161212220959</td>\n", + " <td>disallow</td>\n", + " <td>726</td>\n", + " <td>Unreasonably large addition of content</td>\n", + " <td>vandalism?</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>271</th>\n", + " <td>271</td>\n", + " <td>642</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20181023194832</td>\n", + " <td>disallow</td>\n", + " <td>712</td>\n", + " 
<td>OTRS template added by non-OTRS member (global)</td>\n", + " <td>good_faith?</td>\n", + " <td>from comments: “This filter is easy to subvert...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>302</th>\n", + " <td>302</td>\n", + " <td>828</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20171102075753</td>\n", + " <td>disallow</td>\n", + " <td>527</td>\n", + " <td>Redirecting talk page</td>\n", + " <td>vandalism</td>\n", + " <td>“Built to prevent IP disruption of talk pages,...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>328</th>\n", + " <td>328</td>\n", + " <td>788</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20170309083707</td>\n", + " <td>disallow</td>\n", + " <td>374</td>\n", + " <td>IP removing report from RFPP</td>\n", + " <td>vandalism</td>\n", + " <td>RFP = Request for page protection</td>\n", + " </tr>\n", + " <tr>\n", + " <th>470</th>\n", + " <td>470</td>\n", + " <td>843</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20170628192710</td>\n", + " <td>disallow</td>\n", + " <td>98</td>\n", + " <td>Prevent new users from creating redirects to [...</td>\n", + " <td>vandalism, sockpuppetry</td>\n", + " <td>https://en.wikipedia.org/w/index.php?title=Wik...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>499</th>\n", + " <td>499</td>\n", + " <td>694</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20181023223653</td>\n", + " <td>disallow</td>\n", + " <td>74</td>\n", + " <td>Moves to or from the Module namespace</td>\n", + " <td>good_faith</td>\n", + " <td>“These types of moves don't do what users expe...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>653</th>\n", + " <td>653</td>\n", + " <td>897</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20180123142941</td>\n", + " <td>disallow</td>\n", + " <td>14</td>\n", + " <td>Weird spambot</td>\n", + " <td>spam, vandalbot</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Unnamed: 0 af_id af_hidden af_global af_enabled af_deleted \\\n", + "3 3 384 0 0 1 0 \n", + "12 12 225 0 0 1 0 \n", + "19 19 46 0 0 1 0 \n", + "22 22 260 0 0 1 0 \n", + "37 37 320 0 0 1 0 \n", + "41 41 12 0 0 1 0 \n", + "47 47 680 0 0 1 0 \n", + "54 54 365 0 0 1 0 \n", + "67 67 803 0 0 1 0 \n", + "99 99 782 0 0 1 0 \n", + "107 107 420 0 0 1 0 \n", + "110 110 554 0 0 1 0 \n", + "125 125 890 0 0 1 0 \n", + "130 130 784 0 0 1 0 \n", + "158 158 887 0 0 1 0 \n", + "171 171 860 0 0 1 0 \n", + "234 234 892 0 0 1 0 \n", + "239 239 930 0 0 1 0 \n", + "268 268 812 0 0 1 0 \n", + "271 271 642 0 0 1 0 \n", + "302 302 828 0 0 1 0 \n", + "328 328 788 0 0 1 0 \n", + "470 470 843 0 0 1 0 \n", + "499 499 694 0 0 1 0 \n", + "653 653 897 0 0 1 0 \n", + "\n", + " af_throttled af_group af_timestamp af_actions af_hit_count \\\n", + "3 0 default 20181018171832 disallow 1159239 \n", + "12 0 default 20180807154519 disallow 482872 \n", + "19 0 default 20181023193500 disallow 356945 \n", + "22 0 default 20181130204255 disallow 286852 \n", + "37 0 default 20181023193636 disallow 152994 \n", + "41 0 default 20170606215509 disallow 122756 \n", + "47 0 default 20181023195329 disallow 95242 \n", + "54 0 default 20181023193820 disallow 85470 \n", + "67 0 default 20181023223916 disallow 46756 \n", + "99 0 default 20181023223745 disallow 16702 \n", + "107 0 default 20181211033934 throttle,disallow 13874 \n", + "110 0 default 20181023194722 disallow 13394 \n", + "125 0 default 20181207113011 warn,disallow 9932 \n", + "130 0 default 20170419165539 disallow 9265 \n", + "158 0 default 20181105103817 warn,disallow 4589 \n", + "171 0 default 
20181112215348 disallow 3451 \n", + "234 0 default 20181023224029 disallow 1245 \n", + "239 0 default 20181023224118 disallow 1165 \n", + "268 0 default 20161212220959 disallow 726 \n", + "271 0 default 20181023194832 disallow 712 \n", + "302 0 default 20171102075753 disallow 527 \n", + "328 0 default 20170309083707 disallow 374 \n", + "470 0 default 20170628192710 disallow 98 \n", + "499 0 default 20181023223653 disallow 74 \n", + "653 0 default 20180123142941 disallow 14 \n", + "\n", + " af_public_comments \\\n", + "3 Addition of bad words or other vandalism \n", + "12 Vandalism in all caps \n", + "19 \"Poop\" vandalism \n", + "22 Common vandal phrases \n", + "37 \"Your mom\" Vandalism \n", + "41 Replacing a page with obscenities \n", + "47 Adding emoji unicode characters \n", + "54 Unusual changes to featured or good content \n", + "67 Prevent new users from editing other's user pages \n", + "99 Content Translation Edits \n", + "107 Large removal of talk page content by IP \n", + "110 top100 blog charts \n", + "125 Random typing in username \n", + "130 Harambe vandalism \n", + "158 Excessive repetition in usernames \n", + "171 Ryan Ross vandalism \n", + "234 RS linked through proxy \n", + "239 Prevent indexing userspaces by newer users \n", + "268 Unreasonably large addition of content \n", + "271 OTRS template added by non-OTRS member (global) \n", + "302 Redirecting talk page \n", + "328 IP removing report from RFPP \n", + "470 Prevent new users from creating redirects to [... \n", + "499 Moves to or from the Module namespace \n", + "653 Weird spambot \n", + "\n", + " manual_tags \\\n", + "3 vandalism \n", + "12 vandalism \n", + "19 vandalism \n", + "22 vandalism \n", + "37 vandalism \n", + "41 vandalism \n", + "47 good_faith \n", + "54 vandalism \n", + "67 vandalism, good_faith \n", + "99 misc \n", + "107 vandalism? \n", + "110 seo?, vandalism?, spam? \n", + "125 vandalism? \n", + "130 vandalism \n", + "158 vandalism?, spam?, sockpuppetry? 
\n", + "171 vandalism \n", + "234 bad_style?, misc? \n", + "239 wiki_policy? \n", + "268 vandalism? \n", + "271 good_faith? \n", + "302 vandalism \n", + "328 vandalism \n", + "470 vandalism, sockpuppetry \n", + "499 good_faith \n", + "653 spam, vandalbot \n", + "\n", + " notes \n", + "3 NaN \n", + "12 NaN \n", + "19 NaN \n", + "22 NaN \n", + "37 NaN \n", + "41 NaN \n", + "47 NaN \n", + "54 Unusual == unusually big (edit_delta > 15000 |... \n", + "67 NaN \n", + "99 why is this not allowed? Seems to be translati... \n", + "107 NaN \n", + "110 NaN \n", + "125 “This is fascinating: the lack of output from ... \n", + "130 https://en.wikipedia.org/wiki/Killing_of_Harambe \n", + "158 NaN \n", + "171 NaN \n", + "234 NaN \n", + "239 https://en.wikipedia.org/wiki/Wikipedia:Contro... \n", + "268 NaN \n", + "271 from comments: “This filter is easy to subvert... \n", + "302 “Built to prevent IP disruption of talk pages,... \n", + "328 RFP = Request for page protection \n", + "470 https://en.wikipedia.org/w/index.php?title=Wik... \n", + "499 “These types of moves don't do what users expe... 
\n", + "653 NaN " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#TODO: all ids of active public filters set to disallow\n", + "active_public[active_public['af_actions'].fillna('').str.contains('disallow')]" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -549,11 +1169,166 @@ " print(active_public[['af_id', 'af_actions', 'manual_tags']].fillna(''))\n" ] }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Unnamed: 0</th>\n", + " <th>af_id</th>\n", + " <th>af_hidden</th>\n", + " <th>af_global</th>\n", + " <th>af_enabled</th>\n", + " <th>af_deleted</th>\n", + " <th>af_throttled</th>\n", + " <th>af_group</th>\n", + " <th>af_timestamp</th>\n", + " <th>af_actions</th>\n", + " <th>af_hit_count</th>\n", + " <th>af_public_comments</th>\n", + " <th>manual_tags</th>\n", + " <th>notes</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>47</th>\n", + " <td>47</td>\n", + " <td>680</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20181023195329</td>\n", + " <td>disallow</td>\n", + " <td>95242</td>\n", + " <td>Adding emoji unicode characters</td>\n", + " <td>good_faith</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>67</th>\n", + " <td>67</td>\n", + " <td>803</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " 
<td>20181023223916</td>\n", + " <td>disallow</td>\n", + " <td>46756</td>\n", + " <td>Prevent new users from editing other's user pages</td>\n", + " <td>vandalism, good_faith</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>271</th>\n", + " <td>271</td>\n", + " <td>642</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20181023194832</td>\n", + " <td>disallow</td>\n", + " <td>712</td>\n", + " <td>OTRS template added by non-OTRS member (global)</td>\n", + " <td>good_faith?</td>\n", + " <td>from comments: “This filter is easy to subvert...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>499</th>\n", + " <td>499</td>\n", + " <td>694</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>default</td>\n", + " <td>20181023223653</td>\n", + " <td>disallow</td>\n", + " <td>74</td>\n", + " <td>Moves to or from the Module namespace</td>\n", + " <td>good_faith</td>\n", + " <td>“These types of moves don't do what users expe...</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Unnamed: 0 af_id af_hidden af_global af_enabled af_deleted \\\n", + "47 47 680 0 0 1 0 \n", + "67 67 803 0 0 1 0 \n", + "271 271 642 0 0 1 0 \n", + "499 499 694 0 0 1 0 \n", + "\n", + " af_throttled af_group af_timestamp af_actions af_hit_count \\\n", + "47 0 default 20181023195329 disallow 95242 \n", + "67 0 default 20181023223916 disallow 46756 \n", + "271 0 default 20181023194832 disallow 712 \n", + "499 0 default 20181023223653 disallow 74 \n", + "\n", + " af_public_comments manual_tags \\\n", + "47 Adding emoji unicode characters good_faith \n", + "67 Prevent new users from editing other's user pages vandalism, good_faith \n", + "271 OTRS template added by non-OTRS member (global) good_faith? 
\n", + "499 Moves to or from the Module namespace good_faith \n", + "\n", + " notes \n", + "47 NaN \n", + "67 NaN \n", + "271 from comments: “This filter is easy to subvert... \n", + "499 “These types of moves don't do what users expe... " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#TODO: all ids of active public filters set to disallow and labeled 'good_faith'\n", + "active_public[active_public['af_actions'].fillna('').str.contains('disallow') & active_public['manual_tags'].fillna('').str.contains('good_faith')]" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "**TODO** It would be interesting to check all those filters which actions are set to \"disallow\" but I've labeled as \"good_faith\" for example" + "Upon second inspection (looking at https://en.wikipedia.org/wiki/Special:AbuseLog), edits targeted by filter with id 680 (adding unicode emojis) seem to be vandalism in the vast majority of cases.\n", + "\n", + "Filter 803 seems to be labeled accurately on the other hand. There are a lot of personal insults in these edits, but there are also some that look as if the user themselves wanted to edit their own page but forgot to log in (so they're editing from an IP).\n", + "\n", + "The other two filters seem to be labeled somewhat reasonably and they are set to \"disallow\" since the intended/expected effect of the action differs from the real one and since they're disruptive." ] }, {