diff --git a/src/explore.ipynb b/src/explore.ipynb index 2acd174fc4323ecb9ef38e31e1ff905890fc6a9b..54c7800bcce81223ecc85829373eed7b664764b9 100644 --- a/src/explore.ipynb +++ b/src/explore.ipynb @@ -1489,6 +1489,332 @@ "active_public[active_public['af_actions'].fillna('log only').str.contains('disallow')]" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Who are filters aimed at?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here is explored how many filters target unconfirmed users" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>af_id</th>\n", + " <th>af_public_comments</th>\n", + " <th>af_hit_count</th>\n", + " <th>af_actions</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>61</td>\n", + " <td>New user removing references</td>\n", + " <td>1611956</td>\n", + " <td>tag</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>384</td>\n", + " <td>Addition of bad words or other vandalism</td>\n", + " <td>1159239</td>\n", + " <td>disallow</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>30</td>\n", + " <td>Large deletion from article by new editors</td>\n", + " <td>840871</td>\n", + " <td>warn,tag</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>636</td>\n", + " <td>Unexplained removal of sourced content</td>\n", + " <td>726764</td>\n", + " <td>warn</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " 
<td>3</td>\n", + " <td>New user blanking articles</td>\n", + " <td>700522</td>\n", + " <td>warn,tag</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>432</td>\n", + " <td>Starting new line with lowercase letters</td>\n", + " <td>558578</td>\n", + " <td>warn,tag</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>225</td>\n", + " <td>Vandalism in all caps</td>\n", + " <td>482872</td>\n", + " <td>disallow</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>50</td>\n", + " <td>Shouting</td>\n", + " <td>480960</td>\n", + " <td>warn,tag</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>231</td>\n", + " <td>Long string of characters containing no spaces</td>\n", + " <td>380302</td>\n", + " <td>warn,tag</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>46</td>\n", + " <td>\"Poop\" vandalism</td>\n", + " <td>356945</td>\n", + " <td>disallow</td>\n", + " </tr>\n", + " <tr>\n", + " <th>38</th>\n", + " <td>39</td>\n", + " <td>School libel and vandalism</td>\n", + " <td>150568</td>\n", + " <td>warn,tag</td>\n", + " </tr>\n", + " <tr>\n", + " <th>45</th>\n", + " <td>11</td>\n", + " <td>You/He/She/It sucks</td>\n", + " <td>109657</td>\n", + " <td>warn,tag</td>\n", + " </tr>\n", + " <tr>\n", + " <th>47</th>\n", + " <td>680</td>\n", + " <td>Adding emoji unicode characters</td>\n", + " <td>95242</td>\n", + " <td>disallow</td>\n", + " </tr>\n", + " <tr>\n", + " <th>54</th>\n", + " <td>365</td>\n", + " <td>Unusual changes to featured or good content</td>\n", + " <td>85470</td>\n", + " <td>disallow</td>\n", + " </tr>\n", + " <tr>\n", + " <th>58</th>\n", + " <td>126</td>\n", + " <td>Youtube links</td>\n", + " <td>65137</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>67</th>\n", + " <td>803</td>\n", + " <td>Prevent new users from editing other's user pages</td>\n", + " <td>46756</td>\n", + " <td>disallow</td>\n", + " </tr>\n", + " <tr>\n", + " <th>68</th>\n", + " <td>117</td>\n", + " <td>removal of 
Category:Living people</td>\n", + " <td>43822</td>\n", + " <td>tag</td>\n", + " </tr>\n", + " <tr>\n", + " <th>91</th>\n", + " <td>113</td>\n", + " <td>Misplaced #redirect in articles</td>\n", + " <td>20885</td>\n", + " <td>warn,tag</td>\n", + " </tr>\n", + " <tr>\n", + " <th>94</th>\n", + " <td>59</td>\n", + " <td>New user removing templates on image description</td>\n", + " <td>19938</td>\n", + " <td>tag</td>\n", + " </tr>\n", + " <tr>\n", + " <th>100</th>\n", + " <td>655</td>\n", + " <td>Large plot section addition</td>\n", + " <td>16051</td>\n", + " <td>tag</td>\n", + " </tr>\n", + " <tr>\n", + " <th>130</th>\n", + " <td>784</td>\n", + " <td>Harambe vandalism</td>\n", + " <td>9265</td>\n", + " <td>disallow</td>\n", + " </tr>\n", + " <tr>\n", + " <th>138</th>\n", + " <td>912</td>\n", + " <td>Possible \"fortnite\" vandalism</td>\n", + " <td>7505</td>\n", + " <td>warn,tag</td>\n", + " </tr>\n", + " <tr>\n", + " <th>171</th>\n", + " <td>860</td>\n", + " <td>Ryan Ross vandalism</td>\n", + " <td>3451</td>\n", + " <td>disallow</td>\n", + " </tr>\n", + " <tr>\n", + " <th>211</th>\n", + " <td>766</td>\n", + " <td>Alt-right labeling</td>\n", + " <td>1866</td>\n", + " <td>warn,tag</td>\n", + " </tr>\n", + " <tr>\n", + " <th>226</th>\n", + " <td>921</td>\n", + " <td>Suspicious claims of nazism</td>\n", + " <td>1422</td>\n", + " <td>tag</td>\n", + " </tr>\n", + " <tr>\n", + " <th>470</th>\n", + " <td>843</td>\n", + " <td>Prevent new users from creating redirects to [...</td>\n", + " <td>98</td>\n", + " <td>disallow</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " af_id af_public_comments af_hit_count \\\n", + "0 61 New user removing references 1611956 \n", + "3 384 Addition of bad words or other vandalism 1159239 \n", + "5 30 Large deletion from article by new editors 840871 \n", + "7 636 Unexplained removal of sourced content 726764 \n", + "8 3 New user blanking articles 700522 \n", + "11 432 Starting new line with lowercase 
letters 558578 \n", + "12 225 Vandalism in all caps 482872 \n", + "13 50 Shouting 480960 \n", + "17 231 Long string of characters containing no spaces 380302 \n", + "19 46 \"Poop\" vandalism 356945 \n", + "38 39 School libel and vandalism 150568 \n", + "45 11 You/He/She/It sucks 109657 \n", + "47 680 Adding emoji unicode characters 95242 \n", + "54 365 Unusual changes to featured or good content 85470 \n", + "58 126 Youtube links 65137 \n", + "67 803 Prevent new users from editing other's user pages 46756 \n", + "68 117 removal of Category:Living people 43822 \n", + "91 113 Misplaced #redirect in articles 20885 \n", + "94 59 New user removing templates on image description 19938 \n", + "100 655 Large plot section addition 16051 \n", + "130 784 Harambe vandalism 9265 \n", + "138 912 Possible \"fortnite\" vandalism 7505 \n", + "171 860 Ryan Ross vandalism 3451 \n", + "211 766 Alt-right labeling 1866 \n", + "226 921 Suspicious claims of nazism 1422 \n", + "470 843 Prevent new users from creating redirects to [... 
98 \n", + "\n", + " af_actions \n", + "0 tag \n", + "3 disallow \n", + "5 warn,tag \n", + "7 warn \n", + "8 warn,tag \n", + "11 warn,tag \n", + "12 disallow \n", + "13 warn,tag \n", + "17 warn,tag \n", + "19 disallow \n", + "38 warn,tag \n", + "45 warn,tag \n", + "47 disallow \n", + "54 disallow \n", + "58 NaN \n", + "67 disallow \n", + "68 tag \n", + "91 warn,tag \n", + "94 tag \n", + "100 tag \n", + "130 disallow \n", + "138 warn,tag \n", + "171 disallow \n", + "211 warn,tag \n", + "226 tag \n", + "470 disallow " + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "newbie_filters = df_origin[df_origin['af_pattern'].fillna('').str.contains('!(\"confirmed\" in user_groups)', regex=False)]\n", + "newbie_filters[newbie_filters['af_enabled'] == 1][['af_id', 'af_public_comments', 'af_hit_count', 'af_actions']]" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "43\n", + "26\n", + "11\n", + "9\n" + ] + } + ], + "source": [ + "print(len(newbie_filters))\n", + "print(len(newbie_filters[newbie_filters['af_enabled'] == 1]))\n", + "print(len(newbie_filters[newbie_filters['af_actions'] == \"disallow\"]))\n", + "print(len(newbie_filters.query('af_actions==\"disallow\" & af_enabled==1')))" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/thesis/5-Overview-EN-Wiki.tex b/thesis/5-Overview-EN-Wiki.tex index 375dfd8732acbbb1d082db629fbb5b8cb5726d48..3192102d1f43b5ac0c734eea6ab835641861162b 100644 --- a/thesis/5-Overview-EN-Wiki.tex +++ b/thesis/5-Overview-EN-Wiki.tex @@ -229,7 +229,6 @@ The proportion of vandalism related filters is higher when we look at all filter Again, this is probably due to the presumed higher fluctuation rates of hidden filters which (according to my labeling, see section~\ref{sec:manual-classification} for rationale) are always vandalism related. 
It also comes to attention that the relative share of maintenance related filters is higher when we look at all filters. The detailed distribution of manually assigned codes and their parent categories can be view on figure~\ref{fig:manual-tags}. -%TODO I don't have anything else to say here? %TODO make these two subfigures of the same figure \begin{figure} @@ -252,6 +251,45 @@ The detailed distribution of manually assigned codes and their parent categories \end{figure} \end{landscape} +Another feature explored was the explicit targeting of unconfirmed users (see table~\ref{tab:newbie-filters}). +It is noticeable that various filters have what the edit filter managers have dubbed ``the newbie check'', \verb|!("confirmed" in user_groups)|, as one of their first conditions. +There are $43$ such filters in total; $26$ of them were enabled as of January 2019 (making up approximately 20\% of all enabled filters at the time), and $9$ of these enabled filters disallowed the edit directly when matched. 
+ +\begin{table*}[h] + \centering + \begin{tabular}{p{1cm} p{9cm} r p{2cm} } + % \toprule + Filter ID & Publicly available description & Hit count & Actions \\ + \hline + 61 & New user removing references & 1611956 & tag \\ + 384 & Addition of bad words or other vandalism & 1159239 & disallow\\ + 30 & Large deletion from article by new editors & 840871 & warn,tag\\ + 636 & Unexplained removal of sourced content & 726764 & warn\\ + 3 & New user blanking articles & 700522 & warn,tag\\ + 432 & Starting new line with lowercase letters & 558578 & warn,tag\\ + 225 & Vandalism in all caps & 482872 & disallow\\ + 50 & Shouting & 480960 & warn,tag\\ + 231 & Long string of characters containing no spaces & 380302 & warn,tag\\ + 46 & ``Poop'' vandalism & 356945 & disallow\\ + 39 & School libel and vandalism & 150568 & warn,tag\\ + 11 & You/He/She/It sucks & 109657 & warn,tag\\ + 680 & Adding emoji unicode characters & 95242 & disallow\\ + 365 & Unusual changes to featured or good content & 85470 & disallow\\ + 126 & Youtube links & 65137 & log only\\ + 803 & Prevent new users from editing other's user pages & 46756 & disallow\\ + 117 & removal of Category:Living people & 43822 & tag\\ + 113 & Misplaced \#redirect in articles & 20885 & warn,tag\\ + 59 & New user removing templates on image description & 19938 & tag\\ + 655 & Large plot section addition & 16051 & tag\\ + 784 & Harambe vandalism & 9265 & disallow\\ + 912 & Possible ``fortnite'' vandalism & 7505 & warn,tag\\ + 860 & Ryan Ross vandalism & 3451 & disallow\\ + 766 & Alt-right labeling & 1866 & warn,tag\\ + 921 & Suspicious claims of nazism & 1422 & tag\\ + 843 & Prevent new users from creating redirects to [[Donald Trump]] & 98 & disallow\\ + \end{tabular} + \caption{Filters aimed at unconfirmed users}~\label{tab:newbie-filters} +\end{table*} \subsection{Who Trips Filters} diff --git a/thesis/6-Discussion.tex b/thesis/6-Discussion.tex index 
9c5b21e8d9e9bc4926e1497769ca2e6862602ec6..9035fcaf770178efce85555ef2c16258d4fb2498 100644 --- a/thesis/6-Discussion.tex +++ b/thesis/6-Discussion.tex @@ -133,20 +133,10 @@ and that I'm not willing to offer potential trolls ready-made lists. Finally, it stands to reason that if we are interested in the question when do people (who have access to both) implement a bot and when a filter, all we have to do is ask (see directions for future research in section~\ref{}). \end{comment} -At the end, closer scrutiny and critical evaluation of the filter patterns is required (syn!). +Ultimately, closer scrutiny and critical evaluation of the filter patterns are required. %or At the end, particular aspects of the filter patterns were critically scrutinised: -Is it fair and justified that a great number of filters target only new (not confirmed) editors? -Why is it all right for an established editor to use swear words whereas it is not for newbies (see filter 384)? - -%Fazit - -%TODO what to do with this? -\begin{comment} -## Open questions - -If discerning motivation is difficult, and, we want to achieve different results, depending on the motivation, that lead us to the question whether filtering is the proper mechanism to deal with disruptive edits. - -\end{comment} +It is debatable whether it is fair and justified that approximately 20\% of the enabled filters target only new (unconfirmed) editors. +Why is it all right for an established editor to use swear words (see filter 384, ``Addition of bad words or other vandalism'') or to insert long strings of all caps (filter 50, ``Shouting'') when it is not for newbies? \section{Q4: How have these tasks evolved over time (are they changes in the type, number, etc.)?}