diff --git a/src/explore.ipynb b/src/explore.ipynb index b05c7c4039d7602ac2651a96d5fa077e8ea9e707..ce598d4328250fa7a0640ec59a69d7e046e62238 100644 --- a/src/explore.ipynb +++ b/src/explore.ipynb @@ -455,7 +455,7 @@ "print (df.query('af_group!=\"default\"'))\n", "\n", "# --> so available groups are \"default\" and \"feedback\"\n", - "# TODO: question: what do they mean?\n", + "# question: what do they mean?\n", "# From https://www.mediawiki.org/wiki/Extension:AbuseFilter/abuse_filter_table :\n", "# \"The group this filter belongs to, as defined in $wgAbuseFilterValidGroups.\" still don't get it\n", "'''\n", @@ -8806,7 +8806,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -8821,7 +8821,7 @@ "Name: afl_action, dtype: int64" ] }, - "execution_count": 34, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -8833,6 +8833,87 @@ "df_feb2016['afl_action'].value_counts()" ] }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "527 67669\n", + "61 26874\n", + "650 23054\n", + "633 19779\n", + "279 18099\n", + "636 17891\n", + "384 15776\n", + "135 11148\n", + "30 7936\n", + "172 7315\n", + "380 6982\n", + "364 6071\n", + "432 5500\n", + "550 5432\n", + "80 5424\n", + "271 5328\n", + "712 5260\n", + "3 5223\n", + "686 5181\n", + "752 5027\n", + "189 4706\n", + "220 4572\n", + "260 4259\n", + "614 4181\n", + "231 3741\n", + "680 3662\n", + "491 3572\n", + "225 3483\n", + "631 3479\n", + "46 3345\n", + " ... \n", + "699 23\n", + "709 23\n", + "674 21\n", + "698 20\n", + "624 19\n", + "167 17\n", + "16 16\n", + "742 15\n", + "744 13\n", + "139 13\n", + "751 12\n", + "639 11\n", + "745 10\n", + "459 10\n", + "642 10\n", + "242 9\n", + "750 8\n", + "731 8\n", + "738 8\n", + "554 7\n", + "718 7\n", + "294 6\n", + "597 4\n", + "710 3\n", + "706 2\n", + "651 2\n", + "694 1\n", + "666 1\n", + "2 1\n", + "1 1\n", + "Name: afl_filter, Length: 128, dtype: int64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_feb2016['afl_filter'].value_counts()" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -9020,7 +9101,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -9034,7 +9115,7 @@ "Name: afl_action, dtype: int64" ] }, - "execution_count": 38, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -9053,6 +9134,87 @@ "Again, quite a high createaccount counts.." ] }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "527 51720\n", + "61 27554\n", + "650 24438\n", + "633 20600\n", + "279 19557\n", + "636 17656\n", + "384 14654\n", + "135 9725\n", + "271 9185\n", + "30 9008\n", + "172 7552\n", + "380 6795\n", + "80 6211\n", + "220 6064\n", + "550 6055\n", + "364 5967\n", + "752 5773\n", + "712 5669\n", + "686 5345\n", + "432 4782\n", + "189 4727\n", + "3 4465\n", + "260 4312\n", + "466 3698\n", + "225 3557\n", + "29 3536\n", + "631 3474\n", + "231 3371\n", + "614 3252\n", + "680 3216\n", + " ... \n", + "5 33\n", + "745 32\n", + "68 29\n", + "738 26\n", + "744 24\n", + "464 23\n", + "139 23\n", + "734 22\n", + "676 19\n", + "242 19\n", + "264 18\n", + "756 17\n", + "750 14\n", + "167 14\n", + "751 11\n", + "624 10\n", + "722 9\n", + "16 7\n", + "639 6\n", + "690 5\n", + "459 4\n", + "596 3\n", + "642 3\n", + "651 2\n", + "718 2\n", + "710 1\n", + "597 1\n", + "294 1\n", + "554 1\n", + "1 1\n", + "Name: afl_filter, Length: 127, dtype: int64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_mar2016['afl_filter'].value_counts()" + ] + }, { "cell_type": "code", "execution_count": 39, @@ -11031,7 +11193,7 @@ } ], "source": [ - "#TODO Multi line chart: a line per action;\n", + "# Multi line chart: a line per action;\n", "# TODO why are there combis such as \"disallow,tag\", but no \"warn,tag\"?\n", "\n", "# style\n", @@ -11322,7 +11484,7 @@ } ], "source": [ - "#TODO Multi line chart: a line per action;\n", + "# Multi line chart: a line per action;\n", "# TODO why are there combis such as \"disallow,tag\", but no \"warn,tag\"?\n", "\n", "# style\n", @@ -12177,9 +12339,7 @@ } ], "source": [ - "# TODO multi line chart\n", - "#TODO Multi line chart: a line per action;\n", - "# TODO why are there combis such as \"disallow,tag\", but no \"warn,tag\"?\n", + "# Multi line chart: a line per action;\n", "\n", "# style\n", "plt.style.use('seaborn-darkgrid')\n", @@ -14138,7 +14298,7 @@ } ], "source": [ - "# TODO make the above pie chart for active filters only\n", + "# make the above pie chart for active filters only\n", "enabled_tags_2nd = collections.Counter(flatten([x.split(\", \") for x in list(df_2nd.query('af_enabled==1')['manual_tags'])])).most_common()\n", "\n", "# compute num of filters per parent category\n", @@ -14188,7 +14348,7 @@ "metadata": {}, "outputs": [], "source": [ - "#TODO Plot (manual tags)*(hit count) for all filters\n", + "# Plot (manual tags)*(hit count) for all filters\n", "\n", "\"\"\"\n", "Note\n", @@ -15229,15 +15389,6 @@ "df_2009_tags_2nd" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#TODO Plot a per year (manual tags)*(hit count) for all filters" - ] - }, { "cell_type": "markdown", "metadata": {},