From 61b4e9ec9eab86c3d96f75b00fd9eb30a462405a Mon Sep 17 00:00:00 2001 From: Lyudmila Vaseva <vaseva@mi.fu-berlin.de> Date: Mon, 11 Mar 2019 08:54:26 +0100 Subject: [PATCH] Restructure general stats, add piechart --- src/explore.ipynb | 123 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 109 insertions(+), 14 deletions(-) diff --git a/src/explore.ipynb b/src/explore.ipynb index 9bd0d22..9413ade 100644 --- a/src/explore.ipynb +++ b/src/explore.ipynb @@ -24,14 +24,29 @@ "We import a cleaned version of manually annotated edit filters:" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\"20190106115600_filters-sorted-by-hits-manual-tags.csv\", sep='\\t')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As well as an original query against the abuse_filter table:" + ] + }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ - "df = pd.read_csv(\"20190106115600_filters-sorted-by-hits-manual-tags.csv\", sep='\\t')\n", - "df_origin = pd.read_csv(\"quarry-32518-all-filters-sorted-num-hits.csv\", sep=',')\n" + "df_origin = pd.read_csv(\"quarry-32518-all-filters-sorted-num-hits.csv\", sep=',')" ] }, { @@ -64,7 +79,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -74,31 +89,65 @@ "201\n", "753\n", "600\n", - "110\n", "0\n" ] } ], "source": [ "# Active (enabled) filters\n", - "print (len(df.query('af_enabled==1')))\n", + "num_enabled = len(df.query('af_enabled==1'))\n", + "print (num_enabled)\n", "\n", "# Disabled filters\n", - "print (len(df.query('af_enabled==0')))\n", + "num_disabled = len(df.query('af_enabled==0'))\n", + "print (num_disabled)\n", "\n", "# Deleted filters\n", - "print (len(df.query('af_deleted==1')))\n", + "num_deleted = len(df.query('af_deleted==1'))\n", + "print (num_deleted)\n", + "\n", + "# Deleted and enabled -- make sure it's 0 \n", 
+ "num_enabled_deleted = len(df.query('af_deleted==1 and af_enabled==1'))\n", + "print (num_enabled_deleted)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "361\n", + "110\n", + "35\n", + "216\n" + ] + } + ], + "source": [ + "# public filters\n", + "num_public = len(df.query('af_hidden==0'))\n", + "print (num_public)\n", "\n", "# Active public filters\n", - "print (len(df.query('af_hidden==0 and af_enabled==1')))\n", + "num_enabled_public = len(df.query('af_hidden==0 and af_enabled==1'))\n", + "print (num_enabled_public)\n", "\n", - "# Deleted and enabled\n", - "print (len(df.query('af_deleted==1 and af_enabled==1')))" + "# disabled (but not deleted) public filters\n", + "num_disabled_public = len(df.query('af_hidden==0 and af_enabled==0 and af_deleted==0'))\n", + "print (num_disabled_public)\n", + "\n", + "# deleted public filters\n", + "num_deleted_public = num_public - num_enabled_public - num_disabled_public\n", + "print (num_deleted_public)" ] }, { "cell_type": "code", - "execution_count": 101, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -106,16 +155,62 @@ "output_type": "stream", "text": [ "593\n", - "91\n" + "91\n", + "118\n", + "384\n" ] } ], "source": [ "# hidden filters\n", - "print (len(df.query('af_hidden==1')))\n", + "num_hidden = len(df.query('af_hidden==1'))\n", + "print (num_hidden)\n", "\n", "# active hidden filters\n", - "print (len(df.query('af_hidden==1 and af_enabled==1')))" + "num_enabled_hidden = len(df.query('af_hidden==1 and af_enabled==1'))\n", + "print (num_enabled_hidden)\n", + "\n", + "# disabled (but not deleted) hidden filters\n", + "num_disabled_hidden = len(df.query('af_hidden==1 and af_enabled==0 and af_deleted==0'))\n", + "print (num_disabled_hidden)\n", + "\n", + "# deleted hidden filters\n", + "num_deleted_hidden = num_hidden - num_enabled_hidden - num_disabled_hidden\n", + "print (num_deleted_hidden)" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# plot general overview\n", + "\n", + "# Pie chart, where the slices will be ordered and plotted counter-clockwise:\n", + "# here, we mean \"disabled, but not deleted\"\n", + "labels = ['public deleted', 'hidden deleted', 'public enabled', 'hidden enabled', 'public disabled', 'hidden disabled']\n", + "sizes = [num_deleted_public, num_deleted_hidden, num_enabled_public, num_enabled_hidden, num_disabled_public, num_disabled_hidden]\n", + "\n", + "\n", + "fig1, ax1 = plt.subplots()\n", + "ax1.set_prop_cycle(color=['lightskyblue', 'steelblue', 'yellowgreen', 'olivedrab', 'lightgrey', 'grey'])\n", + "ax1.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)\n", + "ax1.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle.\n", + "\n", + "\n", + "plt.show()" ] }, { -- GitLab