From 5205068e1d1f685aaec4b0008b65d32aafb9fa3e Mon Sep 17 00:00:00 2001
From: Lyudmila Vaseva <vaseva@mi.fu-berlin.de>
Date: Tue, 9 Jul 2019 17:58:34 +0200
Subject: [PATCH] Explore peak in hit nums beginning of 2016

---
 quarries/quarry-37494 |   3 +
 src/explore.ipynb     | 297 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 300 insertions(+)
 create mode 100644 quarries/quarry-37494

diff --git a/quarries/quarry-37494 b/quarries/quarry-37494
new file mode 100644
index 0000000..a8b14aa
--- /dev/null
+++ b/quarries/quarry-37494
@@ -0,0 +1,3 @@
+use enwiki_p;  -- English Wikipedia database (source of the en-wiki abuse log export)
+select * from abuse_filter_log  -- every abuse filter log entry for January 2016; all columns kept for the CSV export
+where afl_timestamp >= '20160101000000' and afl_timestamp < '20160201000000';  -- half-open range; bounds quoted because MediaWiki timestamps are YYYYMMDDHHMMSS strings, avoiding implicit numeric coercion of the column
diff --git a/src/explore.ipynb b/src/explore.ipynb
index ef63de2..cedbcab 100644
--- a/src/explore.ipynb
+++ b/src/explore.ipynb
@@ -734,6 +734,303 @@
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Explore the peak in filter hits"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "10.68.16.39                               1689\n",
+       "37.113.52.15                              1249\n",
+       "95.152.44.52                              1133\n",
+       "5.165.178.194                              715\n",
+       "94.181.143.10                              697\n",
+       "95.152.42.158                              674\n",
+       "5.166.250.109                              559\n",
+       "5.166.224.152                              556\n",
+       "Acheter cialis                             533\n",
+       "37.113.51.96                               473\n",
+       "93.124.7.25                                430\n",
+       "5.167.114.39                               317\n",
+       "93.124.34.23                               307\n",
+       "36.250.176.0                               291\n",
+       "Wwekik2222kdjdj                            279\n",
+       "37.113.37.111                              274\n",
+       "94.181.170.143                             247\n",
+       "Theadityapratap                            239\n",
+       "94.181.156.128                             229\n",
+       "93.124.46.78                               222\n",
+       "Achat cialis                               222\n",
+       "93.124.28.116                              213\n",
+       "176.97.116.140                             202\n",
+       "5.165.186.39                               192\n",
+       "Acquistare cialis                          190\n",
+       "93.124.74.221                              187\n",
+       "AbhiJahazi                                 186\n",
+       "37.113.34.32                               186\n",
+       "37.113.28.187                              185\n",
+       "64.62.219.98                               180\n",
+       "                                          ... \n",
+       "Dharmendra780                                1\n",
+       "Robertdenningraider13                        1\n",
+       "SLCHT                                        1\n",
+       "2601:153:700:6870:B8D9:5791:192D:1DAE        1\n",
+       "59.96.9.214                                  1\n",
+       "207.32.26.5                                  1\n",
+       "208.61.1.124                                 1\n",
+       "149.126.106.19                               1\n",
+       "121.216.118.139                              1\n",
+       "HegerLoyneszhFG                              1\n",
+       "2607:FB90:1903:33F9:4C08:6E2:70C7:20F6       1\n",
+       "Clutchmaster163955                           1\n",
+       "86.160.200.85                                1\n",
+       "38.78.194.51                                 1\n",
+       "69.174.87.60                                 1\n",
+       "PujolBoatmanJvfh                             1\n",
+       "89.240.83.117                                1\n",
+       "MarrowWylyLRFj                               1\n",
+       "173.31.204.16                                1\n",
+       "PettigrewSparacioETJg                        1\n",
+       "Ednopsniwagn                                 1\n",
+       "Kevinfigs                                    1\n",
+       "162.228.50.214                               1\n",
+       "LoeberArelUVURo                              1\n",
+       "50.82.26.116                                 1\n",
+       "Kaileefiles                                  1\n",
+       "Bestraight                                   1\n",
+       "Dexon Software                               1\n",
+       "203.59.81.156                                1\n",
+       "119.154.88.171                               1\n",
+       "Name: afl_user_text, Length: 139586, dtype: int64"
+      ]
+     },
+     "execution_count": 42,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_jan2016 = pd.read_csv(\"quarry-37494-abuselog-entries-en-wiki-in-january-2016-run389216.csv\", sep=',')\n",
+    "\n",
+    "# Number of hits per editor\n",
+    "df_jan2016['afl_user_text'].value_counts()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "37.113.52.15 is a Russian IP (whois --> netname:        ERTH-PENZA-PPPOE-16-NET)\n",
+    "95.152.44.52 is too (whois --> netname:        RU-PENZA-VT-DSL-200901)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "edit                 300781\n",
+       "createaccount         71902\n",
+       "move                    193\n",
+       "autocreateaccount        18\n",
+       "gatheredit                9\n",
+       "delete                    4\n",
+       "Name: afl_action, dtype: int64"
+      ]
+     },
+     "execution_count": 43,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Number of hits per action type (edit, createaccount, move, ...)\n",
+    "df_jan2016['afl_action'].value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "527    71853\n",
+       "61     27072\n",
+       "650    24264\n",
+       "633    21099\n",
+       "279    18460\n",
+       "636    17279\n",
+       "384    15080\n",
+       "135    10028\n",
+       "30      7829\n",
+       "172     7471\n",
+       "271     7192\n",
+       "380     6554\n",
+       "80      6530\n",
+       "364     6238\n",
+       "686     6089\n",
+       "712     5597\n",
+       "466     5555\n",
+       "432     5398\n",
+       "220     5385\n",
+       "550     5215\n",
+       "189     4730\n",
+       "3       4656\n",
+       "148     4470\n",
+       "260     4148\n",
+       "614     4120\n",
+       "231     3398\n",
+       "631     3349\n",
+       "225     3245\n",
+       "46      3174\n",
+       "680     3134\n",
+       "       ...  \n",
+       "242       19\n",
+       "706       19\n",
+       "16        19\n",
+       "734       18\n",
+       "264       18\n",
+       "710       17\n",
+       "666       15\n",
+       "722       14\n",
+       "167       13\n",
+       "294       10\n",
+       "624        9\n",
+       "727        8\n",
+       "651        8\n",
+       "637        8\n",
+       "745        6\n",
+       "52         6\n",
+       "674        6\n",
+       "748        5\n",
+       "690        5\n",
+       "2          5\n",
+       "597        4\n",
+       "579        4\n",
+       "709        4\n",
+       "68         4\n",
+       "554        3\n",
+       "749        3\n",
+       "596        1\n",
+       "718        1\n",
+       "459        1\n",
+       "694        1\n",
+       "Name: afl_filter, Length: 138, dtype: int64"
+      ]
+     },
+     "execution_count": 44,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Number of hits per filter\n",
+    "df_jan2016['afl_filter'].value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "UserLogin                                                                                                                                          71920\n",
+       "Skateboard                                                                                                                                           660\n",
+       "Conchobar_Lads_Tomlinson                                                                                                                             584\n",
+       "Billboard_(magazine)                                                                                                                                 536\n",
+       "Chris_Stark                                                                                                                                          467\n",
+       "Tyler_Joseph                                                                                                                                         431\n",
+       "Bulletin_board_system                                                                                                                                339\n",
+       "Hailey_Baldwin                                                                                                                                       328\n",
+       "Flickr_-_Official_U.S._Navy_Imagery_-_Sailors_play_board_games_with_children_at_the_Cameron_Community_Ministries_during_Rochester_Navy_Week.jpg      322\n",
+       "Ryback                                                                                                                                               287\n",
+       "Aditya_Pratap                                                                                                                                        247\n",
+       "95.152.44.52                                                                                                                                         232\n",
+       "WikiProject_Film                                                                                                                                     232\n",
+       "British_Board_of_Film_Classification                                                                                                                 224\n",
+       "10_Cloverfield_Lane                                                                                                                                  219\n",
+       "Keyboard_instrument                                                                                                                                  217\n",
+       "Board_game                                                                                                                                           203\n",
+       "176.97.116.140                                                                                                                                       202\n",
+       "93.124.46.78                                                                                                                                         198\n",
+       "AbhiJahazi                                                                                                                                           186\n",
+       "Board                                                                                                                                                184\n",
+       "Entertainment_Software_Rating_Board                                                                                                                  178\n",
+       "Battle_of_the_Alamo                                                                                                                                  169\n",
+       "Computer_keyboard                                                                                                                                    161\n",
+       "Cultural_history_of_the_buttocks                                                                                                                     155\n",
+       "Conchobar_Tomlinson                                                                                                                                  155\n",
+       "History_of_the_internal_combustion_engine                                                                                                            154\n",
+       "Skateboarder_(magazine)                                                                                                                              146\n",
+       "WikiProject_Video_games                                                                                                                              140\n",
+       "Cian_Mitchell                                                                                                                                        140\n",
+       "                                                                                                                                                   ...  \n",
+       "Surfix                                                                                                                                                 1\n",
+       "British_Colonial_Research_Committee                                                                                                                    1\n",
+       "Nassau_County_Police_Department                                                                                                                        1\n",
+       "Institute_of_Health_Sciences_Bhubaneswar                                                                                                               1\n",
+       "Underwater_Pompeii                                                                                                                                     1\n",
+       "El_Valle_de_Antón                                                                                                                                      1\n",
+       "Make_My_Journeys                                                                                                                                       1\n",
+       "Ten_Fold_Engineering                                                                                                                                   1\n",
+       "LNER_Class_A3_4472_Flying_Scotsman                                                                                                                     1\n",
+       "North_Eastern_College                                                                                                                                  1\n",
+       "Zillow                                                                                                                                                 1\n",
+       "Ww57614wW                                                                                                                                              1\n",
+       "Junkyard_Dog                                                                                                                                           1\n",
+       "John_Steptoe                                                                                                                                           1\n",
+       "Financial_doctor                                                                                                                                       1\n",
+       "Music_of_Portugal                                                                                                                                      1\n",
+       "Northwoods_Mall_(North_Charleston,_South_Carolina)                                                                                                     1\n",
+       "Niuman_Romero                                                                                                                                          1\n",
+       "Chaudhry_Faisal_Mushtaq                                                                                                                                1\n",
+       "Big_Island_(Bay_of_Quinte)                                                                                                                             1\n",
+       "Venus_of_Willendorf                                                                                                                                    1\n",
+       "Alexis_Argüello                                                                                                                                        1\n",
+       "Pdfpdf/Images_of_Adelaide                                                                                                                              1\n",
+       "Chickasaw_State_Park_(Alabama)                                                                                                                         1\n",
+       "Torin's_Passage                                                                                                                                        1\n",
+       "Gulshanthakurbohani                                                                                                                                    1\n",
+       "WPCH-TV                                                                                                                                                1\n",
+       "List_of_countries_with_IKEA_stores                                                                                                                     1\n",
+       "Phelps,_New_York                                                                                                                                       1\n",
+       "Micheál_Quirke                                                                                                                                         1\n",
+       "Name: afl_title, Length: 91387, dtype: int64"
+      ]
+     },
+     "execution_count": 45,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Number of hits per intended edit page\n",
+    "df_jan2016['afl_title'].value_counts()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To be fair, I don't see any particularly interesting or conspicuous pattern, besides the 71920 attempts at account creation. But maybe it is exactly these attempts that account for the 71920-hit difference from all the \"standard\" numbers. I'm comparing this with September 2016 (238406 hits) for reference."
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
-- 
GitLab