diff --git a/src/explore.ipynb b/src/explore.ipynb index 47669f73dc17c7cd621faf3d8c51130cc4148f14..151b18aaf7263a91e061f388efab1ca8e8d94c50 100644 --- a/src/explore.ipynb +++ b/src/explore.ipynb @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -42,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -317,418 +317,141 @@ "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>LogMonth</th>\n", - " <th>Freq</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>2019-03-01</td>\n", - " <td>34309</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>2019-02-01</td>\n", - " <td>236606</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>2019-01-01</td>\n", - " <td>252668</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>2018-12-01</td>\n", - " <td>226287</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>2018-11-01</td>\n", - " <td>253233</td>\n", - " </tr>\n", - " <tr>\n", - " <th>5</th>\n", - " <td>2018-10-01</td>\n", - " <td>256438</td>\n", - " </tr>\n", - " <tr>\n", - " <th>6</th>\n", - " <td>2018-09-01</td>\n", - " <td>230354</td>\n", - " </tr>\n", - " <tr>\n", - " <th>7</th>\n", - " <td>2018-08-01</td>\n", - " <td>216045</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8</th>\n", - " <td>2018-07-01</td>\n", - " <td>205477</td>\n", - " </tr>\n", - " <tr>\n", - " <th>9</th>\n", - " <td>2018-06-01</td>\n", - " <td>209374</td>\n", - " </tr>\n", - " <tr>\n", - " <th>10</th>\n", - " <td>2018-05-01</td>\n", - " <td>238235</td>\n", - " </tr>\n", - " <tr>\n", - " <th>11</th>\n", - " <td>2018-04-01</td>\n", - " <td>242857</td>\n", - " </tr>\n", - " <tr>\n", - " <th>12</th>\n", - " <td>2018-03-01</td>\n", - " <td>255431</td>\n", - " </tr>\n", - " <tr>\n", - " <th>13</th>\n", - " <td>2018-02-01</td>\n", - " <td>213548</td>\n", - " </tr>\n", - " <tr>\n", - " <th>14</th>\n", - " <td>2018-01-01</td>\n", - " <td>235705</td>\n", - " </tr>\n", - " <tr>\n", - " <th>15</th>\n", - " <td>2017-12-01</td>\n", - " <td>213359</td>\n", - " </tr>\n", - " <tr>\n", - " <th>16</th>\n", - " <td>2017-11-01</td>\n", - " <td>254671</td>\n", - " </tr>\n", - " <tr>\n", - " <th>17</th>\n", - " <td>2017-10-01</td>\n", - " <td>224244</td>\n", - " </tr>\n", - " <tr>\n", - " <th>18</th>\n", - " <td>2017-09-01</td>\n", - " <td>212790</td>\n", - " </tr>\n", - " <tr>\n", - " <th>19</th>\n", - " <td>2017-08-01</td>\n", - " <td>195593</td>\n", - " </tr>\n", - " <tr>\n", - " <th>20</th>\n", - " <td>2017-07-01</td>\n", - " <td>176778</td>\n", - " </tr>\n", - " <tr>\n", - " <th>21</th>\n", - " <td>2017-06-01</td>\n", - " <td>184406</td>\n", - " </tr>\n", - " <tr>\n", - " <th>22</th>\n", - " <td>2017-05-01</td>\n", - " <td>231250</td>\n", - " </tr>\n", - " <tr>\n", - " <th>23</th>\n", - " <td>2017-04-01</td>\n", - " <td>225176</td>\n", - " </tr>\n", - " <tr>\n", - " <th>24</th>\n", - " <td>2017-03-01</td>\n", - " <td>257081</td>\n", - " </tr>\n", - " <tr>\n", - " <th>25</th>\n", - " <td>2017-02-01</td>\n", - " <td>246199</td>\n", - " </tr>\n", - " <tr>\n", - " <th>26</th>\n", - " <td>2017-01-01</td>\n", - " <td>256925</td>\n", - " </tr>\n", - " <tr>\n", - " <th>27</th>\n", - " <td>2016-12-01</td>\n", - " <td>226680</td>\n", - " </tr>\n", - " <tr>\n", - " <th>28</th>\n", - " <td>2016-11-01</td>\n", - " <td>258655</td>\n", - " </tr>\n", - " <tr>\n", - " <th>29</th>\n", - " <td>2016-10-01</td>\n", - " <td>254070</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>91</th>\n", - " <td>2011-08-01</td>\n", - " <td>133965</td>\n", - " </tr>\n", - " <tr>\n", - " <th>92</th>\n", - " <td>2011-07-01</td>\n", - " <td>133186</td>\n", - " </tr>\n", - " <tr>\n", - " <th>93</th>\n", - " <td>2011-06-01</td>\n", - " <td>143617</td>\n", - " </tr>\n", - " <tr>\n", - " <th>94</th>\n", - " <td>2011-05-01</td>\n", - " <td>173431</td>\n", - " </tr>\n", - " <tr>\n", - " <th>95</th>\n", - " <td>2011-04-01</td>\n", - " <td>160696</td>\n", - " </tr>\n", - " <tr>\n", - " <th>96</th>\n", - " <td>2011-03-01</td>\n", - " <td>177539</td>\n", - " </tr>\n", - " <tr>\n", - " <th>97</th>\n", - " <td>2011-02-01</td>\n", - " <td>173222</td>\n", - " </tr>\n", - " <tr>\n", - " <th>98</th>\n", - " <td>2011-01-01</td>\n", - " <td>181135</td>\n", - " </tr>\n", - " <tr>\n", - " <th>99</th>\n", - " <td>2010-12-01</td>\n", - " <td>157738</td>\n", - " </tr>\n", - " <tr>\n", - " <th>100</th>\n", - " <td>2010-11-01</td>\n", - " <td>195329</td>\n", - " </tr>\n", - " <tr>\n", - " <th>101</th>\n", - " <td>2010-10-01</td>\n", - " <td>206730</td>\n", - " </tr>\n", - " <tr>\n", - " <th>102</th>\n", - " <td>2010-09-01</td>\n", - " <td>174706</td>\n", - " </tr>\n", - " <tr>\n", - " <th>103</th>\n", - " <td>2010-08-01</td>\n", - " <td>138323</td>\n", - " </tr>\n", - " <tr>\n", - " <th>104</th>\n", - " <td>2010-07-01</td>\n", - " <td>127687</td>\n", - " </tr>\n", - " <tr>\n", - " <th>105</th>\n", - " <td>2010-06-01</td>\n", - " <td>146164</td>\n", - " </tr>\n", - " <tr>\n", - " <th>106</th>\n", - " <td>2010-05-01</td>\n", - " <td>206822</td>\n", - " </tr>\n", - " <tr>\n", - " <th>107</th>\n", - " <td>2010-04-01</td>\n", - " <td>197381</td>\n", - " </tr>\n", - " <tr>\n", - " <th>108</th>\n", - " <td>2010-03-01</td>\n", - " <td>217092</td>\n", - " </tr>\n", - " <tr>\n", - " <th>109</th>\n", - " <td>2010-02-01</td>\n", - " <td>206380</td>\n", - " </tr>\n", - " <tr>\n", - " <th>110</th>\n", - " <td>2010-01-01</td>\n", - " <td>193376</td>\n", - " </tr>\n", - " <tr>\n", - " <th>111</th>\n", - " <td>2009-12-01</td>\n", - " <td>166506</td>\n", - " </tr>\n", - " <tr>\n", - " <th>112</th>\n", - " <td>2009-11-01</td>\n", - " <td>210227</td>\n", - " </tr>\n", - " <tr>\n", - " <th>113</th>\n", - " <td>2009-10-01</td>\n", - " <td>226865</td>\n", - " </tr>\n", - " <tr>\n", - " <th>114</th>\n", - " <td>2009-09-01</td>\n", - " <td>213208</td>\n", - " </tr>\n", - " <tr>\n", - " <th>115</th>\n", - " <td>2009-08-01</td>\n", - " <td>162281</td>\n", - " </tr>\n", - " <tr>\n", - " <th>116</th>\n", - " <td>2009-07-01</td>\n", - " <td>160993</td>\n", - " </tr>\n", - " <tr>\n", - " <th>117</th>\n", - " <td>2009-06-01</td>\n", - " <td>179176</td>\n", - " </tr>\n", - " <tr>\n", - " <th>118</th>\n", - " <td>2009-05-01</td>\n", - " <td>211879</td>\n", - " </tr>\n", - " <tr>\n", - " <th>119</th>\n", - " <td>2009-04-01</td>\n", - " <td>179152</td>\n", - " </tr>\n", - " <tr>\n", - " <th>120</th>\n", - " <td>2009-03-01</td>\n", - " <td>99008</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>121 rows × 2 columns</p>\n", - "</div>" - ], - "text/plain": [ - " LogMonth Freq\n", - "0 2019-03-01 34309\n", - "1 2019-02-01 236606\n", - "2 2019-01-01 252668\n", - "3 2018-12-01 226287\n", - "4 2018-11-01 253233\n", - "5 2018-10-01 256438\n", - "6 2018-09-01 230354\n", - "7 2018-08-01 216045\n", - "8 2018-07-01 205477\n", - "9 2018-06-01 209374\n", - "10 2018-05-01 238235\n", - "11 2018-04-01 242857\n", - "12 2018-03-01 255431\n", - "13 2018-02-01 213548\n", - "14 2018-01-01 235705\n", - "15 2017-12-01 213359\n", - "16 2017-11-01 254671\n", - "17 2017-10-01 224244\n", - "18 2017-09-01 212790\n", - "19 2017-08-01 195593\n", - "20 2017-07-01 176778\n", - "21 2017-06-01 184406\n", - "22 2017-05-01 231250\n", - "23 2017-04-01 225176\n", - "24 2017-03-01 257081\n", - "25 2017-02-01 246199\n", - "26 2017-01-01 256925\n", - "27 2016-12-01 226680\n", - "28 2016-11-01 258655\n", - "29 2016-10-01 254070\n", - ".. ... ...\n", - "91 2011-08-01 133965\n", - "92 2011-07-01 133186\n", - "93 2011-06-01 143617\n", - "94 2011-05-01 173431\n", - "95 2011-04-01 160696\n", - "96 2011-03-01 177539\n", - "97 2011-02-01 173222\n", - "98 2011-01-01 181135\n", - "99 2010-12-01 157738\n", - "100 2010-11-01 195329\n", - "101 2010-10-01 206730\n", - "102 2010-09-01 174706\n", - "103 2010-08-01 138323\n", - "104 2010-07-01 127687\n", - "105 2010-06-01 146164\n", - "106 2010-05-01 206822\n", - "107 2010-04-01 197381\n", - "108 2010-03-01 217092\n", - "109 2010-02-01 206380\n", - "110 2010-01-01 193376\n", - "111 2009-12-01 166506\n", - "112 2009-11-01 210227\n", - "113 2009-10-01 226865\n", - "114 2009-09-01 213208\n", - "115 2009-08-01 162281\n", - "116 2009-07-01 160993\n", - "117 2009-06-01 179176\n", - "118 2009-05-01 211879\n", - "119 2009-04-01 179152\n", - "120 2009-03-01 99008\n", - "\n", - "[121 rows x 2 columns]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + " LogMonth Freq\n", + "0 2019-03-01 34309\n", + "1 2019-02-01 236606\n", + "2 2019-01-01 252668\n", + "3 2018-12-01 226287\n", + "4 2018-11-01 253233\n", + "5 2018-10-01 256438\n", + "6 2018-09-01 230354\n", + "7 2018-08-01 216045\n", + "8 2018-07-01 205477\n", + "9 2018-06-01 209374\n", + "10 2018-05-01 238235\n", + "11 2018-04-01 242857\n", + "12 2018-03-01 255431\n", + "13 2018-02-01 213548\n", + "14 2018-01-01 235705\n", + "15 2017-12-01 213359\n", + "16 2017-11-01 254671\n", + "17 2017-10-01 224244\n", + "18 2017-09-01 212790\n", + "19 2017-08-01 195593\n", + "20 2017-07-01 176778\n", + "21 2017-06-01 184406\n", + "22 2017-05-01 231250\n", + "23 2017-04-01 225176\n", + "24 2017-03-01 257081\n", + "25 2017-02-01 246199\n", + "26 2017-01-01 256925\n", + "27 2016-12-01 226680\n", + "28 2016-11-01 258655\n", + "29 2016-10-01 254070\n", + "30 2016-09-01 238406\n", + "31 2016-08-01 192681\n", + "32 2016-07-01 171567\n", + "33 2016-06-01 201867\n", + "34 2016-05-01 294276\n", + "35 2016-04-01 319474\n", + "36 2016-03-01 360327\n", + "37 2016-02-01 366637\n", + "38 2016-01-01 372907\n", + "39 2015-12-01 307003\n", + "40 2015-11-01 338129\n", + "41 2015-10-01 264966\n", + "42 2015-09-01 272765\n", + "43 2015-08-01 215203\n", + "44 2015-07-01 207626\n", + "45 2015-06-01 236916\n", + "46 2015-05-01 250012\n", + "47 2015-04-01 201602\n", + "48 2015-03-01 188296\n", + "49 2015-02-01 163529\n", + "50 2015-01-01 163552\n", + "51 2014-12-01 137314\n", + "52 2014-11-01 154933\n", + "53 2014-10-01 165732\n", + "54 2014-09-01 132150\n", + "55 2014-08-01 93636\n", + "56 2014-07-01 80837\n", + "57 2014-06-01 94967\n", + "58 2014-05-01 132788\n", + "59 2014-04-01 129260\n", + "60 2014-03-01 157123\n", + "61 2014-02-01 146198\n", + "62 2014-01-01 151970\n", + "63 2013-12-01 131054\n", + "64 2013-11-01 156195\n", + "65 2013-10-01 163029\n", + "66 2013-09-01 122300\n", + "67 2013-08-01 117010\n", + "68 2013-07-01 113163\n", + "69 2013-06-01 109264\n", + "70 2013-05-01 163644\n", + "71 2013-04-01 146456\n", + "72 2013-03-01 156797\n", + "73 2013-02-01 196511\n", + "74 2013-01-01 169755\n", + "75 2012-12-01 147379\n", + "76 2012-11-01 186991\n", + "77 2012-10-01 204204\n", + "78 2012-09-01 160734\n", + "79 2012-08-01 122418\n", + "80 2012-07-01 110906\n", + "81 2012-06-01 123762\n", + "82 2012-05-01 178005\n", + "83 2012-04-01 173853\n", + "84 2012-03-01 184071\n", + "85 2012-02-01 197130\n", + "86 2012-01-01 192057\n", + "87 2011-12-01 175397\n", + "88 2011-11-01 211174\n", + "89 2011-10-01 224586\n", + "90 2011-09-01 171945\n", + "91 2011-08-01 133965\n", + "92 2011-07-01 133186\n", + "93 2011-06-01 143617\n", + "94 2011-05-01 173431\n", + "95 2011-04-01 160696\n", + "96 2011-03-01 177539\n", + "97 2011-02-01 173222\n", + "98 2011-01-01 181135\n", + "99 2010-12-01 157738\n", + "100 2010-11-01 195329\n", + "101 2010-10-01 206730\n", + "102 2010-09-01 174706\n", + "103 2010-08-01 138323\n", + "104 2010-07-01 127687\n", + "105 2010-06-01 146164\n", + "106 2010-05-01 206822\n", + "107 2010-04-01 197381\n", + "108 2010-03-01 217092\n", + "109 2010-02-01 206380\n", + "110 2010-01-01 193376\n", + "111 2009-12-01 166506\n", + "112 2009-11-01 210227\n", + "113 2009-10-01 226865\n", + "114 2009-09-01 213208\n", + "115 2009-08-01 162281\n", + "116 2009-07-01 160993\n", + "117 2009-06-01 179176\n", + "118 2009-05-01 211879\n", + "119 2009-04-01 179152\n", + "120 2009-03-01 99008\n" + ] } ], "source": [ "# filter hits per month (all filters) (data quarry from 05.03.2019, that's why hitcount Mar 2019 is so small)\n", "df_hits = pd.read_csv(\"quarry-34014-how-many-abuse-filter-hits-have-been-there-per-month-en-wiki-run346197.csv\", sep=',')\n", "df_hits['LogMonth'] = pd.to_datetime(df_hits['LogMonth'], format=\"%Y%m\")\n", - "df_hits" + "\n", + "with pd.option_context('display.max_rows', None, 'display.max_columns', None):\n", + " print(df_hits)\n" ] }, {