From ce21761255e13aac7b036668012881ada6164567 Mon Sep 17 00:00:00 2001
From: Lyudmila Vaseva <vaseva@mi.fu-berlin.de>
Date: Wed, 8 May 2019 11:46:00 +0200
Subject: [PATCH] Finish general presentation outline

---
 research-group-presi/slides.html |  51 +++++++++-
 research-group-presi/slides.md   | 164 ++++++++++++++++++++++++++++++-
 2 files changed, 213 insertions(+), 2 deletions(-)

diff --git a/research-group-presi/slides.html b/research-group-presi/slides.html
index 970c896..b06ad3e 100644
--- a/research-group-presi/slides.html
+++ b/research-group-presi/slides.html
@@ -247,7 +247,56 @@ Jun 2010 : STiki initial release
 <section class="slide level1">
 
 <h2 id="state-of-the-art-on-en-wikipedia">State of the Art on EN Wikipedia</h2>
-<p>Data analysis</p>
+<p>Data analysis of the abuse filter extension tables</p>
+</section>
+<section class="slide level1">
+
+<p>+--------------------+---------------------+------+-----+---------+----------------+ | Field | Type | Null | Key | Default | Extra | +--------------------+---------------------+------+-----+---------+----------------+ | af_id | bigint(20) unsigned | NO | PRI | NULL | auto_increment | | af_pattern | blob | NO | | NULL | | | af_user | bigint(20) unsigned | NO | MUL | NULL | | | af_user_text | varbinary(255) | NO | | NULL | | | af_timestamp | binary(14) | NO | | NULL | | | af_enabled | tinyint(1) | NO | | 1 | | | af_comments | blob | YES | | NULL | | | af_public_comments | tinyblob | YES | | NULL | | | af_hidden | tinyint(1) | NO | | 0 | | | af_hit_count | bigint(20) | NO | | 0 | | | af_throttled | tinyint(1) | NO | | 0 | | | af_deleted | tinyint(1) | NO | | 0 | | | af_actions | varbinary(255) | NO | | | | | af_global | tinyint(1) | NO | | 0 | | | af_group | varbinary(64) | NO | MUL | default | | +--------------------+---------------------+------+-----+---------+----------------+ \end{verbatim} ~ \end{figure*}</p>
+</section>
+<section class="slide level1">
+
+
+</section>
+<section class="slide level1">
+
+
+</section>
+<section class="slide level1">
+
+\begin{figure*}
+
+</section>
+<section class="slide level1">
+
+<h2 id="what-do-most-active-filters-do">What do most active filters do?</h2>
+
+</section>
+<section class="slide level1">
+
+<h2 id="descriptive-statistics">Descriptive statistics</h2>
+<p>see jupyter notebook for diagrams</p>
+</section>
+<section class="slide level1">
+
+<h2 id="public-and-hidden-filters">Public and hidden filters</h2>
+<ul>
+<li class="fragment">2/3 of filters are hidden</li>
+<li class="fragment">all admins can view hidden filters</li>
+<li class="fragment">mailing list for discussing private filters</li>
+</ul>
+</section>
+<section class="slide level1">
+
+<h2 id="manual-classification">Manual classification</h2>
+<p><em>vandalism</em>, <em>good faith</em> and <em>maintenance</em></p>
+</section>
+<section class="slide level1">
+
+<p>diagram with sublabels</p>
+</section>
+<section class="slide level1">
+
+<p>check memos</p>
 </section>
 <section id="next-steps-for-finishing-the-thesis" class="slide level1">
 <h1>Next steps for finishing the thesis</h1>
diff --git a/research-group-presi/slides.md b/research-group-presi/slides.md
index 8daac18..ad4c3a7 100644
--- a/research-group-presi/slides.md
+++ b/research-group-presi/slides.md
@@ -239,7 +239,169 @@ funnel diagram with filters
 
 ## State of the Art on EN Wikipedia
 
-Data analysis
+Data analysis of the abuse filter extension tables
+
+---
+
++--------------------+---------------------+------+-----+---------+----------------+
+| Field              | Type                | Null | Key | Default | Extra          |
++--------------------+---------------------+------+-----+---------+----------------+
+| af_id              | bigint(20) unsigned | NO   | PRI | NULL    | auto_increment |
+| af_pattern         | blob                | NO   |     | NULL    |                |
+| af_user            | bigint(20) unsigned | NO   | MUL | NULL    |                |
+| af_user_text       | varbinary(255)      | NO   |     | NULL    |                |
+| af_timestamp       | binary(14)          | NO   |     | NULL    |                |
+| af_enabled         | tinyint(1)          | NO   |     | 1       |                |
+| af_comments        | blob                | YES  |     | NULL    |                |
+| af_public_comments | tinyblob            | YES  |     | NULL    |                |
+| af_hidden          | tinyint(1)          | NO   |     | 0       |                |
+| af_hit_count       | bigint(20)          | NO   |     | 0       |                |
+| af_throttled       | tinyint(1)          | NO   |     | 0       |                |
+| af_deleted         | tinyint(1)          | NO   |     | 0       |                |
+| af_actions         | varbinary(255)      | NO   |     |         |                |
+| af_global          | tinyint(1)          | NO   |     | 0       |                |
+| af_group           | varbinary(64)       | NO   | MUL | default |                |
++--------------------+---------------------+------+-----+---------+----------------+
+\end{verbatim}
+  \caption{abuse\_filter schema}~\label{fig:db-schemas-af}
+\end{figure*}
+
+---
+
+\begin{figure*}
+\begin{verbatim}
+abuse_filter_log
++------------------+---------------------+------+-----+---------+----------------+
+| Field            | Type                | Null | Key | Default | Extra          |
++------------------+---------------------+------+-----+---------+----------------+
+| afl_id           | bigint(20) unsigned | NO   | PRI | NULL    | auto_increment |
+| afl_filter       | varbinary(64)       | NO   | MUL | NULL    |                |
+| afl_user         | bigint(20) unsigned | NO   | MUL | NULL    |                |
+| afl_user_text    | varbinary(255)      | NO   |     | NULL    |                |
+| afl_ip           | varbinary(255)      | NO   | MUL | NULL    |                |
+| afl_action       | varbinary(255)      | NO   |     | NULL    |                |
+| afl_actions      | varbinary(255)      | NO   |     | NULL    |                |
+| afl_var_dump     | blob                | NO   |     | NULL    |                |
+| afl_timestamp    | binary(14)          | NO   | MUL | NULL    |                |
+| afl_namespace    | tinyint(4)          | NO   | MUL | NULL    |                |
+| afl_title        | varbinary(255)      | NO   |     | NULL    |                |
+| afl_wiki         | varbinary(64)       | YES  | MUL | NULL    |                |
+| afl_deleted      | tinyint(1)          | NO   |     | 0       |                |
+| afl_patrolled_by | int(10) unsigned    | YES  |     | NULL    |                |
+| afl_rev_id       | int(10) unsigned    | YES  | MUL | NULL    |                |
+| afl_log_id       | int(10) unsigned    | YES  | MUL | NULL    |                |
++------------------+---------------------+------+-----+---------+----------------+
+\end{verbatim}
+  \caption{abuse\_filter\_log schema}~\label{fig:db-schemas-afl}
+\end{figure*}
+
+---
+
+\begin{figure*}
+\begin{verbatim}
+abuse_filter_history
++---------------------+---------------------+------+-----+---------+----------------+
+| Field               | Type                | Null | Key | Default | Extra          |
++---------------------+---------------------+------+-----+---------+----------------+
+| afh_id              | bigint(20) unsigned | NO   | PRI | NULL    | auto_increment |
+| afh_filter          | bigint(20) unsigned | NO   | MUL | NULL    |                |
+| afh_user            | bigint(20) unsigned | NO   | MUL | NULL    |                |
+| afh_user_text       | varbinary(255)      | NO   | MUL | NULL    |                |
+| afh_timestamp       | binary(14)          | NO   | MUL | NULL    |                |
+| afh_pattern         | blob                | NO   |     | NULL    |                |
+| afh_comments        | blob                | NO   |     | NULL    |                |
+| afh_flags           | tinyblob            | NO   |     | NULL    |                |
+| afh_public_comments | tinyblob            | YES  |     | NULL    |                |
+| afh_actions         | blob                | YES  |     | NULL    |                |
+| afh_deleted         | tinyint(1)          | NO   |     | 0       |                |
+| afh_changed_fields  | varbinary(255)      | NO   |     |         |                |
+| afh_group           | varbinary(64)       | YES  |     | NULL    |                |
++---------------------+---------------------+------+-----+---------+----------------+
+\end{verbatim}
+  \caption{abuse\_filter\_history schema}~\label{fig:db-schemas-afh}
+\end{figure*}
+
+---
+
+\begin{figure*}
+\begin{verbatim}
+abuse_filter_action
++-----------------+---------------------+------+-----+---------+-------+
+| Field           | Type                | Null | Key | Default | Extra |
++-----------------+---------------------+------+-----+---------+-------+
+| afa_filter      | bigint(20) unsigned | NO   | PRI | NULL    |       |
+| afa_consequence | varbinary(255)      | NO   | PRI | NULL    |       |
+| afa_parameters  | tinyblob            | NO   |     | NULL    |       |
++-----------------+---------------------+------+-----+---------+-------+
+\end{verbatim}
+
+---
+
+## What do most active filters do?
+
+\begin{table*}
+  \centering
+    \begin{tabular}{r p{10cm} p{5cm} }
+    % \toprule
+    Filter ID & Publicly available description & Actions \\
+    \hline
+      135 & repeating characters & tag, warn \\
+      30 & "large deletion from article by new editors" & tag, warn \\
+      61 & "new user removing references" ("new user" is handled by "!("confirmed" in user\_groups)") & tag \\
+      18 & "test type edits from clicking on edit bar" (people don't replace Example texts when click-editing) & deleted in Feb 2012 \\
+      3 & "new user blanking articles" & tag, warn \\
+      172 & "section blanking" & tag \\
+      50 & "shouting" (contribution consists of all caps, numbers and punctuation) & tag, warn \\
+      98 & "creating very short new article" & tag \\
+      65 & "excessive whitespace" (note: "associated with ascii art and some types of vandalism") & deleted in Jan 2010 \\
+      132 & "removal of all categories" & tag, warn \\
+      225 & "vandalism in all caps" (difference to 50? seems to be swear words, but shouldn't they be caught by 50 anyway?) & disallow \\
+      189 & "BLP vandalism or libel" & tag \\
+      402 & "new article without references" & deleted in Apr 2013, before that disabled with comment "disabling, no real use" \\
+      384 & "addition of bad words or other vandalism" (seems to be a blacklist) & disallow \\
+      432 & "starting new line with lower case letters" & tag, warn //I recall there was a rule of thumb recommending not to use filters for style things? although that's not really style, but rather wrong grammar.. \\
+      380 & hidden; public comment "multiple obscenities" & disallow \\
+      351 & "text added after categories and interwiki" & tag, warn \\
+      279 & "repeated attempts to vandalise" & tag, throttle (triggered when someone hits "edit" repeatedly in a short amount of time) \\
+      491 & "edits ending with emoticons or !" & tag, warn \\
+      636 & "unexplained removal of sourced content" & warn (that, together with 634 and 635 refutes my theory that warn always goes together with tag) \\
+      231 & "long string of characters containing no spaces" (that's surely english though^^) & tag, warn \\
+      650 & "creation of a new article without any categories" & (log only) \\
+      527 & hidden; public comments "T34234: log/throttle possible sleeper account creations" & throttle \\
+      633 & "possible canned edit summary" (apparently pre-filled on mobile though) & tag \\
+      686 & "IP adding possible unreferenced material to BLP" (BLP= biography of living people? I thought, it was forbidden to edit them without a registered account) & (log only) \\
+      712 & "possibly changing date of birth in infobox" ("possibly"? and I thought infoboxes were pre-generated from wikidata?) & (log only) \\
+      833 & "newer user possibly adding a unreferenced or improperly referenced material" & (log only) \\
+  \end{tabular}
+\end{table*}
+
+---
+
+## Descriptive statistics
+
+see jupyter notebook for diagrams
+
+---
+
+## Public and hidden filters
+
+* 2/3 of filters are hidden
+* all admins can view hidden filters
+* mailing list for discussing private filters
+
+---
+
+## Manual classification
+
+*vandalism*, *good faith* and *maintenance*
+
+---
+
+diagram with sublabels
+
+---
+
+check memos
 
 ---
 
-- 
GitLab