From b39a7356accd99a9a089eebb6d6288e76fbc5d64 Mon Sep 17 00:00:00 2001
From: Lyudmila Vaseva <vaseva@mi.fu-berlin.de>
Date: Fri, 14 Dec 2018 11:43:12 +0100
Subject: [PATCH] Add notes on the AbuseFilter extension

---
 EN-state-of-the-art |  10 ++++
 notes               | 138 ++++++++++++++++++++++++++++++++++++++++++++
 todo                |   2 +
 3 files changed, 150 insertions(+)

diff --git a/EN-state-of-the-art b/EN-state-of-the-art
index a1831aa..5d53907 100644
--- a/EN-state-of-the-art
+++ b/EN-state-of-the-art
@@ -255,6 +255,16 @@ Filter ID 	Public description 	Actions 	Status 	Last modified 	Visibility 	Hit c
 https://en.wikipedia.org/wiki/Special:AbuseFilter/61
 statistics are info such as "Of the last 1,728 actions, this filter has matched 10 (0.58%). On average, its run time is 0.34 ms, and it consumes 3 conditions of the condition limit." // not sure what the condition limit is
 
+List information about filters:
+https://en.wikipedia.org/w/api.php?action=query&list=abusefilters&abfshow=!private&abfprop=id%7Chits
+or in the sandbox:
+https://en.wikipedia.org/wiki/Special:ApiSandbox#action=query&list=abusefilters&abfshow=!private&abfprop=id%7Chits
+
+List instances where actions triggered an abuse filter.
+https://en.wikipedia.org/w/api.php?action=query&list=abuselog&afluser=SineBot&aflprop=ids
+or in the sandbox:
+https://en.wikipedia.org/wiki/Special:ApiSandbox#action=query&list=abuselog&afluser=SineBot&aflprop=ids
+
 * percentage of triggered filters/all edits
   * break down triggered filters according to typology
 * percentage filters of different types over the years
diff --git a/notes b/notes
index 216017e..481e283 100644
--- a/notes
+++ b/notes
@@ -317,3 +317,141 @@ tips on controlling efficiency/order of operations
 lazy evaluation: when 1st negative condition is met, filter terminates execution
 
 "You should always order your filters so that the condition that will knock out the largest number of edits is first. Usually this is a user groups or a user editcount check; in general, the last condition should be the regex that is actually looking for the sort of vandalism you're targeting. "
+
+===========================================================================
+https://www.mediawiki.org/wiki/Extension:AbuseFilter
+
+Author(s)
+    Andrew Garrett, <-- lead dev
+    River Tarnell
+    Victor Vasiliev
+    Marius Hoch
+
+a MediaWiki extension written in PHP;
+licensed under GPL 2.0
+no further dependencies needed
+
+code repo: https://gerrit.wikimedia.org/g/mediawiki/extensions/AbuseFilter
+issue tracker: https://phabricator.wikimedia.org/tag/abusefilter/
+
+"Once the extension has been installed, filters can be created/tested/changed/deleted and the logs can be accessed from the Abuse filter management page Special:AbuseFilter. "
+
+you can import filters from Wikipedia
+
+Creates the following tables:
+mysql> describe abuse_filter; (from https://www.mediawiki.org/wiki/Extension:AbuseFilter/abuse_filter_table)
++--------------------+---------------------+------+-----+---------+----------------+
+| Field              | Type                | Null | Key | Default | Extra          |
++--------------------+---------------------+------+-----+---------+----------------+
+| af_id              | bigint(20) unsigned | NO   | PRI | NULL    | auto_increment |
+| af_pattern         | blob                | NO   |     | NULL    |                |
+| af_user            | bigint(20) unsigned | NO   | MUL | NULL    |                |
+| af_user_text       | varbinary(255)      | NO   |     | NULL    |                |
+| af_timestamp       | binary(14)          | NO   |     | NULL    |                |
+| af_enabled         | tinyint(1)          | NO   |     | 1       |                |
+| af_comments        | blob                | YES  |     | NULL    |                |
+| af_public_comments | tinyblob            | YES  |     | NULL    |                |
+| af_hidden          | tinyint(1)          | NO   |     | 0       |                |
+| af_hit_count       | bigint(20)          | NO   |     | 0       |                |
+| af_throttled       | tinyint(1)          | NO   |     | 0       |                |
+| af_deleted         | tinyint(1)          | NO   |     | 0       |                |
+| af_actions         | varbinary(255)      | NO   |     |         |                |
+| af_global          | tinyint(1)          | NO   |     | 0       |                |
+| af_group           | varbinary(64)       | NO   | MUL | default |                |
++--------------------+---------------------+------+-----+---------+----------------+
+
+mysql> describe abuse_filter_log; (from https://www.mediawiki.org/wiki/Extension:AbuseFilter/abuse_filter_log_table)
++------------------+---------------------+------+-----+---------+----------------+
+| Field            | Type                | Null | Key | Default | Extra          |
++------------------+---------------------+------+-----+---------+----------------+
+| afl_id           | bigint(20) unsigned | NO   | PRI | NULL    | auto_increment |
+| afl_filter       | varbinary(64)       | NO   | MUL | NULL    |                |
+| afl_user         | bigint(20) unsigned | NO   | MUL | NULL    |                |
+| afl_user_text    | varbinary(255)      | NO   |     | NULL    |                |
+| afl_ip           | varbinary(255)      | NO   | MUL | NULL    |                |
+| afl_action       | varbinary(255)      | NO   |     | NULL    |                |
+| afl_actions      | varbinary(255)      | NO   |     | NULL    |                |
+| afl_var_dump     | blob                | NO   |     | NULL    |                |
+| afl_timestamp    | binary(14)          | NO   | MUL | NULL    |                |
+| afl_namespace    | tinyint(4)          | NO   | MUL | NULL    |                |
+| afl_title        | varbinary(255)      | NO   |     | NULL    |                |
+| afl_wiki         | varbinary(64)       | YES  | MUL | NULL    |                |
+| afl_deleted      | tinyint(1)          | NO   |     | 0       |                |
+| afl_patrolled_by | int(10) unsigned    | YES  |     | NULL    |                |
+| afl_rev_id       | int(10) unsigned    | YES  | MUL | NULL    |                |
+| afl_log_id       | int(10) unsigned    | YES  | MUL | NULL    |                |
++------------------+---------------------+------+-----+---------+----------------+
+16 rows in set (0.00 sec)
+
+mysql> describe abuse_filter_history; (from https://www.mediawiki.org/wiki/Extension:AbuseFilter/abuse_filter_history_table)
++---------------------+---------------------+------+-----+---------+----------------+
+| Field               | Type                | Null | Key | Default | Extra          |
++---------------------+---------------------+------+-----+---------+----------------+
+| afh_id              | bigint(20) unsigned | NO   | PRI | NULL    | auto_increment |
+| afh_filter          | bigint(20) unsigned | NO   | MUL | NULL    |                |
+| afh_user            | bigint(20) unsigned | NO   | MUL | NULL    |                |
+| afh_user_text       | varbinary(255)      | NO   | MUL | NULL    |                |
+| afh_timestamp       | binary(14)          | NO   | MUL | NULL    |                |
+| afh_pattern         | blob                | NO   |     | NULL    |                |
+| afh_comments        | blob                | NO   |     | NULL    |                |
+| afh_flags           | tinyblob            | NO   |     | NULL    |                |
+| afh_public_comments | tinyblob            | YES  |     | NULL    |                |
+| afh_actions         | blob                | YES  |     | NULL    |                |
+| afh_deleted         | tinyint(1)          | NO   |     | 0       |                |
+| afh_changed_fields  | varbinary(255)      | NO   |     |         |                |
+| afh_group           | varbinary(64)       | YES  |     | NULL    |                |
++---------------------+---------------------+------+-----+---------+----------------+
+13 rows in set (0.00 sec)
+
+mysql> describe abuse_filter_action; (from https://www.mediawiki.org/wiki/Extension:AbuseFilter/abuse_filter_action_table)
++-----------------+---------------------+------+-----+---------+-------+
+| Field           | Type                | Null | Key | Default | Extra |
++-----------------+---------------------+------+-----+---------+-------+
+| afa_filter      | bigint(20) unsigned | NO   | PRI | NULL    |       |
+| afa_consequence | varbinary(255)      | NO   | PRI | NULL    |       |
+| afa_parameters  | tinyblob            | NO   |     | NULL    |       |
++-----------------+---------------------+------+-----+---------+-------+
+3 rows in set (0.00 sec)
+
+# API calls
+
+## List information about filters:
+https://en.wikipedia.org/w/api.php?action=query&list=abusefilters&abfshow=!private&abfprop=id%7Chits
+or in the sandbox:
+https://en.wikipedia.org/wiki/Special:ApiSandbox#action=query&list=abusefilters&abfshow=!private&abfprop=id%7Chits
+
+Parameters
+
+    abfstartid: The filter id to start enumerating from
+    abfendid: The filter id to stop enumerating at
+    abfdir: The direction in which to enumerate (older, newer)
+    abfshow: Show only filters which meet these criteria (enabled|!enabled|deleted|!deleted|private|!private)
+    abflimit: The maximum number of filters to list
+    abfprop: Which properties to get (id|description|pattern|actions|hits|comments|lasteditor|lastedittime|status|private)
+
+When filters are private, some of the properties specified with abfprop will be missing unless you have the appropriate user rights.
+
+## List instances where actions triggered an abuse filter.
+https://en.wikipedia.org/w/api.php?action=query&list=abuselog&afluser=SineBot&aflprop=ids
+or in the sandbox:
+https://en.wikipedia.org/wiki/Special:ApiSandbox#action=query&list=abuselog&afluser=SineBot&aflprop=ids
+
+Parameters
+
+    aflstart: The timestamp to start enumerating from
+    aflend: The timestamp to stop enumerating at
+    afldir: The direction in which to enumerate (older, newer)
+    afluser: Show only entries where the action was attempted by a given user or IP address.
+    afltitle: Show only entries where the action involved a given page.
+    aflfilter: Show only entries that triggered a given filter ID
+    afllimit: The maximum number of entries to list
+    aflprop: Which properties to get (ids|user|title|action|result|timestamp|details)
+
+
+
+===========================================================================
+https://www.mediawiki.org/wiki/Extension:AbuseFilter/Rules_format
+
+
+===========================================================================
+https://phabricator.wikimedia.org/tag/abusefilter/
diff --git a/todo b/todo
index 9f00833..0d1d69c 100644
--- a/todo
+++ b/todo
@@ -2,6 +2,7 @@
 
 * Look at filters: what different types of filters are there
 * understand how are stats generated
+  * look for db dumps
   * research filter development over time
 
   https://meta.wikimedia.org/wiki/Research:Quarry
@@ -10,6 +11,7 @@
   https://upload.wikimedia.org/wikipedia/commons/9/94/MediaWiki_1.28.0_database_schema.svg
   https://tools.wmflabs.org/
   https://tools.wmflabs.org/admin/tools
+  https://www.mediawiki.org/wiki/API:Main_page
 
   * create a developer account
   * ping aaron/amir for access to a backend db to look at filters; explanation how this is helping the community is important
-- 
GitLab