From da6e7e398b82c4f683f3585034d21e4efc73f66e Mon Sep 17 00:00:00 2001
From: Eike Cochu <eike@cochu.com>
Date: Fri, 25 Dec 2015 15:08:16 +0100
Subject: [PATCH] fixed data dir resolution; moved filebase implementation from
 util to cmd, it is analyzer sensitive; moved exceptions to ex package; added
 high precision time measurements; simplified import command; deleted
 stopwords.txt, using hardcoded variant

---
 ma-impl.sublime-workspace                     |  98 ++++++++++
 .../.settings/org.eclipse.jdt.core.prefs      |   6 +-
 .../src/main/java/de/vipra/cmd/Main.java      |   7 +
 .../de/vipra/cmd}/ex/FilebaseException.java   |   2 +-
 .../java/de/vipra/cmd/ex/ImportException.java |  23 +++
 .../cmd/{lda => ex}/LDAAnalyzerException.java |   2 +-
 .../{text => ex}/PreprocessorException.java   |   2 +-
 .../main/java/de/vipra/cmd/file/Filebase.java |  55 ++++++
 .../java/de/vipra/cmd/file/FilebaseIndex.java |  46 +++++
 .../java/de/vipra/cmd/file/JGibbFilebase.java |  33 ++++
 .../java/de/vipra/cmd/file/LdacFilebase.java  |  33 ++++
 .../de/vipra/cmd/lda/JGibbLDAAnalyzer.java    |  12 +-
 .../java/de/vipra/cmd/lda/LDAAnalyzer.java    |  20 +-
 .../de/vipra/cmd/lda/LdacLDAAnalyzer.java     |  12 +-
 .../de/vipra/cmd/option/DeleteCommand.java    |  18 +-
 .../de/vipra/cmd/option/ImportCommand.java    | 175 +++++++++---------
 .../de/vipra/cmd/text/CustomPreprocessor.java |  34 ++--
 .../de/vipra/cmd/text/LucenePreprocessor.java |  18 +-
 .../java/de/vipra/cmd/text/Preprocessor.java  |  29 ++-
 vipra-cmd/src/main/resources/stopwords.txt    | 173 -----------------
 .../vipra/rest/resource/ArticleResource.java  |   4 +-
 .../.settings/org.eclipse.jdt.core.prefs      |   6 +-
 vipra-util/pom.xml                            |   2 +-
 .../src/main/java/de/vipra/util/Config.java   |  71 ++++---
 .../main/java/de/vipra/util/Constants.java    |  34 +++-
 .../src/main/java/de/vipra/util/Mongo.java    |   9 +-
 .../main/java/de/vipra/util/PathUtils.java    |   8 +-
 .../main/java/de/vipra/util/StringUtils.java  |  50 ++++-
 .../src/main/java/de/vipra/util/Timer.java    |  22 +++
 .../vipra/util/{ => ex}/ConfigException.java  |   2 +-
 .../main/java/de/vipra/util/model/Model.java  |   3 +-
 .../de/vipra/util/model/TermFrequency.java    |  12 +-
 .../vipra/util/service/DatabaseService.java   |   8 +
 .../vipra/util/service/FilebaseService.java   |  92 ---------
 .../java/de/vipra/util/service/Service.java   |   2 +-
 35 files changed, 663 insertions(+), 460 deletions(-)
 rename {vipra-util/src/main/java/de/vipra/util => vipra-cmd/src/main/java/de/vipra/cmd}/ex/FilebaseException.java (89%)
 create mode 100644 vipra-cmd/src/main/java/de/vipra/cmd/ex/ImportException.java
 rename vipra-cmd/src/main/java/de/vipra/cmd/{lda => ex}/LDAAnalyzerException.java (89%)
 rename vipra-cmd/src/main/java/de/vipra/cmd/{text => ex}/PreprocessorException.java (89%)
 create mode 100644 vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java
 create mode 100644 vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseIndex.java
 create mode 100644 vipra-cmd/src/main/java/de/vipra/cmd/file/JGibbFilebase.java
 create mode 100644 vipra-cmd/src/main/java/de/vipra/cmd/file/LdacFilebase.java
 delete mode 100644 vipra-cmd/src/main/resources/stopwords.txt
 create mode 100644 vipra-util/src/main/java/de/vipra/util/Timer.java
 rename vipra-util/src/main/java/de/vipra/util/{ => ex}/ConfigException.java (86%)
 delete mode 100644 vipra-util/src/main/java/de/vipra/util/service/FilebaseService.java

diff --git a/ma-impl.sublime-workspace b/ma-impl.sublime-workspace
index 5c7ab397..2fe3e420 100644
--- a/ma-impl.sublime-workspace
+++ b/ma-impl.sublime-workspace
@@ -271,6 +271,15 @@
 	},
 	"buffers":
 	[
+		{
+			"contents": "1. import all new articles:\n  * generate article statistics\n  * into database\n  * into filebase, using filebase adapter of selected tm library\n2. recreate topic modeling, using selected tm library\n  * needs no articles in memory, works completely on files\n3. insert new topic model into database\n  * how to interpret tm result?\n  * how to relate tm result to articles?\n4. index new articles\n  * needs title, processed text and topics\n\nfilebase writes into single file according to tm library\nneeds index where articles are stored in the file\n\nall new articles are held in memory?\n\noriginal text is not needed except for stats (original text length) and for db for ui browsing",
+			"settings":
+			{
+				"buffer_size": 675,
+				"line_ending": "Unix",
+				"name": "1. import all new articles:"
+			}
+		}
 	],
 	"build_system": "",
 	"build_system_choices":
@@ -892,8 +901,97 @@
 	"groups":
 	[
 		{
+			"selected": 0,
 			"sheets":
 			[
+				{
+					"buffer": 0,
+					"semi_transient": false,
+					"settings":
+					{
+						"buffer_size": 675,
+						"regions":
+						{
+						},
+						"selection":
+						[
+							[
+								579,
+								579
+							]
+						],
+						"settings":
+						{
+							"BracketHighlighterBusy": false,
+							"auto_name": "1. import all new articles:",
+							"bh_regions":
+							[
+								"bh_default",
+								"bh_default_center",
+								"bh_default_open",
+								"bh_default_close",
+								"bh_default_content",
+								"bh_square",
+								"bh_square_center",
+								"bh_square_open",
+								"bh_square_close",
+								"bh_square_content",
+								"bh_round",
+								"bh_round_center",
+								"bh_round_open",
+								"bh_round_close",
+								"bh_round_content",
+								"bh_c_define",
+								"bh_c_define_center",
+								"bh_c_define_open",
+								"bh_c_define_close",
+								"bh_c_define_content",
+								"bh_single_quote",
+								"bh_single_quote_center",
+								"bh_single_quote_open",
+								"bh_single_quote_close",
+								"bh_single_quote_content",
+								"bh_double_quote",
+								"bh_double_quote_center",
+								"bh_double_quote_open",
+								"bh_double_quote_close",
+								"bh_double_quote_content",
+								"bh_angle",
+								"bh_angle_center",
+								"bh_angle_open",
+								"bh_angle_close",
+								"bh_angle_content",
+								"bh_tag",
+								"bh_tag_center",
+								"bh_tag_open",
+								"bh_tag_close",
+								"bh_tag_content",
+								"bh_regex",
+								"bh_regex_center",
+								"bh_regex_open",
+								"bh_regex_close",
+								"bh_regex_content",
+								"bh_unmatched",
+								"bh_unmatched_center",
+								"bh_unmatched_open",
+								"bh_unmatched_close",
+								"bh_unmatched_content",
+								"bh_curly",
+								"bh_curly_center",
+								"bh_curly_open",
+								"bh_curly_close",
+								"bh_curly_content"
+							],
+							"incomplete_sync": null,
+							"syntax": "Packages/Text/Plain text.tmLanguage"
+						},
+						"translation.x": 0.0,
+						"translation.y": 0.0,
+						"zoom_level": 1.0
+					},
+					"stack_index": 0,
+					"type": "text"
+				}
 			]
 		}
 	],
diff --git a/vipra-cmd/.settings/org.eclipse.jdt.core.prefs b/vipra-cmd/.settings/org.eclipse.jdt.core.prefs
index 78a9b450..8995f4d0 100644
--- a/vipra-cmd/.settings/org.eclipse.jdt.core.prefs
+++ b/vipra-cmd/.settings/org.eclipse.jdt.core.prefs
@@ -14,7 +14,7 @@ org.eclipse.jdt.core.compiler.source=1.7
 org.eclipse.jdt.core.formatter.align_type_members_on_columns=false
 org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16
 org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation=0
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=48
 org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16
 org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16
 org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16
@@ -22,7 +22,7 @@ org.eclipse.jdt.core.formatter.alignment_for_assignment=0
 org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16
 org.eclipse.jdt.core.formatter.alignment_for_compact_if=16
 org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80
-org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0
+org.eclipse.jdt.core.formatter.alignment_for_enum_constants=49
 org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16
 org.eclipse.jdt.core.formatter.alignment_for_method_declaration=0
 org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16
@@ -31,7 +31,7 @@ org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16
 org.eclipse.jdt.core.formatter.alignment_for_resources_in_try=80
 org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16
 org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=48
 org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16
 org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16
 org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/Main.java b/vipra-cmd/src/main/java/de/vipra/cmd/Main.java
index 899ac92a..46197f2b 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/Main.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/Main.java
@@ -12,10 +12,13 @@ import org.slf4j.LoggerFactory;
 import de.vipra.cmd.option.Command;
 import de.vipra.cmd.option.DeleteCommand;
 import de.vipra.cmd.option.ImportCommand;
+import de.vipra.util.StringUtils;
+import de.vipra.util.Timer;
 
 public class Main {
 
 	public static final Logger log = LoggerFactory.getLogger(Main.class);
+	public static final Logger out = LoggerFactory.getLogger("shellout");
 
 	public static void main(String[] args) {
 		CommandLineParser parser = new DefaultParser();
@@ -49,7 +52,11 @@ public class Main {
 			}
 
 			if (c != null) {
+				Timer t = new Timer();
+				t.start();
 				c.run();
+				long dur = t.stop();
+				out.info("done in " + StringUtils.timeString(dur));
 			} else {
 				options.printHelp(cmd);
 			}
diff --git a/vipra-util/src/main/java/de/vipra/util/ex/FilebaseException.java b/vipra-cmd/src/main/java/de/vipra/cmd/ex/FilebaseException.java
similarity index 89%
rename from vipra-util/src/main/java/de/vipra/util/ex/FilebaseException.java
rename to vipra-cmd/src/main/java/de/vipra/cmd/ex/FilebaseException.java
index 99dd450e..ad5f6ef7 100644
--- a/vipra-util/src/main/java/de/vipra/util/ex/FilebaseException.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/ex/FilebaseException.java
@@ -1,4 +1,4 @@
-package de.vipra.util.ex;
+package de.vipra.cmd.ex;
 
 public class FilebaseException extends Exception {
 
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/ex/ImportException.java b/vipra-cmd/src/main/java/de/vipra/cmd/ex/ImportException.java
new file mode 100644
index 00000000..d7f6da33
--- /dev/null
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/ex/ImportException.java
@@ -0,0 +1,23 @@
+package de.vipra.cmd.ex;
+
+public class ImportException extends Exception {
+
+	private static final long serialVersionUID = 1L;
+
+	private final String id;
+
+	public ImportException(String msg, String id) {
+		super(msg);
+		this.id = id;
+	}
+
+	public ImportException(Exception e, String id) {
+		super(e);
+		this.id = id;
+	}
+
+	public String getId() {
+		return id;
+	}
+
+}
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzerException.java b/vipra-cmd/src/main/java/de/vipra/cmd/ex/LDAAnalyzerException.java
similarity index 89%
rename from vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzerException.java
rename to vipra-cmd/src/main/java/de/vipra/cmd/ex/LDAAnalyzerException.java
index d80081d8..bf55ee83 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzerException.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/ex/LDAAnalyzerException.java
@@ -1,4 +1,4 @@
-package de.vipra.cmd.lda;
+package de.vipra.cmd.ex;
 
 public class LDAAnalyzerException extends Exception {
 
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/PreprocessorException.java b/vipra-cmd/src/main/java/de/vipra/cmd/ex/PreprocessorException.java
similarity index 89%
rename from vipra-cmd/src/main/java/de/vipra/cmd/text/PreprocessorException.java
rename to vipra-cmd/src/main/java/de/vipra/cmd/ex/PreprocessorException.java
index d8c62b66..1b4f6ade 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/text/PreprocessorException.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/ex/PreprocessorException.java
@@ -1,4 +1,4 @@
-package de.vipra.cmd.text;
+package de.vipra.cmd.ex;
 
 public class PreprocessorException extends Exception {
 
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java
new file mode 100644
index 00000000..96714d44
--- /dev/null
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java
@@ -0,0 +1,55 @@
+package de.vipra.cmd.file;
+
+import java.io.File;
+import java.io.IOException;
+
+import de.vipra.cmd.ex.FilebaseException;
+import de.vipra.cmd.model.Article;
+import de.vipra.util.Config;
+import de.vipra.util.Config.Key;
+import de.vipra.util.ex.ConfigException;
+
+public abstract class Filebase {
+
+	private final File dataDir;
+	private final FilebaseIndex index;
+
+	public Filebase(File dataDir) throws FilebaseException {
+		this.dataDir = dataDir;
+		try {
+			this.index = new FilebaseIndex(new File(dataDir, "asd"));
+		} catch (IOException e) {
+			throw new FilebaseException("could not read index: " + e.getMessage());
+		}
+	}
+
+	public File getDataDir() {
+		return dataDir;
+	}
+
+	public void writeIndex() throws IOException {
+		index.write();
+	}
+
+	public void remove(Article article) throws FilebaseException {
+		remove(article.getId());
+	}
+
+	public abstract void add(Article article) throws FilebaseException;
+
+	public abstract void remove(String id) throws FilebaseException;
+
+	public abstract void write() throws IOException;
+
+	public static Filebase getFilebase(Config config) throws FilebaseException, ConfigException {
+		File dataDir = config.getDataDirectory();
+		switch (config.getString(Key.ANALYZER).toLowerCase()) {
+			case "ldac":
+				return new LdacFilebase(dataDir);
+			case "jgibb":
+			default:
+				return new JGibbFilebase(dataDir);
+		}
+	}
+
+}
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseIndex.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseIndex.java
new file mode 100644
index 00000000..662f43df
--- /dev/null
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseIndex.java
@@ -0,0 +1,46 @@
+package de.vipra.cmd.file;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import de.vipra.util.Constants;
+import de.vipra.util.FileUtils;
+
+public class FilebaseIndex {
+
+	private final File file;
+	private final List<String> index;
+
+	public FilebaseIndex(File file) throws IOException {
+		this.file = file;
+		if (file.exists()) {
+			index = new ArrayList<>(FileUtils.readFile(file));
+		} else {
+			index = new ArrayList<>();
+		}
+	}
+
+	public void write() throws IOException {
+		FileUtils.writeLines(file, Constants.FB_ENCODING.name(), index, null, false);
+	}
+
+	public int add(String id) {
+		int i = indexOf(id);
+		if (i == -1) {
+			index.add(id);
+			i = index.size() - 1;
+		}
+		return i;
+	}
+
+	public int indexOf(String id) {
+		return index.indexOf(id);
+	}
+
+	public boolean remove(String id) {
+		return index.remove(id);
+	}
+
+}
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/JGibbFilebase.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/JGibbFilebase.java
new file mode 100644
index 00000000..1dd885ad
--- /dev/null
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/JGibbFilebase.java
@@ -0,0 +1,33 @@
+package de.vipra.cmd.file;
+
+import java.io.File;
+import java.io.IOException;
+
+import de.vipra.cmd.ex.FilebaseException;
+import de.vipra.cmd.model.Article;
+
+public class JGibbFilebase extends Filebase {
+
+	public JGibbFilebase(File dataDir) throws FilebaseException {
+		super(dataDir);
+		// TODO Auto-generated constructor stub
+	}
+
+	@Override
+	public void add(Article article) {
+		// TODO Auto-generated method stub
+
+	}
+
+	@Override
+	public void remove(String id) {
+		// TODO Auto-generated method stub
+
+	}
+
+	@Override
+	public void write() throws IOException {
+		writeIndex();
+	}
+
+}
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/LdacFilebase.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/LdacFilebase.java
new file mode 100644
index 00000000..d6c4bbb9
--- /dev/null
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/LdacFilebase.java
@@ -0,0 +1,33 @@
+package de.vipra.cmd.file;
+
+import java.io.File;
+import java.io.IOException;
+
+import de.vipra.cmd.ex.FilebaseException;
+import de.vipra.cmd.model.Article;
+
+public class LdacFilebase extends Filebase {
+
+	public LdacFilebase(File dataDir) throws FilebaseException {
+		super(dataDir);
+		// TODO Auto-generated constructor stub
+	}
+
+	@Override
+	public void add(Article article) {
+		// TODO Auto-generated method stub
+
+	}
+
+	@Override
+	public void remove(String id) {
+		// TODO Auto-generated method stub
+
+	}
+
+	@Override
+	public void write() throws IOException {
+		writeIndex();
+	}
+
+}
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbLDAAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbLDAAnalyzer.java
index f5f9c4ef..76f29e78 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbLDAAnalyzer.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbLDAAnalyzer.java
@@ -1,13 +1,17 @@
 package de.vipra.cmd.lda;
 
-import de.vipra.cmd.model.Article;
+import de.vipra.cmd.ex.LDAAnalyzerException;
 
-public class JGibbLDAAnalyzer implements LDAAnalyzer {
+public class JGibbLDAAnalyzer extends LDAAnalyzer {
 
 	@Override
-	public Object analyze(Article article) throws LDAAnalyzerException {
+	public String getName() {
+		return "JGibb Analyzer";
+	}
+
+	@Override
+	public void analyze() throws LDAAnalyzerException {
 		// TODO Auto-generated method stub
-		return null;
 	}
 
 }
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java
index 0d2f65f1..92885e5b 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java
@@ -1,9 +1,23 @@
 package de.vipra.cmd.lda;
 
-import de.vipra.cmd.model.Article;
+import de.vipra.cmd.ex.LDAAnalyzerException;
+import de.vipra.util.Config;
+import de.vipra.util.Config.Key;
 
-public interface LDAAnalyzer {
+public abstract class LDAAnalyzer {
+	
+	public abstract String getName();
 
-	public Object analyze(Article article) throws LDAAnalyzerException;
+	public abstract void analyze() throws LDAAnalyzerException;
+
+	public static LDAAnalyzer getAnalyzer(Config config) {
+		switch (config.getString(Key.ANALYZER).toLowerCase()) {
+			case "ldac":
+				return new LdacLDAAnalyzer();
+			case "jgibb":
+			default:
+				return new JGibbLDAAnalyzer();
+		}
+	}
 
 }
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/LdacLDAAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/LdacLDAAnalyzer.java
index e5da35c8..c8431e44 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/LdacLDAAnalyzer.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/LdacLDAAnalyzer.java
@@ -1,13 +1,17 @@
 package de.vipra.cmd.lda;
 
-import de.vipra.cmd.model.Article;
+import de.vipra.cmd.ex.LDAAnalyzerException;
 
-public class LdacLDAAnalyzer implements LDAAnalyzer {
+public class LdacLDAAnalyzer extends LDAAnalyzer {
 
 	@Override
-	public Object analyze(Article article) throws LDAAnalyzerException {
+	public String getName() {
+		return "lda-c Analyzer";
+	}
+
+	@Override
+	public void analyze() throws LDAAnalyzerException {
 		// TODO Auto-generated method stub
-		return null;
 	}
 
 }
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/DeleteCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/DeleteCommand.java
index acebb742..aa8c9400 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/option/DeleteCommand.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/DeleteCommand.java
@@ -9,14 +9,14 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import de.vipra.cmd.ExecutionException;
+import de.vipra.cmd.ex.FilebaseException;
+import de.vipra.cmd.file.Filebase;
 import de.vipra.cmd.model.Article;
 import de.vipra.util.Config;
-import de.vipra.util.ConfigException;
 import de.vipra.util.Constants;
+import de.vipra.util.ex.ConfigException;
 import de.vipra.util.ex.DatabaseException;
-import de.vipra.util.ex.FilebaseException;
 import de.vipra.util.service.DatabaseService;
-import de.vipra.util.service.FilebaseService;
 
 public class DeleteCommand implements Command {
 
@@ -26,7 +26,7 @@ public class DeleteCommand implements Command {
 	private ArrayList<String> ids = new ArrayList<>();
 	private Config config;
 	private DatabaseService<Article> dbArticles;
-	private FilebaseService<Article> fbArticles;
+	private Filebase filebase;
 
 	DeleteCommand() {}
 
@@ -56,7 +56,7 @@ public class DeleteCommand implements Command {
 
 		try {
 			// 2. delete file
-			fbArticles.deleteSingle(id);
+			filebase.remove(id);
 		} catch (FilebaseException e) {
 			errors.add(e);
 		}
@@ -72,10 +72,10 @@ public class DeleteCommand implements Command {
 	@Override
 	public void run() throws ExecutionException {
 		try {
-			config = new Config();
-			dbArticles = config.getDatabaseService(Constants.Collection.ARTICLES, Article.class);
-			fbArticles = config.getFilebaseService(Article.class);
-		} catch (IOException | ConfigException e) {
+			config = Config.getConfig();
+			dbArticles = DatabaseService.getDatabaseService(config, Constants.Collection.ARTICLES, Article.class);
+			filebase = Filebase.getFilebase(config);
+		} catch (IOException | FilebaseException | ConfigException e) {
 			throw new ExecutionException(e);
 		}
 
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java
index d31a7563..5a6559ba 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java
@@ -1,6 +1,7 @@
 package de.vipra.cmd.option;
 
 import java.io.File;
+import java.io.FileNotFoundException;
 import java.io.FileReader;
 import java.io.FilenameFilter;
 import java.io.IOException;
@@ -10,45 +11,25 @@ import java.util.List;
 import org.json.simple.JSONArray;
 import org.json.simple.JSONObject;
 import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import de.vipra.cmd.ExecutionException;
+import de.vipra.cmd.ex.ImportException;
+import de.vipra.cmd.file.Filebase;
+import de.vipra.cmd.lda.LDAAnalyzer;
 import de.vipra.cmd.model.Article;
-import de.vipra.cmd.text.LucenePreprocessor;
 import de.vipra.cmd.text.Preprocessor;
 import de.vipra.util.Config;
-import de.vipra.util.ConfigException;
 import de.vipra.util.Constants;
 import de.vipra.util.StringUtils;
+import de.vipra.util.Timer;
 import de.vipra.util.model.ArticleStats;
 import de.vipra.util.service.DatabaseService;
-import de.vipra.util.service.FilebaseService;
 
 public class ImportCommand implements Command {
 
-	public class ImportException extends Exception {
-
-		private static final long serialVersionUID = 1L;
-
-		private final String id;
-
-		public ImportException(String msg, String id) {
-			super(msg);
-			this.id = id;
-		}
-
-		public ImportException(Exception e, String id) {
-			super(e);
-			this.id = id;
-		}
-
-		public String getId() {
-			return id;
-		}
-
-	}
-
 	public static final Logger log = LoggerFactory.getLogger(ImportCommand.class);
 	public static final Logger out = LoggerFactory.getLogger("shellout");
 
@@ -56,7 +37,9 @@ public class ImportCommand implements Command {
 	private JSONParser parser = new JSONParser();
 	private Config config;
 	private DatabaseService<Article> dbArticles;
-	private FilebaseService<Article> fbArticles;
+	private Filebase filebase;
+	private Preprocessor preprocessor;
+	private LDAAnalyzer analyzer;
 
 	ImportCommand() {}
 
@@ -95,91 +78,101 @@ public class ImportCommand implements Command {
 		}
 	}
 
-	private void importFile(File file) throws Exception {
-		Object data = parser.parse(new FileReader(file));
-
-		try {
-			importArticles((JSONArray) data);
-		} catch (ClassCastException e) {
-			try {
-				importArticle((JSONObject) data);
-			} catch (ClassCastException e2) {
-				throw new ExecutionException("invalid json file format: " + file.getAbsolutePath());
-			}
-		}
-	}
-
-	private void importArticles(JSONArray array) throws ExecutionException {
-		List<Exception> errors = new ArrayList<>();
-		for (Object object : array) {
-			try {
-				importArticle((JSONObject) object);
-			} catch (ImportException e) {
-				revertImport(e.getId());
-				errors.add(e);
-			} catch (Exception e) {
-				errors.add(e);
-			}
-		}
-		if (errors.size() > 0) {
-			throw new ExecutionException(errors);
-		}
-	}
-
-	void importArticle(JSONObject obj) throws ImportException {
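+	/**
+	 * Imports a single article into the database and the filebase.
+	 *
+	 * @param obj JSON object describing the article; its "title" entry is read for the progress message
+	 * @return the stored article, with its text replaced by the preprocessed text
+	 * @throws ImportException if preprocessing, the database insert or the filebase insert fails
+	 */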
+	Article importArticle(JSONObject obj) throws ImportException {
 		out.info("importing \"" + StringUtils.ellipsize(obj.get("title").toString(), 80) + "\"");
 		Article article = new Article();
 		article.fromJSON(obj);
 
 		try {
-			// 1. preprocess text
-			// process text before topic modeling
-			Preprocessor preprocessor = new LucenePreprocessor();
-			String processedText = preprocessor.preprocess(article.getText());
+			// preprocess text and generate text statistics
+			String preprocessedText = preprocessor.preprocess(article.getText());
+			ArticleStats articleStats = ArticleStats.generateFromText(preprocessedText);
 
-			// 2. generate word statistics
-			article.setStats(ArticleStats.generateFromText(processedText));
-
-			// 3. add article to mongodb
-			// this generates a unique object id
+			// add article to mongodb
+			article.setStats(articleStats);
 			article = dbArticles.createSingle(article);
 
-			// 4. add article to filebase
-			// topic modeling works on files
-			article.setText(processedText);
-			fbArticles.createSingle(article);
+			// add article to filebase
+			article.setText(preprocessedText);
+			filebase.add(article);
+
+			return article;
 		} catch (Exception e) {
 			throw new ImportException(e, article.getId());
 		}
 	}
 
-	private void revertImport(String id) throws ExecutionException {
-		if (id != null) {
-			DeleteCommand cmd = new DeleteCommand();
-			cmd.deleteEntry(id);
+	/**
+	 * Imports a JSON file, holding one article object or an array of them, into the database and the filebase.
+	 *
+	 * @param file the JSON file to import
+	 * @return the imported articles; empty if the top-level JSON value is neither an object nor an array
+	 * @throws FileNotFoundException if the file cannot be opened for reading
+	 * @throws IOException if reading or closing the file fails
+	 * @throws ParseException if the file does not contain valid JSON
+	 * @throws ImportException if importing one of the articles fails
+	 */
+	private List<Article> importFile(File file)
+			throws FileNotFoundException, IOException, ParseException, ImportException {
+		Object data;
+		try (FileReader reader = new FileReader(file)) { // reader is closed even if parsing throws
+			data = parser.parse(reader);
+		}
+		List<Article> articles = new ArrayList<Article>();
+		if (data instanceof JSONArray) {
+			for (Object object : (JSONArray) data) {
+				articles.add(importArticle((JSONObject) object));
+			}
+		} else if (data instanceof JSONObject) {
+			articles.add(importArticle((JSONObject) data));
+		}
+		return articles;
+	}
 
 	@Override
 	public void run() throws ExecutionException {
 		try {
-			config = new Config();
-			dbArticles = config.getDatabaseService(Constants.Collection.ARTICLES, Article.class);
-			fbArticles = config.getFilebaseService(Article.class);
-		} catch (IOException | ConfigException e) {
-			throw new ExecutionException(e);
-		}
-
-		List<Exception> ex = new ArrayList<>();
-		for (File file : files) {
-			try {
-				importFile(file);
-			} catch (Exception e) {
-				ex.add(e);
+			config = Config.getConfig();
+			dbArticles = DatabaseService.getDatabaseService(config, Constants.Collection.ARTICLES, Article.class);
+			filebase = Filebase.getFilebase(config);
+			preprocessor = Preprocessor.getPreprocessor(config);
+			analyzer = LDAAnalyzer.getAnalyzer(config);
+
+			out.info("using data directory: " + filebase.getDataDir().getAbsolutePath());
+			out.info("using preprocessor: " + preprocessor.getName());
+			out.info("using analyzer: " + analyzer.getName());
+
+			Timer timer = new Timer();
+			timer.start();
+
+			// import files into database and filebase
+			List<Article> articles = new ArrayList<>();
+			for (File file : files) {
+				if (file.isFile() && file.exists())
+					articles.addAll(importFile(file));
 			}
-		}
-		if (ex.size() > 0) {
-			throw new ExecutionException(ex);
+			long durImport = timer.lap();
+
+			// do topic modeling
+			analyzer.analyze();
+			long durAnalyze = timer.lap();
+
+			// write file index
+			filebase.writeIndex();
+			long durIndex = timer.lap();
+
+			out.info("imported " + articles.size() + " " + (articles.size() == 1 ? "article" : "articles"));
+			out.info("import: " + StringUtils.timeString(durImport) + ", analyze: " + StringUtils.timeString(durAnalyze)
+					+ ", reindex: " + StringUtils.timeString(durIndex));
+		} catch (Exception e) {
+			throw new ExecutionException(e);
 		}
 	}
 
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/CustomPreprocessor.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/CustomPreprocessor.java
index ba713628..6341c0a8 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/text/CustomPreprocessor.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/CustomPreprocessor.java
@@ -1,31 +1,22 @@
 package de.vipra.cmd.text;
 
-import java.util.Arrays;
 import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
 
-public class CustomPreprocessor implements Preprocessor {
-
-	public static final HashSet<String> STOPWORDS = new HashSet<>(Arrays.asList(new String[] { "a", "about", "above",
-			"after", "again", "against", "all", "am", "an", "and", "any", "are", "aren't", "as", "at", "be", "because",
-			"been", "before", "being", "below", "between", "both", "but", "by", "can't", "cannot", "could", "couldn't",
-			"did", "didn't", "do", "does", "doesn't", "doing", "don't", "down", "during", "each", "few", "for", "from",
-			"further", "had", "hadn't", "has", "hasn't", "have", "haven't", "having", "he", "he'd", "he'll", "he's",
-			"her", "here", "here's", "hers", "herself", "him", "himself", "his", "how", "how's", "i", "i'd", "i'll",
-			"i'm", "i've", "if", "in", "into", "is", "isn't", "it", "it's", "its", "itself", "let's", "me", "more",
-			"most", "mustn't", "my", "myself", "no", "nor", "not", "of", "off", "on", "once", "only", "or", "other",
-			"ought", "our", "ours  ourselves", "out", "over", "own", "same", "shan't", "she", "she'd", "she'll",
-			"she's", "should", "shouldn't", "so", "some", "such", "than", "that", "that's", "the", "their", "theirs",
-			"them", "themselves", "then", "there", "there's", "these", "they", "they'd", "they'll", "they're",
-			"they've", "this", "those", "through", "to", "too", "under", "until", "up", "very", "was", "wasn't", "we",
-			"we'd", "we'll", "we're", "we've", "were", "weren't", "what", "what's", "when", "when's", "where",
-			"where's", "which", "while", "who", "who's", "whom", "why", "why's", "with", "won't", "would", "wouldn't",
-			"you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves" }));
+public class CustomPreprocessor extends Preprocessor {
+
+	private final Set<String> stopWords;
+
+	public CustomPreprocessor(List<String> stopWordsList) {
+		this.stopWords = new HashSet<>(stopWordsList);
+	}
 
 	private String removeStopWords(String text) {
 		String[] words = text.split("\\s+");
 		StringBuilder sb = new StringBuilder();
 		for (String word : words) {
-			if (STOPWORDS.contains(word)) {
+			if (stopWords.contains(word)) {
 				continue;
 			}
 			sb.append(word).append(" ");
@@ -33,6 +24,11 @@ public class CustomPreprocessor implements Preprocessor {
 		return sb.toString().trim();
 	}
 
+	@Override
+	public String getName() {
+		return "Custom Preprocessor";
+	}
+
 	@Override
 	public String preprocess(String input) {
 		input = input.toLowerCase();
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/LucenePreprocessor.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/LucenePreprocessor.java
index 446e17de..13935c7a 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/text/LucenePreprocessor.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/LucenePreprocessor.java
@@ -3,6 +3,7 @@ package de.vipra.cmd.text;
 import java.io.IOException;
 import java.io.StringReader;
 import java.util.ArrayList;
+import java.util.List;
 import java.util.regex.Pattern;
 
 import org.apache.lucene.analysis.Analyzer;
@@ -12,14 +13,27 @@ import org.apache.lucene.analysis.miscellaneous.TrimFilter;
 import org.apache.lucene.analysis.pattern.PatternReplaceFilter;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.util.CharArraySet;
 
+import de.vipra.cmd.ex.PreprocessorException;
 import de.vipra.util.StringUtils;
 
-public class LucenePreprocessor implements Preprocessor {
+public class LucenePreprocessor extends Preprocessor {
+
+	private final CharArraySet stopWords;
+
+	public LucenePreprocessor(List<String> stopWords) {
+		this.stopWords = new CharArraySet(stopWords, false);
+	}
+
+	@Override
+	public String getName() {
+		return "Lucene Preprocessor";
+	}
 
 	@Override
 	public String preprocess(String input) throws PreprocessorException {
-		Analyzer analyzer = new StandardAnalyzer();
+		Analyzer analyzer = new StandardAnalyzer(stopWords);
 		TokenStream stream = analyzer.tokenStream(null, new StringReader(input));
 		try {
 			stream.reset();
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/Preprocessor.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/Preprocessor.java
index 75dcb737..f130cddb 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/text/Preprocessor.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/Preprocessor.java
@@ -1,7 +1,43 @@
 package de.vipra.cmd.text;
 
-public interface Preprocessor {
+import java.util.Arrays;
+import java.util.List;
 
-	String preprocess(String input) throws PreprocessorException;
+import de.vipra.cmd.ex.PreprocessorException;
+import de.vipra.util.Config;
+import de.vipra.util.Constants;
+import de.vipra.util.Config.Key;
+
+/** Base class for text preprocessors; {@link #getPreprocessor(Config)} picks the implementation from the configuration. */
+public abstract class Preprocessor {
+
+	/** Returns a human-readable name for this preprocessor. */
+	public abstract String getName();
+
+	/** Returns the preprocessed form of {@code input}; may throw PreprocessorException. */
+	public abstract String preprocess(String input) throws PreprocessorException;
+
+	/**
+	 * Creates the preprocessor selected by the {@code an.preprocessor} setting: "custom" yields a
+	 * CustomPreprocessor, any other value a LucenePreprocessor. Stop words are taken from the
+	 * comma-separated {@code an.stopwords} setting (lower-cased, whitespace around commas ignored);
+	 * Constants.STOPWORDS is used when that setting is empty.
+	 */
+	public static Preprocessor getPreprocessor(Config config) {
+		String raw = config.getString(Key.STOPWORDS).toLowerCase().trim();
+		List<String> stopWords = Arrays.asList(raw.split("\\s*,\\s*"));
+		// "".split(",") yields [""], not an empty array, so the list size alone never detects an unset value
+		if (raw.isEmpty() || stopWords.isEmpty()) {
+			stopWords = Constants.STOPWORDS;
+		}
+
+		switch (config.getString(Key.PREPROCESSOR)) {
+			case "custom":
+				return new CustomPreprocessor(stopWords);
+			case "lucene":
+			default:
+				return new LucenePreprocessor(stopWords);
+		}
+	}
 
 }
diff --git a/vipra-cmd/src/main/resources/stopwords.txt b/vipra-cmd/src/main/resources/stopwords.txt
deleted file mode 100644
index 1e35caf4..00000000
--- a/vipra-cmd/src/main/resources/stopwords.txt
+++ /dev/null
@@ -1,173 +0,0 @@
-a
-about
-above
-after
-again
-against
-all
-am
-an
-and
-any
-are
-aren't
-as
-at
-be
-because
-been
-before
-being
-below
-between
-both
-but
-by
-can't
-cannot
-could
-couldn't
-did
-didn't
-do
-does
-doesn't
-doing
-don't
-down
-during
-each
-few
-for
-from
-further
-had
-hadn't
-has
-hasn't
-have
-haven't
-having
-he
-he'd
-he'll
-he's
-her
-here
-here's
-hers
-herself
-him
-himself
-his
-how
-how's
-i
-i'd
-i'll
-i'm
-i've
-if
-in
-into
-is
-isn't
-it
-it's
-its
-itself
-let's
-me
-more
-most
-mustn't
-my
-myself
-no
-nor
-not
-of
-off
-on
-once
-only
-or
-other
-ought
-our
-ours	ourselves
-out
-over
-own
-same
-shan't
-she
-she'd
-she'll
-she's
-should
-shouldn't
-so
-some
-such
-than
-that
-that's
-the
-their
-theirs
-them
-themselves
-then
-there
-there's
-these
-they
-they'd
-they'll
-they're
-they've
-this
-those
-through
-to
-too
-under
-until
-up
-very
-was
-wasn't
-we
-we'd
-we'll
-we're
-we've
-were
-weren't
-what
-what's
-when
-when's
-where
-where's
-which
-while
-who
-who's
-whom
-why
-why's
-with
-won't
-would
-wouldn't
-you
-you'd
-you'll
-you're
-you've
-your
-yours
-yourself
-yourselves
\ No newline at end of file
diff --git a/vipra-rest/src/main/java/de/vipra/rest/resource/ArticleResource.java b/vipra-rest/src/main/java/de/vipra/rest/resource/ArticleResource.java
index 2fbab02b..d0c852b3 100644
--- a/vipra-rest/src/main/java/de/vipra/rest/resource/ArticleResource.java
+++ b/vipra-rest/src/main/java/de/vipra/rest/resource/ArticleResource.java
@@ -25,8 +25,8 @@ import de.vipra.rest.model.Article;
 import de.vipra.rest.model.ResponseWrapper;
 import de.vipra.rest.service.ArticleService;
 import de.vipra.util.Config;
-import de.vipra.util.ConfigException;
 import de.vipra.util.Mongo;
+import de.vipra.util.ex.ConfigException;
 import de.vipra.util.ex.DatabaseException;
 
 @Path("articles")
@@ -38,7 +38,7 @@ public class ArticleResource {
 	final ArticleService service;
 
 	public ArticleResource(@Context ServletContext servletContext) throws ConfigException, IOException {
-		Config config = new Config();
+		Config config = Config.getConfig();
 		Mongo mongo = Mongo.getInstance(config);
 		service = new ArticleService(mongo);
 	}
diff --git a/vipra-util/.settings/org.eclipse.jdt.core.prefs b/vipra-util/.settings/org.eclipse.jdt.core.prefs
index 0e1f9aa3..dad9ba74 100644
--- a/vipra-util/.settings/org.eclipse.jdt.core.prefs
+++ b/vipra-util/.settings/org.eclipse.jdt.core.prefs
@@ -13,7 +13,7 @@ org.eclipse.jdt.core.compiler.source=1.7
 org.eclipse.jdt.core.formatter.align_type_members_on_columns=false
 org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16
 org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation=0
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=48
 org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16
 org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16
 org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16
@@ -21,7 +21,7 @@ org.eclipse.jdt.core.formatter.alignment_for_assignment=0
 org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16
 org.eclipse.jdt.core.formatter.alignment_for_compact_if=16
 org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80
-org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0
+org.eclipse.jdt.core.formatter.alignment_for_enum_constants=49
 org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16
 org.eclipse.jdt.core.formatter.alignment_for_method_declaration=0
 org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16
@@ -30,7 +30,7 @@ org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16
 org.eclipse.jdt.core.formatter.alignment_for_resources_in_try=80
 org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16
 org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=48
 org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16
 org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16
 org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16
diff --git a/vipra-util/pom.xml b/vipra-util/pom.xml
index f0bfa6d7..e78ecfaf 100644
--- a/vipra-util/pom.xml
+++ b/vipra-util/pom.xml
@@ -19,7 +19,7 @@
 			<artifactId>commons-io</artifactId>
 			<version>2.4</version>
 		</dependency>
-		
+
 		<!-- Logging -->
 		<dependency>
 			<groupId>org.apache.logging.log4j</groupId>
diff --git a/vipra-util/src/main/java/de/vipra/util/Config.java b/vipra-util/src/main/java/de/vipra/util/Config.java
index a2032a4c..6a9a47fe 100644
--- a/vipra-util/src/main/java/de/vipra/util/Config.java
+++ b/vipra-util/src/main/java/de/vipra/util/Config.java
@@ -8,24 +8,37 @@ import java.util.Properties;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import de.vipra.util.model.Model;
-import de.vipra.util.service.DatabaseService;
-import de.vipra.util.service.FilebaseService;
+import de.vipra.util.ex.ConfigException;
 
 public class Config {
 
+	public static enum Key {
+		DBHOST("db.host", Constants.DEFAULT_HOST),
+		DBPORT("db.port", Constants.DEFAULT_PORT),
+		DBNAME("db.name", Constants.DEFAULT_DB),
+		DATADIR("fs.datadir", null),
+		PREPROCESSOR("an.preprocessor", Constants.DEFAULT_PREPROCESSOR.name),
+		ANALYZER("an.analyzer", Constants.DEFAULT_ANALYZER.name),
+		STOPWORDS("an.stopwords", "");
+
+		private final String name;
+		private final Object defVal;
+
+		Key(String name, Object defVal) {
+			this.name = name;
+			this.defVal = defVal;
+		}
+	}
+
 	public static final Logger log = LoggerFactory.getLogger(Config.class);
+	private static Config config;
 
 	private final Properties props = new Properties();
 
-	public Config() throws IOException, ConfigException {
+	private Config() throws IOException, ConfigException {
 		load(FileUtils.getResource(Constants.CONFIG_FILE));
 	}
 
-	public Config(InputStream is) throws IOException, ConfigException {
-		load(is);
-	}
-
 	private void load(InputStream is) throws ConfigException, IOException {
 		if (is == null) {
 			log.error("config file input stream is null");
@@ -35,44 +48,46 @@ public class Config {
 		}
 	}
 
-	public String getString(String key) {
-		return getString(key, null);
-	}
-
-	public String getString(String key, String defaultValue) {
-		return props.getProperty(key, defaultValue);
+	public String getString(Key key) {
+		return props.getProperty(key.name, (String) key.defVal);
 	}
 
-	public Integer getInt(String key) {
-		return getInt(key, null);
-	}
-
-	public Integer getInt(String key, Integer defaultValue) {
+	public Integer getInt(Key key) {
 		try {
-			return Integer.parseInt(props.getProperty(key));
+			return Integer.parseInt(props.getProperty(key.name));
 		} catch (NumberFormatException e) {
-			return defaultValue;
+			return (Integer) key.defVal;
 		}
 	}
 
 	public File getDataDirectory() throws ConfigException {
-		File dataDir = new File(getString("fb.path"));
+		String path = getString(Key.DATADIR);
+		File dataDir;
+		if (path != null) {
+			dataDir = new File(path);
+		} else {
+			dataDir = getGenericDataDir();
+		}
+
 		if (!dataDir.exists()) {
 			if (!dataDir.mkdirs()) {
 				throw new ConfigException("could not create data directory: " + dataDir.getAbsolutePath());
 			}
 		}
+
 		return dataDir;
 	}
 
-	public <T extends Model> DatabaseService<T> getDatabaseService(Constants.Collection collection, Class<T> clazz)
-			throws ConfigException {
-		Mongo mongo = Mongo.getInstance(this);
-		return new DatabaseService<T>(mongo, collection, clazz);
+	public static File getGenericDataDir() {
+		File base = PathUtils.appDataDir();
+		return new File(base, Constants.FB_DIR);
 	}
 
-	public <T extends Model> FilebaseService<T> getFilebaseService(Class<T> clazz) throws ConfigException {
-		return new FilebaseService<T>(getDataDirectory(), clazz);
+	public static Config getConfig() throws IOException, ConfigException {
+		if (config == null) {
+			config = new Config();
+		}
+		return config;
 	}
 
 }
diff --git a/vipra-util/src/main/java/de/vipra/util/Constants.java b/vipra-util/src/main/java/de/vipra/util/Constants.java
index 0655844e..c135c812 100644
--- a/vipra-util/src/main/java/de/vipra/util/Constants.java
+++ b/vipra-util/src/main/java/de/vipra/util/Constants.java
@@ -1,7 +1,9 @@
 package de.vipra.util;
 
+import java.util.List;
 import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
 
 public class Constants {
 
@@ -9,14 +11,20 @@ public class Constants {
 	public static final Charset FB_ENCODING = StandardCharsets.UTF_8;
 
 	public static final String CONFIG_FILE = "config.properties";
-	public static final String STOPWORDS_FILE = "stopwords.txt";
 
 	public static final String DEFAULT_HOST = "localhost";
 	public static final int DEFAULT_PORT = 27017;
 	public static final String DEFAULT_DB = "test";
-	
+
 	public static final String DATETIME_FORMAT = "yyyy-MM-dd'T'HH:mm:ss'Z'";
 
+	public static final Preprocessor DEFAULT_PREPROCESSOR = Preprocessor.LUCENE;
+	public static final Analyzer DEFAULT_ANALYZER = Analyzer.JGIBB;
+
+	public static final List<String> STOPWORDS = Arrays.asList("a", "an", "and", "are", "as", "at", "be", "but", "by",
+			"for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their",
+			"then", "there", "these", "they", "this", "to", "was", "will", "with");
+
 	public static enum Collection {
 		ARTICLES("articles");
 
@@ -27,4 +35,26 @@ public class Constants {
 		}
 	}
 
+	public static enum Preprocessor {
+		CUSTOM("custom"),
+		LUCENE("lucene");
+
+		public final String name;
+
+		private Preprocessor(String name) {
+			this.name = name;
+		}
+	}
+
+	public static enum Analyzer {
+		LDAC("ldac"),
+		JGIBB("jgibb");
+
+		public final String name;
+
+		private Analyzer(String name) {
+			this.name = name;
+		}
+	}
+
 }
diff --git a/vipra-util/src/main/java/de/vipra/util/Mongo.java b/vipra-util/src/main/java/de/vipra/util/Mongo.java
index 2973d5f5..943f4383 100644
--- a/vipra-util/src/main/java/de/vipra/util/Mongo.java
+++ b/vipra-util/src/main/java/de/vipra/util/Mongo.java
@@ -6,6 +6,9 @@ import org.slf4j.LoggerFactory;
 import com.mongodb.MongoClient;
 import com.mongodb.client.MongoDatabase;
 
+import de.vipra.util.Config.Key;
+import de.vipra.util.ex.ConfigException;
+
 public class Mongo {
 
 	public static final Logger log = LoggerFactory.getLogger(Mongo.class);
@@ -16,9 +19,9 @@ public class Mongo {
 	private final MongoDatabase database;
 
 	private Mongo(Config config) throws ConfigException {
-		String host = config.getString("db.host", Constants.DEFAULT_HOST);
-		Integer port = config.getInt("db.port", Constants.DEFAULT_PORT);
-		String databaseName = config.getString("db.name", Constants.DEFAULT_DB);
+		String host = config.getString(Key.DBHOST);
+		Integer port = config.getInt(Key.DBPORT);
+		String databaseName = config.getString(Key.DBNAME);
 
 		if (host == null || port == null || databaseName == null) {
 			log.error("host/port/dbname missing in configuration");
diff --git a/vipra-util/src/main/java/de/vipra/util/PathUtils.java b/vipra-util/src/main/java/de/vipra/util/PathUtils.java
index b5979fe2..9b94dffb 100644
--- a/vipra-util/src/main/java/de/vipra/util/PathUtils.java
+++ b/vipra-util/src/main/java/de/vipra/util/PathUtils.java
@@ -4,17 +4,22 @@ import java.io.File;
 
 public class PathUtils {
 
-	public static File userConfigDir() {
+	/**
+	 * Returns the per-user base directory for application data: the APPDATA directory on Windows,
+	 * ~/Library/Application Support on macOS, and ~/.local/share on every other system.
+	 */
+	public static File appDataDir() {
 		String os = System.getProperty("os.name").toUpperCase();
 		File base = null;
 		if (os.contains("WIN")) {
-			base = new File(System.getProperty("APPDATA"));
+			// APPDATA is an environment variable, not a JVM system property
+			base = new File(System.getenv("APPDATA"));
 		} else if (os.contains("MAC")) {
-			base = new File(System.getProperty("user.home") + File.pathSeparator + "Library" + File.pathSeparator
-					+ "ApplicationSupport");
+			base = new File(System.getProperty("user.home") + File.separator + "Library" + File.separator
+					+ "Application Support");
-		} else if (os.contains("NIX")) {
+		} else {
 			base = new File(
-					System.getProperty("user.home") + File.pathSeparator + ".local" + File.pathSeparator + "share");
+					System.getProperty("user.home") + File.separator + ".local" + File.separator + "share");
 		}
 		return base;
 	}
diff --git a/vipra-util/src/main/java/de/vipra/util/StringUtils.java b/vipra-util/src/main/java/de/vipra/util/StringUtils.java
index 34a40c14..15f96f9f 100644
--- a/vipra-util/src/main/java/de/vipra/util/StringUtils.java
+++ b/vipra-util/src/main/java/de/vipra/util/StringUtils.java
@@ -1,6 +1,9 @@
 package de.vipra.util;
 
+import java.util.ArrayList;
 import java.util.Iterator;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
 
 public class StringUtils {
 
@@ -12,16 +15,59 @@ public class StringUtils {
 		return input.substring(0, maxLength - ellip.length()).concat(ellip);
 	}
 
-	public static String join(Iterable<String> it) {
+	public static String join(Iterable<String> it, String separator) {
 		Iterator<String> iter = it.iterator();
 		if (iter.hasNext()) {
 			StringBuilder sb = new StringBuilder(iter.next());
 			while (iter.hasNext()) {
-				sb.append(" ").append(iter.next());
+				sb.append(separator).append(iter.next());
 			}
 			return sb.toString();
 		}
 		return "";
 	}
 
+	public static String join(Iterable<String> it) {
+		return join(it, " ");
+	}
+	/** Formats a nanosecond duration as space-separated d/h/m/s/ms parts, e.g. "1d 2h 3m 4s 5ms"; only units with a positive count appear. */
+	public static String timeString(long nanos) {
+		List<String> parts = new ArrayList<String>(6);
+
+		long days = TimeUnit.NANOSECONDS.toDays(nanos);
+		if (days > 0) {
+			parts.add(days + "d");
+			nanos -= TimeUnit.DAYS.toNanos(days);
+		}
+
+		long hours = TimeUnit.NANOSECONDS.toHours(nanos);
+		if (hours > 0) {
+			parts.add(hours + "h");
+			nanos -= TimeUnit.HOURS.toNanos(hours);
+		}
+
+		long minutes = TimeUnit.NANOSECONDS.toMinutes(nanos);
+		if (minutes > 0) {
+			parts.add(minutes + "m");
+			nanos -= TimeUnit.MINUTES.toNanos(minutes);
+		}
+
+		long seconds = TimeUnit.NANOSECONDS.toSeconds(nanos);
+		if (seconds > 0) {
+			parts.add(seconds + "s");
+			nanos -= TimeUnit.SECONDS.toNanos(seconds);
+		}
+
+		long millis = TimeUnit.NANOSECONDS.toMillis(nanos);
+		if (millis > 0) {
+			parts.add(millis + "ms");
+		}
+		// no unit had a positive count (duration under 1 ms, zero or negative): report "0ms" instead of an empty string
+		if (parts.size() == 0) {
+			parts.add("0ms");
+		}
+
+		return StringUtils.join(parts);
+	}
+
 }
diff --git a/vipra-util/src/main/java/de/vipra/util/Timer.java b/vipra-util/src/main/java/de/vipra/util/Timer.java
new file mode 100644
index 00000000..9ca70c51
--- /dev/null
+++ b/vipra-util/src/main/java/de/vipra/util/Timer.java
@@ -0,0 +1,26 @@
+package de.vipra.util;
+
+/** Minimal stopwatch built on System.nanoTime(); every returned value is in nanoseconds. No synchronization is performed. */
+public class Timer {
+
+	private long start;
+
+	/** Sets the reference point to the current System.nanoTime() reading and returns that reading. */
+	public long start() {
+		start = System.nanoTime();
+		return start;
+	}
+
+	/** Returns the nanoseconds elapsed since the reference point; the reference point is left unchanged. */
+	public long stop() {
+		return System.nanoTime() - start;
+	}
+
+	/** Returns the nanoseconds elapsed since the reference point and moves the reference point to the same clock reading, so consecutive laps add up without gaps. */
+	public long lap() {
+		final long previous = start;
+		start = System.nanoTime();
+		return start - previous;
+	}
+
+}
diff --git a/vipra-util/src/main/java/de/vipra/util/ConfigException.java b/vipra-util/src/main/java/de/vipra/util/ex/ConfigException.java
similarity index 86%
rename from vipra-util/src/main/java/de/vipra/util/ConfigException.java
rename to vipra-util/src/main/java/de/vipra/util/ex/ConfigException.java
index d6404572..02afde92 100644
--- a/vipra-util/src/main/java/de/vipra/util/ConfigException.java
+++ b/vipra-util/src/main/java/de/vipra/util/ex/ConfigException.java
@@ -1,4 +1,4 @@
-package de.vipra.util;
+package de.vipra.util.ex;
 
 public class ConfigException extends Exception {
 
diff --git a/vipra-util/src/main/java/de/vipra/util/model/Model.java b/vipra-util/src/main/java/de/vipra/util/model/Model.java
index db4f5ec1..48725398 100644
--- a/vipra-util/src/main/java/de/vipra/util/model/Model.java
+++ b/vipra-util/src/main/java/de/vipra/util/model/Model.java
@@ -37,8 +37,7 @@ public abstract class Model implements BsonDocument {
 	}
 
 	public void writeToFile(File file) throws IOException {
-		String data = toFileString();
-		FileUtils.writeStringToFile(file, data, Constants.FB_ENCODING, false);
+		FileUtils.writeStringToFile(file, toFileString(), Constants.FB_ENCODING, false);
 	}
 
 	public abstract String getType();
diff --git a/vipra-util/src/main/java/de/vipra/util/model/TermFrequency.java b/vipra-util/src/main/java/de/vipra/util/model/TermFrequency.java
index c97fa1e0..f9cd70db 100644
--- a/vipra-util/src/main/java/de/vipra/util/model/TermFrequency.java
+++ b/vipra-util/src/main/java/de/vipra/util/model/TermFrequency.java
@@ -49,17 +49,17 @@ public class TermFrequency implements BsonDocument {
 	@Override
 	public Document toDocument() {
 		Document document = new Document();
-		document.put("tf", getTermFrequency());
-		document.put("ntf", getNormalizedTermFrequency());
-		document.put("idf", getInverseDocumentFrequency());
+		document.put("termFrequency", getTermFrequency());
+		document.put("normalizedTermFrequency", getNormalizedTermFrequency());
+		document.put("inverseDocumentFrequency", getInverseDocumentFrequency());
 		return document;
 	}
 
 	@Override
 	public void fromDocument(Document document) {
-		setTermFrequency(document.getLong("tf"));
-		setNormalizedTermFrequency(document.getDouble("ntf"));
-		setInverseDocumentFrequency(document.getDouble("idf"));
+		setTermFrequency(document.getLong("termFrequency"));
+		setNormalizedTermFrequency(document.getDouble("normalizedTermFrequency"));
+		setInverseDocumentFrequency(document.getDouble("inverseDocumentFrequency"));
 	}
 
 }
diff --git a/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java b/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java
index 51d979bb..c930da6c 100644
--- a/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java
+++ b/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java
@@ -14,8 +14,10 @@ import com.mongodb.client.model.Filters;
 import com.mongodb.client.result.DeleteResult;
 import com.mongodb.client.result.UpdateResult;
 
+import de.vipra.util.Config;
 import de.vipra.util.Constants;
 import de.vipra.util.Mongo;
+import de.vipra.util.ex.ConfigException;
 import de.vipra.util.ex.DatabaseException;
 import de.vipra.util.model.Model;
 
@@ -100,4 +102,10 @@ public class DatabaseService<T extends Model> implements Service<T, DatabaseExce
 		return result.getModifiedCount();
 	}
 
+	public static <T extends Model> DatabaseService<T> getDatabaseService(Config config,
+			Constants.Collection collection, Class<T> clazz) throws ConfigException {
+		Mongo mongo = Mongo.getInstance(config);
+		return new DatabaseService<T>(mongo, collection, clazz);
+	}
+
 }
diff --git a/vipra-util/src/main/java/de/vipra/util/service/FilebaseService.java b/vipra-util/src/main/java/de/vipra/util/service/FilebaseService.java
deleted file mode 100644
index 49c8ec34..00000000
--- a/vipra-util/src/main/java/de/vipra/util/service/FilebaseService.java
+++ /dev/null
@@ -1,92 +0,0 @@
-package de.vipra.util.service;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import de.vipra.util.ex.FilebaseException;
-import de.vipra.util.model.Model;
-
-public class FilebaseService<T extends Model> implements Service<T, FilebaseException> {
-
-	public static final Logger log = LoggerFactory.getLogger(FilebaseService.class);
-
-	private final File directory;
-	private final Class<T> clazz;
-
-	public FilebaseService(File directory, Class<T> clazz) {
-		this.directory = directory;
-		this.clazz = clazz;
-	}
-
-	private T newT(File file) {
-		try {
-			T t = clazz.newInstance();
-			t.fromFile(file);
-			return t;
-		} catch (InstantiationException | IllegalAccessException | IllegalArgumentException | SecurityException
-				| IOException e) {
-			log.error(e.getMessage());
-			return null;
-		}
-	}
-
-	public File getFile(String id) {
-		return new File(directory, id);
-	}
-
-	@Override
-	public T getSingle(String id) {
-		File file = getFile(id);
-		return newT(file);
-	}
-
-	@Override
-	public T createSingle(T t) throws FilebaseException {
-		if (t.getId() != null) {
-			File file = getFile(t.getId());
-			if (file.exists()) {
-				if (!file.delete()) {
-					log.error("could not delete file for recreation: " + file.getAbsolutePath());
-				}
-			}
-			try {
-				t.writeToFile(file);
-				log.info("file created: " + file.getAbsolutePath());
-			} catch (IOException e) {
-				throw new FilebaseException(e);
-			}
-		}
-		return t;
-	}
-
-	@Override
-	public long deleteSingle(String id) throws FilebaseException {
-		File file = getFile(id);
-		if (file.exists()) {
-			if (file.delete()) {
-				return 1;
-			} else {
-				throw new FilebaseException("could not delete file: " + file.getAbsolutePath());
-			}
-		}
-		return 0;
-	}
-
-	@Override
-	public long updateSingle(T t) throws FilebaseException {
-		File file = getFile(t.getId());
-		if (file.exists()) {
-			try {
-				t.writeToFile(file);
-				return 1;
-			} catch (Exception e) {
-				throw new FilebaseException(e);
-			}
-		}
-		return 0;
-	}
-
-}
diff --git a/vipra-util/src/main/java/de/vipra/util/service/Service.java b/vipra-util/src/main/java/de/vipra/util/service/Service.java
index 824a6a97..df6d2976 100644
--- a/vipra-util/src/main/java/de/vipra/util/service/Service.java
+++ b/vipra-util/src/main/java/de/vipra/util/service/Service.java
@@ -11,5 +11,5 @@ public interface Service<T extends Model, E extends Exception> {
 	long deleteSingle(String id) throws E;
 
 	long updateSingle(T t) throws E;
-
+	
 }
-- 
GitLab