From e7e0a04fd42b6e59d6c1a80c170ab050775e94f9 Mon Sep 17 00:00:00 2001
From: Eike Cochu <eike@cochu.com>
Date: Fri, 19 Feb 2016 19:28:39 +0100
Subject: [PATCH] renamed dtmdateindex to dtmsequenceindex; deleted multimap
 (unused); fixed corenlp processor (did not use lemmas); added minimum word
 frequency to preprocessing (frequencyannotator); improved import process
 preparations; removed wordmap from import (unused); extended articlestats by
 reduced word count and reduction ratio; updated websocket handling, added
 state enum and message parsing

---
 .../vipra/rest/resource/SearchResource.java   |   4 +-
 .../de/vipra/rest/resource/TopicResource.java |   2 -
 .../src/main/java/de/vipra/ws/State.java      |  29 +++++
 .../main/java/de/vipra/ws/StateSession.java   |  51 +++++++++
 .../java/de/vipra/ws/StateSessionMap.java     |  20 ++++
 .../src/main/java/de/vipra/ws/WebSocket.java  |  34 +++---
 .../src/main/resources/config.properties      |   5 +-
 vipra-cmd/runcfg/CMD - Import 2.launch        |  18 +++
 .../java/de/vipra/cmd/file/DTMFilebase.java   |  23 ++--
 ...TMDateIndex.java => DTMSequenceIndex.java} |   8 +-
 .../java/de/vipra/cmd/file/DTMVocabulary.java |  27 ++---
 .../java/de/vipra/cmd/file/JGibbFilebase.java |   3 +-
 .../de/vipra/cmd/option/ImportCommand.java    |  34 ++----
 .../de/vipra/cmd/text/CoreNLPProcessor.java   |  39 ++++++-
 .../de/vipra/cmd/text/FrequencyAnnotator.java |  45 ++++++++
 .../java/de/vipra/cmd/text/ProcessedText.java |  24 +++-
 .../de/vipra/cmd/text/StopwordsAnnotator.java |   9 +-
 vipra-ui/app/html/articles/show.html          |   4 +-
 vipra-ui/app/html/index.html                  | 108 +++++++++---------
 vipra-ui/app/html/network.html                |   6 +-
 vipra-ui/app/index.html                       |  12 +-
 vipra-ui/app/js/config.js                     |   4 +-
 vipra-ui/app/js/controllers.js                |   1 +
 vipra-ui/app/less/app.less                    |   3 +
 .../main/java/de/vipra/util/Constants.java    |  17 +--
 .../src/main/java/de/vipra/util/MultiMap.java | 104 -----------------
 .../java/de/vipra/util/model/ArticleFull.java |  33 ++----
 .../de/vipra/util/model/ArticleStats.java     |  33 +++++-
 28 files changed, 387 insertions(+), 313 deletions(-)
 create mode 100644 vipra-backend/src/main/java/de/vipra/ws/State.java
 create mode 100644 vipra-backend/src/main/java/de/vipra/ws/StateSession.java
 create mode 100644 vipra-backend/src/main/java/de/vipra/ws/StateSessionMap.java
 create mode 100644 vipra-cmd/runcfg/CMD - Import 2.launch
 rename vipra-cmd/src/main/java/de/vipra/cmd/file/{DTMDateIndex.java => DTMSequenceIndex.java} (89%)
 create mode 100644 vipra-cmd/src/main/java/de/vipra/cmd/text/FrequencyAnnotator.java
 delete mode 100644 vipra-util/src/main/java/de/vipra/util/MultiMap.java

diff --git a/vipra-backend/src/main/java/de/vipra/rest/resource/SearchResource.java b/vipra-backend/src/main/java/de/vipra/rest/resource/SearchResource.java
index a9600df9..314ae153 100644
--- a/vipra-backend/src/main/java/de/vipra/rest/resource/SearchResource.java
+++ b/vipra-backend/src/main/java/de/vipra/rest/resource/SearchResource.java
@@ -65,8 +65,8 @@ public class SearchResource {
 		SearchResponse response = null;
 		try {
 			response = client.prepareSearch("articles")
-					.setQuery(QueryBuilders.multiMatchQuery(query, "topics^" + Constants.BOOST_TOPICS,
-							"title^" + Constants.BOOST_TITLES, "_all"))
+					.setQuery(QueryBuilders.multiMatchQuery(query, "topics^" + Constants.ES_BOOST_TOPICS,
+							"title^" + Constants.ES_BOOST_TITLES, "_all"))
 					.setFrom(skip).setSize(limit).execute().actionGet();
 		} catch (Exception e) {
 			e.printStackTrace();
diff --git a/vipra-backend/src/main/java/de/vipra/rest/resource/TopicResource.java b/vipra-backend/src/main/java/de/vipra/rest/resource/TopicResource.java
index 619d28c8..6a0d7bde 100644
--- a/vipra-backend/src/main/java/de/vipra/rest/resource/TopicResource.java
+++ b/vipra-backend/src/main/java/de/vipra/rest/resource/TopicResource.java
@@ -34,7 +34,6 @@ import de.vipra.util.model.Topic;
 import de.vipra.util.model.TopicFull;
 import de.vipra.util.service.MongoService;
 import de.vipra.util.service.Service.QueryBuilder;
-import de.vipra.ws.WebSocket;
 
 @Path("topics")
 public class TopicResource {
@@ -138,7 +137,6 @@ public class TopicResource {
 
 		try {
 			dbTopics.replaceSingle(topic);
-			WebSocket.sendToState("topics.show", "{\"msg\":\"topic updated\"}");
 			return res.ok(topic);
 		} catch (DatabaseException e) {
 			e.printStackTrace();
diff --git a/vipra-backend/src/main/java/de/vipra/ws/State.java b/vipra-backend/src/main/java/de/vipra/ws/State.java
new file mode 100644
index 00000000..ec76e527
--- /dev/null
+++ b/vipra-backend/src/main/java/de/vipra/ws/State.java
@@ -0,0 +1,50 @@
+package de.vipra.ws;
+
+/**
+ * States that can be assigned to a websocket session.
+ * <p>
+ * Every constant except {@link #ID} carries a name that {@link #find(String)}
+ * matches ignoring case. {@link #ID} has no name and is what
+ * {@link #find(String)} returns when no named state matches.
+ */
+public enum State {
+	INDEX("index"),
+	ABOUT("about"),
+	NETWORK("network"),
+	ARTICLES("articles"),
+	TOPICS("topics"),
+	WORDS("words"),
+	ID(null);
+
+	/** State name; {@code null} for {@link #ID}. */
+	private final String state;
+
+	State(String state) {
+		this.state = state;
+	}
+
+	/**
+	 * @return the state name, or {@code null} for {@link #ID}
+	 */
+	public String getState() {
+		return state;
+	}
+
+	/**
+	 * Looks up a state by name, ignoring case.
+	 *
+	 * @param str
+	 *            state name to look up, may be {@code null}
+	 * @return the state whose name matches {@code str}, or {@link #ID} if
+	 *         none does
+	 */
+	public static State find(String str) {
+		for (State state : State.values()) {
+			// ID has a null name: skip it instead of dereferencing null
+			if (state.state != null && state.state.equalsIgnoreCase(str))
+				return state;
+		}
+		return ID;
+	}
+
+}
diff --git a/vipra-backend/src/main/java/de/vipra/ws/StateSession.java b/vipra-backend/src/main/java/de/vipra/ws/StateSession.java
new file mode 100644
index 00000000..2552bdf6
--- /dev/null
+++ b/vipra-backend/src/main/java/de/vipra/ws/StateSession.java
@@ -0,0 +1,71 @@
+package de.vipra.ws;
+
+import javax.websocket.Session;
+
+/**
+ * Pairs a websocket {@link Session} with the {@link State} assigned to it.
+ * Equality and hash code are delegated to the wrapped session.
+ */
+public class StateSession {
+
+	private final Session session;
+	private State state;
+
+	/**
+	 * @param session
+	 *            session to wrap
+	 * @throws NullPointerException
+	 *             if {@code session} is {@code null}
+	 */
+	public StateSession(Session session) {
+		if (session == null)
+			throw new NullPointerException("session cannot be null");
+		this.session = session;
+	}
+
+	/**
+	 * @return the assigned state, {@code null} until one has been set
+	 */
+	public State getState() {
+		return state;
+	}
+
+	public void setState(State state) {
+		this.state = state;
+	}
+
+	/**
+	 * Assigns the state that {@link State#find(String)} returns for the given
+	 * name.
+	 *
+	 * @param state
+	 *            state name
+	 */
+	public void setState(String state) {
+		this.state = State.find(state);
+	}
+
+	public Session getSession() {
+		return session;
+	}
+
+	/**
+	 * Compares the wrapped session with {@code o}, which may be a raw
+	 * {@link Session} or another {@link StateSession} (whose wrapped session is
+	 * used).
+	 * <p>
+	 * NOTE(review): this is presumably not symmetric, since a raw
+	 * {@link Session} will not normally report equality with a wrapper.
+	 */
+	@Override
+	public boolean equals(Object o) {
+		final Object other = o instanceof StateSession ? ((StateSession) o).getSession() : o;
+		return other instanceof Session && other.equals(session);
+	}
+
+	@Override
+	public int hashCode() {
+		return session.hashCode();
+	}
+
+}
diff --git a/vipra-backend/src/main/java/de/vipra/ws/StateSessionMap.java b/vipra-backend/src/main/java/de/vipra/ws/StateSessionMap.java
new file mode 100644
index 00000000..1d8d5ac8
--- /dev/null
+++ b/vipra-backend/src/main/java/de/vipra/ws/StateSessionMap.java
@@ -0,0 +1,63 @@
+package de.vipra.ws;
+
+import java.util.HashMap;
+import java.util.stream.Stream;
+
+import javax.websocket.Session;
+
+/**
+ * Registry of websocket sessions, each wrapped in a {@link StateSession} that
+ * is stored as both key and value.
+ * <p>
+ * Lookups accept either a {@link StateSession} or a raw {@link Session}. A raw
+ * session is wrapped before the lookup because {@link HashMap} compares with
+ * {@code key.equals(storedKey)}: a plain {@link Session} does not consider
+ * itself equal to a {@link StateSession}, so it would never be found.
+ */
+public class StateSessionMap extends HashMap<StateSession, StateSession> {
+
+	private static final long serialVersionUID = 1L;
+
+	/**
+	 * @param state
+	 *            state to filter by
+	 * @return the sessions of all entries whose current state is {@code state}
+	 */
+	public Stream<Session> stream(State state) {
+		return this.keySet().stream().filter(s -> s.getState() == state).map(StateSession::getSession);
+	}
+
+	/**
+	 * Registers a session, using it as both key and value.
+	 *
+	 * @param session
+	 *            session to register
+	 */
+	public void add(StateSession session) {
+		this.put(session, session);
+	}
+
+	@Override
+	public StateSession get(Object key) {
+		return super.get(wrap(key));
+	}
+
+	@Override
+	public boolean containsKey(Object key) {
+		return super.containsKey(wrap(key));
+	}
+
+	@Override
+	public StateSession remove(Object key) {
+		return super.remove(wrap(key));
+	}
+
+	/**
+	 * Wraps a raw {@link Session} so that it can be used as a lookup key; any
+	 * other object is returned unchanged.
+	 */
+	private static Object wrap(Object key) {
+		return key instanceof Session ? new StateSession((Session) key) : key;
+	}
+
+}
diff --git a/vipra-backend/src/main/java/de/vipra/ws/WebSocket.java b/vipra-backend/src/main/java/de/vipra/ws/WebSocket.java
index 5e7fa899..46a2b58b 100644
--- a/vipra-backend/src/main/java/de/vipra/ws/WebSocket.java
+++ b/vipra-backend/src/main/java/de/vipra/ws/WebSocket.java
@@ -1,9 +1,6 @@
 package de.vipra.ws;
 
 import java.io.IOException;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Set;
 
 import javax.websocket.OnClose;
 import javax.websocket.OnError;
@@ -16,10 +13,10 @@ import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
 import com.fasterxml.jackson.core.JsonParseException;
+import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.JsonMappingException;
 import com.fasterxml.jackson.databind.ObjectMapper;
 
-import de.vipra.util.MultiMap;
 import de.vipra.ws.msg.InitMessage;
 import de.vipra.ws.msg.WebSocketMessage;
 
@@ -29,13 +26,12 @@ public class WebSocket {
 	public static final Logger log = LogManager.getLogger(WebSocket.class);
 
 	public static final ObjectMapper mapper = new ObjectMapper();
-	public static final Set<Session> sessions = new HashSet<>();
-	public static final MultiMap<String, Session> states = new MultiMap<>();
+	public static final StateSessionMap sessions = new StateSessionMap();
 
 	@OnOpen
 	public void open(Session session) {
 		log.debug("connect");
-		sessions.add(session);
+		sessions.add(new StateSession(session));
 	}
 
 	@OnClose
@@ -45,7 +41,9 @@ public class WebSocket {
 	}
 
 	@OnError
-	public void onError(Throwable error) {}
+	public void onError(Throwable error) {
+		log.error(error);
+	}
 
 	@OnMessage
 	public void handleMessage(String input, Session session)
@@ -65,20 +63,18 @@ public class WebSocket {
 
 	public void handleInitMessage(InitMessage message, Session session) {
 		log.debug("init message received. state = " + message.getState());
-		states.put(message.getState(), session);
+		sessions.get(session).setState(message.getState());
 	}
 
-	public static void sendToState(String state, String message) {
-		Collection<Session> sessions = states.get(state);
-		if (sessions != null) {
-			for (Session session : sessions) {
-				try {
-					session.getBasicRemote().sendText(message);
-				} catch (IOException e) {
-					log.error(e);
-				}
-			}
+	public static void sendToState(State state, Object message) {
+		String json;
+		try {
+			json = mapper.writeValueAsString(message);
+		} catch (JsonProcessingException e) {
+			log.error(e);
+			return;
 		}
+		sessions.stream(state).forEach(s -> s.getAsyncRemote().sendText(json));
 	}
 
 }
diff --git a/vipra-backend/src/main/resources/config.properties b/vipra-backend/src/main/resources/config.properties
index 0778073f..07030840 100644
--- a/vipra-backend/src/main/resources/config.properties
+++ b/vipra-backend/src/main/resources/config.properties
@@ -1,6 +1,3 @@
 db.host=localhost
 db.port=27017
-db.name=test
-tm.processor=corenlp
-tm.analyzer=jgibb
-tm.saveallwords=false
\ No newline at end of file
+db.name=test
\ No newline at end of file
diff --git a/vipra-cmd/runcfg/CMD - Import 2.launch b/vipra-cmd/runcfg/CMD - Import 2.launch
new file mode 100644
index 00000000..89c246dd
--- /dev/null
+++ b/vipra-cmd/runcfg/CMD - Import 2.launch	
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication">
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
+<listEntry value="/vipra-cmd/src/main/java/de/vipra/cmd/Main.java"/>
+</listAttribute>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
+<listEntry value="1"/>
+</listAttribute>
+<listAttribute key="org.eclipse.debug.ui.favoriteGroups">
+<listEntry value="org.eclipse.debug.ui.launchGroup.run"/>
+</listAttribute>
+<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/>
+<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="de.vipra.cmd.Main"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-i /home/eike/repos/master/ma-impl/vm/data/test-2.json"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/>
+<stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/>
+<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Dlog4j.configurationFile=log4j2dev.xml"/>
+</launchConfiguration>
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/DTMFilebase.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/DTMFilebase.java
index 8d169228..4e3cccb3 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/file/DTMFilebase.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/DTMFilebase.java
@@ -10,7 +10,7 @@ import java.util.Iterator;
 import java.util.List;
 
 import de.vipra.cmd.ex.FilebaseException;
-import de.vipra.cmd.file.DTMDateIndex.DTMDateIndexEntry;
+import de.vipra.cmd.file.DTMSequenceIndex.DTMDateIndexEntry;
 import de.vipra.util.Config;
 import de.vipra.util.Constants;
 import de.vipra.util.FileUtils;
@@ -19,10 +19,9 @@ import de.vipra.util.model.ArticleFull;
 
 public class DTMFilebase extends Filebase {
 
-	public static final String FILE_MODEL = "dtm-mult.dat";
-	public static final String FILE_VOCAB = "vocab";
+	public static final String FILE_NAME = "dtm-mult.dat";
 
-	private final DTMDateIndex dateindex;
+	private final DTMSequenceIndex seqindex;
 	private final DTMVocabulary vocab;
 	private final File modelFile;
 
@@ -37,7 +36,7 @@ public class DTMFilebase extends Filebase {
 
 		File modelDir = getModelDir();
 		try {
-			this.dateindex = new DTMDateIndex(modelDir, config.windowResolution, false);
+			this.seqindex = new DTMSequenceIndex(modelDir, config.windowResolution, false);
 		} catch (IOException | ParseException e) {
 			throw new FilebaseException("could not read date index file", e);
 		}
@@ -47,24 +46,22 @@ public class DTMFilebase extends Filebase {
 		} catch (IOException e) {
 			throw new FilebaseException("could not read vocabulary file", e);
 		}
-		this.modelFile = getModelFile(FILE_MODEL);
+		this.modelFile = getModelFile(FILE_NAME);
 	}
 
 	@Override
 	public synchronized void write(List<ArticleFull> articles) throws IOException {
 		if (!articles.isEmpty()) {
-			// index new articles
-			for (ArticleFull article : articles) {
-				dateindex.add(article.getDate(), vocab.indexText(article.getProcessedText()));
-			}
+			for (ArticleFull article : articles)
+				seqindex.add(article.getDate(), vocab.transform(article.getProcessedText()));
 
 			// write temp file
-			File modelFileTmp = getModelFile(FILE_MODEL + ".tmp");
+			File modelFileTmp = getModelFile(FILE_NAME + ".tmp");
 			Iterator<String> lines = null;
 			if (modelFile.exists())
 				lines = FileUtils.iterateFileLines(modelFile);
 			BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(modelFileTmp)));
-			for (DTMDateIndexEntry e : dateindex) {
+			for (DTMDateIndexEntry e : seqindex) {
 				if (e.exists) {
 					if (lines == null) {
 						writer.close();
@@ -95,7 +92,7 @@ public class DTMFilebase extends Filebase {
 
 		// write vocabulary and windows
 		vocab.close();
-		dateindex.close();
+		seqindex.close();
 	}
 
 }
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/DTMDateIndex.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/DTMSequenceIndex.java
similarity index 89%
rename from vipra-cmd/src/main/java/de/vipra/cmd/file/DTMDateIndex.java
rename to vipra-cmd/src/main/java/de/vipra/cmd/file/DTMSequenceIndex.java
index c9a47040..7dc6b222 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/file/DTMDateIndex.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/DTMSequenceIndex.java
@@ -20,9 +20,9 @@ import de.vipra.util.Constants;
 import de.vipra.util.Constants.WindowResolution;
 import de.vipra.util.FileUtils;
 
-public class DTMDateIndex implements Closeable, Iterable<DTMDateIndex.DTMDateIndexEntry> {
+public class DTMSequenceIndex implements Closeable, Iterable<DTMSequenceIndex.DTMDateIndexEntry> {
 
-	public static final String FILE_WINDOWS = "dtm-seq.dat";
+	public static final String FILE_NAME = "dtm-seq.dat";
 
 	public static class DTMDateIndexEntry implements Comparable<DTMDateIndexEntry> {
 		public Date date;
@@ -50,7 +50,7 @@ public class DTMDateIndex implements Closeable, Iterable<DTMDateIndex.DTMDateInd
 	private static List<DTMDateIndexEntry> entries;
 	private static SimpleDateFormat df = new SimpleDateFormat(Constants.DATETIME_FORMAT);
 
-	public DTMDateIndex(File modelDir, WindowResolution wr, boolean reread) throws IOException, ParseException {
+	public DTMSequenceIndex(File modelDir, WindowResolution wr, boolean reread) throws IOException, ParseException {
 		this.file = new File(modelDir, "dates");
 		windowResolution = wr;
 		if (file.exists()) {
@@ -99,7 +99,7 @@ public class DTMDateIndex implements Closeable, Iterable<DTMDateIndex.DTMDateInd
 		writer.close();
 
 		// write window index
-		File seqFile = new File(file.getParentFile(), FILE_WINDOWS);
+		File seqFile = new File(file.getParentFile(), FILE_NAME);
 		writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(seqFile, false)));
 		writer.write(Integer.toString(windows.size()));
 		writer.write(Constants.LINE_SEP);
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/DTMVocabulary.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/DTMVocabulary.java
index 72399624..fc056a3a 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/file/DTMVocabulary.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/DTMVocabulary.java
@@ -4,7 +4,6 @@ import java.io.Closeable;
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
@@ -16,13 +15,15 @@ import de.vipra.util.FileUtils;
 
 public class DTMVocabulary implements Closeable, Iterable<String> {
 
+	public static final String FILE_NAME = "vocab";
+
 	private File file;
 	private static List<String> vocables;
 	private static Map<String, Integer> vocablesMap;
 	private static int nextIndex = 1;
 
 	public DTMVocabulary(File modelDir, boolean reread) throws IOException {
-		this.file = new File(modelDir, "vocab");
+		this.file = new File(modelDir, FILE_NAME);
 		if (file.exists()) {
 			if (vocables == null || reread)
 				vocables = new ArrayList<>(FileUtils.readFile(file));
@@ -42,20 +43,7 @@ public class DTMVocabulary implements Closeable, Iterable<String> {
 		FileUtils.writeLines(file, Constants.FILEBASE_ENCODING.name(), vocables, null, false);
 	}
 
-	public void addVocabulary(String text) {
-		addVocabulary(text.split("\\s+"));
-	}
-
-	public void addVocabulary(String[] text) {
-		for (String word : text) {
-			if (!vocablesMap.containsKey(word)) {
-				vocablesMap.put(word, nextIndex++);
-				vocables.add(word);
-			}
-		}
-	}
-
-	public int index(String word) {
+	private int index(String word) {
 		Integer index = vocablesMap.get(word);
 		if (index == null) {
 			index = nextIndex++;
@@ -69,11 +57,10 @@ public class DTMVocabulary implements Closeable, Iterable<String> {
 		return vocablesMap.size();
 	}
 
-	public String indexText(String in) {
+	public String transform(String[] words) {
 		// count unique words
-		List<String> wordList = Arrays.asList(in.split("\\s+"));
-		Map<String, Integer> wordMap = new HashMap<>(wordList.size());
-		for (String word : wordList) {
+		Map<String, Integer> wordMap = new HashMap<>(words.length);
+		for (String word : words) {
 			Integer count = wordMap.get(word);
 			if (count == null)
 				wordMap.put(word, 1);
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/JGibbFilebase.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/JGibbFilebase.java
index 5f205147..f15c2908 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/file/JGibbFilebase.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/JGibbFilebase.java
@@ -9,6 +9,7 @@ import java.util.List;
 
 import de.vipra.cmd.ex.FilebaseException;
 import de.vipra.util.model.ArticleFull;
+import edu.stanford.nlp.util.StringUtils;
 
 public class JGibbFilebase extends Filebase {
 
@@ -24,7 +25,7 @@ public class JGibbFilebase extends Filebase {
 		if (!articles.isEmpty()) {
 			BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(modelFile)));
 			for (ArticleFull article : articles)
-				writer.write(article.getProcessedText() + "\n");
+				writer.write(StringUtils.join(article.getProcessedText()) + "\n");
 			writer.close();
 		}
 	}
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java
index 5c49c928..55163b1a 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java
@@ -7,7 +7,6 @@ import java.io.FilenameFilter;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
-import java.util.Set;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
@@ -29,12 +28,10 @@ import de.vipra.util.Config;
 import de.vipra.util.Constants;
 import de.vipra.util.StringUtils;
 import de.vipra.util.Timer;
-import de.vipra.util.WordMap;
 import de.vipra.util.ex.ConfigException;
 import de.vipra.util.ex.DatabaseException;
 import de.vipra.util.model.ArticleFull;
 import de.vipra.util.model.ArticleStats;
-import de.vipra.util.model.Word;
 import de.vipra.util.service.MongoService;
 
 public class ImportCommand implements Command {
@@ -79,24 +76,29 @@ public class ImportCommand implements Command {
 
 		@Override
 		public void run() {
-			log.info("importing \"" + object.get("title") + "\"");
 			ArticleFull article = articleFromJSON(object);
 
 			try {
 				// preprocess text
 				ProcessedText processedText = processor.process(article.getText());
+				article.setProcessedText(processedText.getWords());
 
-				// generate text stats
-				ArticleStats articleStats = ArticleStats.generateFromText(processedText.getText());
+				// generate article stats
+				ArticleStats stats = new ArticleStats();
+				stats.setWordCount(processedText.getWordCount());
+				stats.setProcessedWordCount(processedText.getReducedWordCount());
+				stats.setReductionRatio(processedText.getReductionRatio());
+				article.setStats(stats);
 
 				// add article to mongodb
-				article.setProcessedText(processedText.getText());
-				article.setStats(articleStats);
 				buffer.add(article);
 
 				// add article to filebase if long enough
-				if (processedText.getWords().length >= Constants.DOCUMENT_MIN_LENGTH)
+				if (processedText.getReducedWordCount() >= Constants.DOCUMENT_MIN_LENGTH)
 					filebase.add(article);
+
+				log.info("imported \"" + object.get("title") + "\"\r\n └ text reduction: "
+						+ (processedText.getReductionRatio() * 100) + "%");
 			} catch (ProcessorException e) {
 				log.error("could not preprocess text of article '" + article.getTitle() + "'");
 			} catch (DatabaseException e) {
@@ -128,10 +130,8 @@ public class ImportCommand implements Command {
 	private JSONParser parser = new JSONParser();
 	private Config config;
 	private MongoService<ArticleFull, ObjectId> dbArticles;
-	private MongoService<Word, String> dbWords;
 	private Filebase filebase;
 	private Processor processor;
-	private WordMap wordMap;
 	private ArticleBuffer buffer;
 	private ExecutorService executor;
 
@@ -229,10 +229,8 @@ public class ImportCommand implements Command {
 		int threadCount = Runtime.getRuntime().availableProcessors() * 10;
 		config = Config.getConfig();
 		dbArticles = MongoService.getDatabaseService(config, ArticleFull.class);
-		dbWords = MongoService.getDatabaseService(config, Word.class);
 		filebase = Filebase.getFilebase(config);
 		processor = Processor.getProcessor(config);
-		wordMap = new WordMap(dbWords);
 		buffer = new ArticleBuffer(dbArticles);
 		executor = Executors.newFixedThreadPool(threadCount);
 
@@ -260,20 +258,10 @@ public class ImportCommand implements Command {
 		filebase.close();
 		timer.lap("filebase write");
 
-		/*
-		 * save words
-		 */
-		log.info("saving words");
-		Set<Word> importedWords = wordMap.getNewWords();
-		wordMap.create();
-		timer.lap("saving words");
-
 		/*
 		 * run information
 		 */
 		log.info("imported " + imported + " new " + StringUtils.quantity(imported, "article"));
-		int newWordsCount = importedWords.size();
-		log.info("imported " + newWordsCount + " new " + StringUtils.quantity(newWordsCount, "word"));
 		log.info(timer.toString());
 		log.info("done in " + StringUtils.timeString(timer.total()));
 	}
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/CoreNLPProcessor.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/CoreNLPProcessor.java
index 620c9189..3e4ab991 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/text/CoreNLPProcessor.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/CoreNLPProcessor.java
@@ -3,7 +3,12 @@ package de.vipra.cmd.text;
 import java.util.List;
 import java.util.Properties;
 
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
 import de.vipra.cmd.ex.ProcessorException;
+import de.vipra.util.Constants;
+import edu.stanford.nlp.ling.CoreAnnotations.LemmaAnnotation;
 import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
 import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
 import edu.stanford.nlp.ling.CoreLabel;
@@ -14,6 +19,8 @@ import edu.stanford.nlp.util.StringUtils;
 
 public class CoreNLPProcessor extends Processor {
 
+	public static final Logger log = LogManager.getLogger(CoreNLPProcessor.class);
+
 	private StanfordCoreNLP nlp;
 
 	public CoreNLPProcessor(List<String> stopWordsList) {
@@ -21,7 +28,14 @@ public class CoreNLPProcessor extends Processor {
 
 		Properties props = new Properties();
 		props.setProperty("customAnnotatorClass.stopwords", StopwordsAnnotator.class.getCanonicalName());
-		props.setProperty("annotators", "tokenize, ssplit, stopwords, pos, lemma");
+		props.setProperty("customAnnotatorClass.frequency", FrequencyAnnotator.class.getCanonicalName());
+		// tokenize: split the text into tokens
+		// ssplit: group tokens into sentences
+		// stopwords: mark stopwords
+		// pos: tag each token with its part of speech
+		// lemma: lemmatize words
+		// frequency: count how often each lemma occurs
+		props.setProperty("annotators", "tokenize, ssplit, stopwords, pos, lemma, frequency");
 		props.setProperty("stopwords", StringUtils.join(stopWordsList));
 
 		nlp = new StanfordCoreNLP(props);
@@ -32,17 +46,30 @@ public class CoreNLPProcessor extends Processor {
 		Annotation doc = new Annotation(input.toLowerCase());
 		nlp.annotate(doc);
 		StringBuilder sb = new StringBuilder();
-		List<CoreMap> sentences = doc.get(SentencesAnnotation.class);
-		for (CoreMap sentence : sentences) {
+		long wordCount = 0;
+		// loop sentences
+		for (CoreMap sentence : doc.get(SentencesAnnotation.class)) {
 			List<CoreLabel> words = sentence.get(TokensAnnotation.class);
+			// count words
+			wordCount += words.size();
+			// loop words
 			for (CoreLabel word : words) {
+				// filter out stopwords
 				Boolean b = word.get(StopwordsAnnotator.class);
-				if (b == null || !b)
-					sb.append(word.word()).append(" ");
+				if (b == null || !b) {
+					// filter out infrequent words
+					Long count = word.get(FrequencyAnnotator.class);
+					if (count != null && count >= Constants.DOCUMENT_MIN_WORD_FREQ) {
+						String lemma = word.get(LemmaAnnotation.class);
+						// collect the lemma of every word that passed both filters
+						sb.append(lemma).append(" ");
+					}
+				}
 			}
 		}
+
 		String text = clean(sb.toString());
-		return new ProcessedText(text);
+		return new ProcessedText(text, wordCount);
 	}
 
 }
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/FrequencyAnnotator.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/FrequencyAnnotator.java
new file mode 100644
index 00000000..8f339c49
--- /dev/null
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/FrequencyAnnotator.java
@@ -0,0 +1,54 @@
+package de.vipra.cmd.text;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import edu.stanford.nlp.ling.CoreAnnotation;
+import edu.stanford.nlp.ling.CoreLabel;
+import edu.stanford.nlp.ling.CoreAnnotations.LemmaAnnotation;
+import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
+import edu.stanford.nlp.pipeline.Annotation;
+import edu.stanford.nlp.pipeline.Annotator;
+
+/**
+ * Annotator that tags every token with the number of tokens in the annotation
+ * that share its lemma. The count is stored on the token under
+ * {@code FrequencyAnnotator.class}.
+ */
+public class FrequencyAnnotator implements Annotator, CoreAnnotation<Long> {
+
+	public static final String NAME = "frequency";
+
+	@Override
+	public void annotate(Annotation annotation) {
+		final List<CoreLabel> labels = annotation.get(TokensAnnotation.class);
+		// first pass: number of occurrences per lemma
+		final Map<String, Long> lemmaCounts = labels.stream()
+				.collect(Collectors.groupingBy(FrequencyAnnotator::lemmaOf, Collectors.counting()));
+		// second pass: attach the count of its lemma to every token
+		labels.forEach(label -> label.set(FrequencyAnnotator.class, lemmaCounts.get(lemmaOf(label))));
+	}
+
+	private static String lemmaOf(CoreLabel label) {
+		return label.get(LemmaAnnotation.class);
+	}
+
+	@Override
+	public Set<Requirement> requirementsSatisfied() {
+		return Collections.singleton(new Requirement(NAME));
+	}
+
+	@Override
+	public Set<Requirement> requires() {
+		return TOKENIZE_SSPLIT_POS_LEMMA;
+	}
+
+	@Override
+	public Class<Long> getType() {
+		return Long.class;
+	}
+
+}
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/ProcessedText.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/ProcessedText.java
index 84d3b6b0..dcc84251 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/text/ProcessedText.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/ProcessedText.java
@@ -1,21 +1,46 @@
 package de.vipra.cmd.text;
 
-import de.vipra.util.StringUtils;
-
-public final class ProcessedText {
+/**
+ * Result of preprocessing a text: the remaining words plus statistics about
+ * how much the preprocessing shortened the text.
+ */
+public class ProcessedText {
 
 	private final String[] words;
+	private final long originalWordCount;
+	private final long reducedWordCount;
+	private final double reductionRatio;
 
-	public ProcessedText(String text) {
-		this.words = text.split("\\s+");
+	/**
+	 * @param text
+	 *            preprocessed text, words separated by whitespace
+	 * @param wordCount
+	 *            number of words before preprocessing
+	 */
+	public ProcessedText(String text, long wordCount) {
+		// "".split(...) yields one empty string; an empty text has no words
+		String trimmed = text.trim();
+		this.words = trimmed.isEmpty() ? new String[0] : trimmed.split("\\s+");
+		this.originalWordCount = wordCount;
+		this.reducedWordCount = this.words.length;
+		// avoid dividing by zero when the original text had no words
+		this.reductionRatio = wordCount > 0 ? 1 - ((double) reducedWordCount / wordCount) : 0;
 	}
 
 	public String[] getWords() {
 		return words;
 	}
 
-	public String getText() {
-		return StringUtils.join(words);
+	public long getWordCount() {
+		return originalWordCount;
+	}
+
+	public long getReducedWordCount() {
+		return reducedWordCount;
+	}
+
+	public double getReductionRatio() {
+		return reductionRatio;
 	}
 
 }
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/StopwordsAnnotator.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/StopwordsAnnotator.java
index d2701fee..7d5ab90a 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/text/StopwordsAnnotator.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/StopwordsAnnotator.java
@@ -8,8 +8,8 @@ import java.util.Properties;
 import java.util.Set;
 
 import edu.stanford.nlp.ling.CoreAnnotation;
-import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
 import edu.stanford.nlp.ling.CoreLabel;
+import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
 import edu.stanford.nlp.pipeline.Annotation;
 import edu.stanford.nlp.pipeline.Annotator;
 
@@ -21,16 +21,13 @@ public class StopwordsAnnotator implements Annotator, CoreAnnotation<Boolean> {
 
 	public StopwordsAnnotator(String input, Properties props) {
 		stopWords = new HashSet<String>(Arrays.asList(props.getProperty(NAME).split(" ")));
+		stopWords.addAll(Arrays.asList("-LRB-", "-RRB-", "-LSB-", "-RSB-", "-LCB-", "-RCB-"));
 	}
 
 	@Override
 	public void annotate(Annotation annotation) {
 		List<CoreLabel> tokens = annotation.get(TokensAnnotation.class);
-		for (CoreLabel token : tokens) {
-			if (stopWords.contains(token.word())) {
-				token.set(StopwordsAnnotator.class, true);
-			}
-		}
+		tokens.stream().filter(t -> stopWords.contains(t.word())).forEach(t -> t.set(StopwordsAnnotator.class, true));
 	}
 
 	@Override
diff --git a/vipra-ui/app/html/articles/show.html b/vipra-ui/app/html/articles/show.html
index 80054488..00b486cf 100644
--- a/vipra-ui/app/html/articles/show.html
+++ b/vipra-ui/app/html/articles/show.html
@@ -5,7 +5,9 @@
     <table class="item-actions">
       <tr>
         <td>
-          <a class="btn btn-default" ui-sref="network({type:'articles', id:article.id})">Network graph</a>
+          <a class="btn btn-default" ui-sref="network({type:'articles', id:article.id})">
+            Network graph
+          </a>
         </td>
       </tr>
     </table>
diff --git a/vipra-ui/app/html/index.html b/vipra-ui/app/html/index.html
index d6be6e5e..cbbe697b 100644
--- a/vipra-ui/app/html/index.html
+++ b/vipra-ui/app/html/index.html
@@ -1,66 +1,62 @@
 <div ng-cloak>
-  <div class="container">
-
-    <div class="row" ng-hide="search">
-      <div class="col-md-12">
-        <div class="heading"></div>
-      </div>
+  <div class="row" ng-hide="search">
+    <div class="col-md-12">
+      <div class="heading"></div>
     </div>
+  </div>
 
-    <div class="row" ng-hide="search">
-      <div class="col-md-6 text-center">
-        <h4>Latest articles</h4>
-        <ul class="list-unstyled">
-          <li class="ellipsize" ng-repeat="article in latestArticles">
-            <a ui-sref="articles.show({id:article.id})" ng-bind="article.title"></a>
-          </li>
-        </ul>
-      </div>
-      <div class="col-md-3 text-center">
-        <h4>Latest topics</h4>
-        <ul class="list-unstyled">
-          <li class="ellipsize" ng-repeat="topic in latestTopics">
-            <a ui-sref="topics.show({id:topic.id})" ng-bind="topic.name"></a>
-          </li>
-        </ul>
-      </div>
-      <div class="col-md-3 text-center">
-        <h4>Latest words</h4>
-        <ul class="list-unstyled">
-          <li class="ellipsize" ng-repeat="word in latestWords">
-            <a ui-sref="words.show({id:word.id})" ng-bind="word.id"></a>
-          </li>
-        </ul>
-      </div>
+  <div class="row" ng-hide="search">
+    <div class="col-md-6 text-center">
+      <h4>Latest articles</h4>
+      <ul class="list-unstyled">
+        <li class="ellipsize" ng-repeat="article in latestArticles">
+          <a ui-sref="articles.show({id:article.id})" ng-bind="article.title"></a>
+        </li>
+      </ul>
     </div>
-
-    <div class="row row-spaced">
-      <div class="col-md-12">
-        <input type="text" class="form-control input-lg" placeholder="Search..." ng-model="search" ng-model-options="{debounce:500}">
-      </div>
+    <div class="col-md-3 text-center">
+      <h4>Latest topics</h4>
+      <ul class="list-unstyled">
+        <li class="ellipsize" ng-repeat="topic in latestTopics">
+          <a ui-sref="topics.show({id:topic.id})" ng-bind="topic.name"></a>
+        </li>
+      </ul>
     </div>
+    <div class="col-md-3 text-center">
+      <h4>Latest words</h4>
+      <ul class="list-unstyled">
+        <li class="ellipsize" ng-repeat="word in latestWords">
+          <a ui-sref="words.show({id:word.id})" ng-bind="word.id"></a>
+        </li>
+      </ul>
+    </div>
+  </div>
 
-    <div class="row row-spaced">
-      <div class="text-center" ng-show="searching">
-        Searching...
-      </div>
-      <div class="col-md-12" ng-show="!searching && search && (!searchResults || searchResults.length == 0)">
-        <h4>No Results</h4>
-      </div>
-      <div class="col-md-12" ng-show="searchResults.length > 0">
-        <h4>Results</h4>
-        <ul class="list-unstyled search-results">
-          <li class="search-result" ng-repeat="article in searchResults">
-            <a ui-sref="articles.show({id:article.id})" ng-bind="article.title"></a>
-            <p>
-              <span class="text" ng-bind="article.text"></span>
-              <br>
-              <small class="text-muted" ng-bind-template="{{article.meta.score | toPercent}}% &ndash; {{article.date | formatDate}}"></small>
-            </p>
-          </li>
-        </ul>
-      </div>
+  <div class="row row-spaced">
+    <div class="col-md-12">
+      <input type="text" class="form-control input-lg" placeholder="Search..." ng-model="search" ng-model-options="{debounce:500}">
     </div>
+  </div>
 
+  <div class="row row-spaced">
+    <div class="text-center" ng-show="searching">
+      Searching...
+    </div>
+    <div class="col-md-12" ng-show="!searching && search && (!searchResults || searchResults.length == 0)">
+      <h4>No Results</h4>
+    </div>
+    <div class="col-md-12" ng-show="searchResults.length > 0">
+      <h4>Results</h4>
+      <ul class="list-unstyled search-results">
+        <li class="search-result" ng-repeat="article in searchResults">
+          <a ui-sref="articles.show({id:article.id})" ng-bind="article.title"></a>
+          <p>
+            <span class="text" ng-bind="article.text"></span>
+            <br>
+            <small class="text-muted" ng-bind-template="{{article.meta.score | toPercent}}% &ndash; {{article.date | formatDate}}"></small>
+          </p>
+        </li>
+      </ul>
+    </div>
   </div>
 </div>
\ No newline at end of file
diff --git a/vipra-ui/app/html/network.html b/vipra-ui/app/html/network.html
index b1a8667a..449bed47 100644
--- a/vipra-ui/app/html/network.html
+++ b/vipra-ui/app/html/network.html
@@ -2,13 +2,13 @@
   <div class="fullsize navpadding">
     <div class="graph-legend overlay">
       <label style="color:{{colors.articles}}">
-        <input type="checkbox" ng-model="shown.articles" store-value="showArticles"> Articles
+        <input type="checkbox" ng-model="shown.articles" store-value="showArticles" store-default="type == 'articles'" ng-disabled="type == 'articles'"> Articles
       </label>
       <label style="color:{{colors.topics}}">
-        <input type="checkbox" ng-model="shown.topics" store-value="showTopics" store-default="true"> Topics
+        <input type="checkbox" ng-model="shown.topics" store-value="showTopics" store-default="true" ng-disabled="type == 'topics'"> Topics
       </label>
       <label style="color:{{colors.words}}">
-        <input type="checkbox" ng-model="shown.words" store-value="showWords" store-default="true"> Words
+        <input type="checkbox" ng-model="shown.words" store-value="showWords" store-default="true" ng-disabled="type == 'words'"> Words
       </label>
     </div>
     <div class="fullsize navpadding" id="visgraph"></div>
diff --git a/vipra-ui/app/index.html b/vipra-ui/app/index.html
index 30b113bd..b4995c13 100644
--- a/vipra-ui/app/index.html
+++ b/vipra-ui/app/index.html
@@ -51,9 +51,15 @@
         <!-- Collect the nav links, forms, and other content for toggling -->
         <div class="collapse navbar-collapse" id="vipra-navbar-collapse-1">
           <ul class="nav navbar-nav">
-            <li ng-class="{active:$state.includes('articles')}"><a ui-sref="articles">Articles</a></li>
-            <li ng-class="{active:$state.includes('topics')}"><a ui-sref="topics">Topics</a></li>
-            <li ng-class="{active:$state.includes('words')}"><a ui-sref="words">Words</a></li>
+            <li ng-class="{active:$state.includes('articles')}">
+              <a ui-sref="articles">Articles</a>
+            </li>
+            <li ng-class="{active:$state.includes('topics')}">
+              <a ui-sref="topics">Topics</a>
+            </li>
+            <li ng-class="{active:$state.includes('words')}">
+              <a ui-sref="words">Words</a>
+            </li>
           </ul>
 
           <ul class="nav navbar-nav navbar-right">
diff --git a/vipra-ui/app/js/config.js b/vipra-ui/app/js/config.js
index e57dd2d1..643b5e33 100644
--- a/vipra-ui/app/js/config.js
+++ b/vipra-ui/app/js/config.js
@@ -3,8 +3,8 @@
   window.Vipra = window.Vipra || {};
 
   Vipra.config = {
-    restUrl: '//' + location.hostname + ':8080/vipra/rest',
-    websocketUrl: 'ws://' + location.hostname + ':8080/vipra/ws'
+    restUrl: '//' + location.hostname + ':8000/vipra/rest',
+    websocketUrl: 'ws://' + location.hostname + ':8000/vipra/ws'
   };
 
 })();
\ No newline at end of file
diff --git a/vipra-ui/app/js/controllers.js b/vipra-ui/app/js/controllers.js
index 326c1679..684e4336 100644
--- a/vipra-ui/app/js/controllers.js
+++ b/vipra-ui/app/js/controllers.js
@@ -85,6 +85,7 @@
       nodes: $scope.nodes,
       edges: $scope.edges
     };
+    $scope.type = $stateParams.type;
     $scope.options = {
       nodes: {
         font: { size: 14 },
diff --git a/vipra-ui/app/less/app.less b/vipra-ui/app/less/app.less
index cd5b31d1..4ed64e66 100644
--- a/vipra-ui/app/less/app.less
+++ b/vipra-ui/app/less/app.less
@@ -1,3 +1,5 @@
+@basecolor: #007aa3;
+
 html {
   position: relative;
   min-height: 100%;
@@ -57,6 +59,7 @@ ul.dashed {
       &> a,
       &> a:hover,
       &> a:focus {
+        border-color: @basecolor !important;
         border-bottom: 3px solid;
         padding-bottom: 12px;
       }
diff --git a/vipra-util/src/main/java/de/vipra/util/Constants.java b/vipra-util/src/main/java/de/vipra/util/Constants.java
index 9e9fdf9a..9f431ebb 100644
--- a/vipra-util/src/main/java/de/vipra/util/Constants.java
+++ b/vipra-util/src/main/java/de/vipra/util/Constants.java
@@ -47,12 +47,12 @@ public class Constants {
 	/**
 	 * Topic boost parameter. Boosts topic importance in queries.
 	 */
-	public static final int BOOST_TOPICS = 4;
+	public static final int ES_BOOST_TOPICS = 4;
 
 	/**
 	 * Title boost parameter. Boosts title importance in queries.
 	 */
-	public static final int BOOST_TITLES = 2;
+	public static final int ES_BOOST_TITLES = 2;
 
 	/*
 	 * TOPIC MODELING
@@ -74,7 +74,7 @@ public class Constants {
 	 * Number of words in a discovered topic, if the selected topic modeling
 	 * library supports this parameter.
 	 */
-	public static final int K_TOPIC_WORDS = 80;
+	public static final int K_TOPIC_WORDS = 50;
 
 	/**
 	 * Precision of likeliness numbers. Likeliness is calculated for words to
@@ -89,15 +89,16 @@ public class Constants {
 	public static final double TOPIC_THRESHOLD = 0.01;
 
 	/**
-	 * Minumum number of words per document.
+	 * Minimum word frequency for words to be used for topic modeling. All words
+	 * below this frequency in a document are filtered out before generating the
+	 * topic model.
 	 */
-	public static final int DOCUMENT_MIN_LENGTH = 10;
+	public static final int DOCUMENT_MIN_WORD_FREQ = 20;
 
 	/**
-	 * Set this to true to save all found words in the database. If false, will
-	 * save only topic related words found by topic modeling.
+	 * Minimum number of words per document.
 	 */
-	public static final boolean SAVE_ALL_WORDS = false;
+	public static final int DOCUMENT_MIN_LENGTH = 10;
 
 	/**
 	 * Stopwords list. Extensive list of stopwords used to clean imported
diff --git a/vipra-util/src/main/java/de/vipra/util/MultiMap.java b/vipra-util/src/main/java/de/vipra/util/MultiMap.java
deleted file mode 100644
index 6f0fe47f..00000000
--- a/vipra-util/src/main/java/de/vipra/util/MultiMap.java
+++ /dev/null
@@ -1,104 +0,0 @@
-package de.vipra.util;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Set;
-
-public class MultiMap<T, U> implements Map<T, Collection<U>> {
-
-	private final Map<T, Collection<U>> map;
-	private final boolean unique;
-
-	public MultiMap() {
-		this(false);
-	}
-
-	public MultiMap(boolean unique) {
-		this.map = new HashMap<>();
-		this.unique = unique;
-	}
-
-	@Override
-	public int size() {
-		return map.size();
-	}
-
-	@Override
-	public boolean isEmpty() {
-		return map.isEmpty();
-	}
-
-	@Override
-	public boolean containsKey(Object key) {
-		return map.containsKey(key);
-	}
-
-	@Override
-	public boolean containsValue(Object value) {
-		return map.containsValue(value);
-	}
-
-	@Override
-	public Collection<U> get(Object key) {
-		return map.get(key);
-	}
-
-	public Iterator<U> each(Object key) {
-		Collection<U> c = map.get(key);
-		if (c == null)
-			return null;
-		return c.iterator();
-	}
-
-	@Override
-	public Collection<U> put(T key, Collection<U> value) {
-		return map.put(key, value);
-	}
-
-	public void put(T key, U value) {
-		Collection<U> c = map.get(key);
-		if (c == null) {
-			if (unique)
-				c = new HashSet<>();
-			else
-				c = new ArrayList<>();
-		}
-		c.add(value);
-		map.put(key, c);
-	}
-
-	@Override
-	public Collection<U> remove(Object key) {
-		return map.remove(key);
-	}
-
-	@Override
-	public void putAll(Map<? extends T, ? extends Collection<U>> m) {
-		map.putAll(m);
-	}
-
-	@Override
-	public void clear() {
-		map.clear();
-	}
-
-	@Override
-	public Set<T> keySet() {
-		return map.keySet();
-	}
-
-	@Override
-	public Collection<Collection<U>> values() {
-		return map.values();
-	}
-
-	@Override
-	public Set<java.util.Map.Entry<T, Collection<U>>> entrySet() {
-		return map.entrySet();
-	}
-
-}
diff --git a/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java b/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java
index 8a17356a..b3c6b025 100644
--- a/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java
+++ b/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java
@@ -7,9 +7,7 @@ import java.text.ParseException;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Date;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 
 import org.bson.types.ObjectId;
 import org.mongodb.morphia.annotations.Embedded;
@@ -46,9 +44,8 @@ public class ArticleFull extends FileModel<ObjectId> implements Serializable {
 	@QueryIgnore(multi = true)
 	private String text;
 
-	@ElasticIndex("text")
 	@QueryIgnore(all = true)
-	private String processedText;
+	private String[] processedText;
 
 	private String url;
 
@@ -67,9 +64,6 @@ public class ArticleFull extends FileModel<ObjectId> implements Serializable {
 
 	private Date modified;
 
-	@Transient
-	private Map<String, String> links;
-
 	@Transient
 	private NestedMap meta;
 
@@ -104,15 +98,20 @@ public class ArticleFull extends FileModel<ObjectId> implements Serializable {
 	}
 
 	@ElasticIndex("excerpt")
-	public String serializeText() {
+	public String serializeExcerpt() {
 		return StringUtils.ellipsize(text, Constants.EXCERPT_LENGTH);
 	}
 
-	public String getProcessedText() {
+	@ElasticIndex("text")
+	public String serializeText() {
+		return StringUtils.join(processedText);
+	}
+
+	public String[] getProcessedText() {
 		return processedText;
 	}
 
-	public void setProcessedText(String processedText) {
+	public void setProcessedText(String[] processedText) {
 		this.processedText = processedText;
 	}
 
@@ -185,20 +184,6 @@ public class ArticleFull extends FileModel<ObjectId> implements Serializable {
 		this.modified = modified;
 	}
 
-	public Map<String, String> getLinks() {
-		return links;
-	}
-
-	public void setLinks(Map<String, String> links) {
-		this.links = links;
-	}
-
-	public void addLink(String key, String link) {
-		if (links == null)
-			links = new HashMap<>();
-		links.put(key, link);
-	}
-
 	public NestedMap getMeta() {
 		return meta;
 	}
diff --git a/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java b/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java
index e2066212..5e1dfedc 100644
--- a/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java
+++ b/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java
@@ -10,6 +10,9 @@ public class ArticleStats implements Serializable {
 	private static final long serialVersionUID = -4712841724990200627L;
 
 	private Long wordCount;
+	private Long uniqueWordCount;
+	private Long processedWordCount;
+	private Double reductionRatio;
 
 	public Long getWordCount() {
 		return wordCount;
@@ -19,16 +22,34 @@ public class ArticleStats implements Serializable {
 		this.wordCount = wordCount;
 	}
 
-	public static ArticleStats generateFromText(final String text) {
-		ArticleStats stats = new ArticleStats();
-		String[] words = text.split("\\s+");
-		stats.setWordCount((long) words.length);
-		return stats;
+	public Long getUniqueWordCount() {
+		return uniqueWordCount;
+	}
+
+	public void setUniqueWordCount(Long uniqueWordCount) {
+		this.uniqueWordCount = uniqueWordCount;
+	}
+
+	public Long getProcessedWordCount() {
+		return processedWordCount;
+	}
+
+	public void setProcessedWordCount(Long processedWordCount) {
+		this.processedWordCount = processedWordCount;
+	}
+
+	public Double getReductionRatio() {
+		return reductionRatio;
+	}
+
+	public void setReductionRatio(Double reductionRatio) {
+		this.reductionRatio = reductionRatio;
 	}
 
 	@Override
 	public String toString() {
-		return ArticleStats.class.getSimpleName() + "[wordCount:" + wordCount + "]";
+		return ArticleStats.class.getSimpleName() + "[wordCount:" + wordCount + ", uniqueWordCount:"
+				+ uniqueWordCount + ", processedWordCount:" + processedWordCount + ", reductionRatio:" + reductionRatio + "]";
 	}
 
 }
\ No newline at end of file
-- 
GitLab