From d2d109b42cf4ec15aa6d56159804e0063fb79204 Mon Sep 17 00:00:00 2001
From: Eike Cochu <eike@cochu.com>
Date: Sun, 6 Mar 2016 20:41:02 +0100
Subject: [PATCH] updated multiple

removed isCreated flag and created date from Word
embedded Word in TopicWord instead of referencing it, no loss
updated stopwords list
added sum function to ArrayUtils to sum double arrays
updated DTM analyzer, preparation for topic relevance
---
 .../java/de/vipra/cmd/lda/DTMAnalyzer.java    |  54 ++++---
 .../main/java/de/vipra/util/ArrayUtils.java   |   7 +
 .../main/java/de/vipra/util/Constants.java    | 150 ++++++++++--------
 .../java/de/vipra/util/model/Sequence.java    |  10 +-
 .../java/de/vipra/util/model/TopicWord.java   |   3 +-
 .../main/java/de/vipra/util/model/Word.java   |  54 +------
 6 files changed, 128 insertions(+), 150 deletions(-)

diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java
index 25fe7de0..17bca671 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java
@@ -133,10 +133,26 @@ public class DTMAnalyzer extends Analyzer {
 			in.close();
 			p.waitFor();
 
-			// read topic definition files and create topics
-
 			final int wordCount = vocab.size();
 			final int sequencesCount = seqindex.sequenceCount();
+			final int articlesCount = index.size();
+
+			// read topic distributions
+
+			final File gamFile = new File(outDirSeq, "gam.dat");
+			in = new BufferedReader(new InputStreamReader(new FileInputStream(gamFile)));
+
+			final double[][] topicDistributions = new double[articlesCount][Constants.K_TOPICS];
+			for (int idxArticle = 0; idxArticle < articlesCount; idxArticle++) {
+				for (int idxTopic = 0; idxTopic < Constants.K_TOPICS; idxTopic++) {
+					topicDistributions[idxArticle][idxTopic] = Double.parseDouble(in.readLine());
+				}
+			}
+
+			in.close();
+
+			// read topic definition files and create topics
+
 			// collects created topics
 			final List<TopicFull> newTopics = new ArrayList<>(Constants.K_TOPICS);
 			// collects created words
@@ -179,22 +195,20 @@ public class DTMAnalyzer extends Analyzer {
 				// collect top words in each sequence for topic name
 				final Set<TopicWord> topTopicWords = new HashSet<>();
 
-				// go through each sequence and gather all words that are above
-				// the minimum relative word likeliness
+				// go through each sequence and gather all words
 				for (int idxSeq = 0; idxSeq < sequencesCount; idxSeq++) {
 					// calculate relative cutoff probability
-					final double minAcceptableSeqLikeliness;
-					if (Constants.MINIMUM_RELATIVE_PROB > 0) {
-						final double maxSeqLikeliness = maxSeqLikelinesses[idxSeq];
-						minAcceptableSeqLikeliness = (maxSeqLikeliness >= 0 ? 1 : 2 - Constants.MINIMUM_RELATIVE_PROB)
-								* maxSeqLikeliness;
-					}
+					final double maxSeqLikeliness = maxSeqLikelinesses[idxSeq];
+					final double minRelativeSeqLikeliness = Constants.MINIMUM_RELATIVE_PROB
+							* Math.abs(maxSeqLikeliness);
 
 					// collect words
 					final List<TopicWord> newSeqTopicWords = new ArrayList<>(wordCount);
 					for (int idxWord = 0; idxWord < wordCount; idxWord++) {
 						final double likeliness = likelinesses[idxWord][idxSeq];
-						if (!seqRelativeCutoff || likeliness >= minAcceptableSeqLikeliness) {
+						// check if word acceptable
+						if (!seqRelativeCutoff || (maxSeqLikeliness >= 0 && likeliness >= minRelativeSeqLikeliness)
+								|| (maxSeqLikeliness < 0 && Math.abs(likeliness) >= minRelativeSeqLikeliness)) {
 							final Word word = vocab.getWord(idxWord);
 							newWords.add(word);
 							final TopicWord topicWord = new TopicWord(word, likeliness);
@@ -246,18 +260,10 @@ public class DTMAnalyzer extends Analyzer {
 
 			// create topic references
 
-			final File gamFile = new File(outDirSeq, "gam.dat");
-			in = new BufferedReader(new InputStreamReader(new FileInputStream(gamFile)));
-
+			int idxArticle = 0;
 			for (final String articleId : index) {
-				// normalize topic proportions
-				double totalTopicProportions = 0;
-				final double[] topicProportions = new double[Constants.K_TOPICS];
-				for (int idxTopic = 0; idxTopic < Constants.K_TOPICS; idxTopic++) {
-					final double topicProportion = Double.parseDouble(in.readLine());
-					topicProportions[idxTopic] = topicProportion;
-					totalTopicProportions += topicProportion;
-				}
+				double[] topicDistribution = topicDistributions[idxArticle++];
+				double topicDistributionSum = ArrayUtils.sum(topicDistribution);
 
 				// create topic references
 				final List<TopicRef> newTopicRefs = new ArrayList<>(Constants.K_TOPICS);
@@ -265,7 +271,7 @@ public class DTMAnalyzer extends Analyzer {
 					final TopicRef newTopicRef = new TopicRef();
 					final TopicFull topicFull = newTopics.get(idxTopic);
 					newTopicRef.setTopic(new Topic(topicFull.getId()));
-					newTopicRef.setShare(topicProportions[idxTopic] / totalTopicProportions);
+					newTopicRef.setShare(topicDistribution[idxTopic] / topicDistributionSum);
 					newTopicRefs.add(newTopicRef);
 				}
 
@@ -287,8 +293,6 @@ public class DTMAnalyzer extends Analyzer {
 				}
 			}
 
-			in.close();
-
 		} catch (IOException | InterruptedException e) {
 			throw new AnalyzerException(e);
 		}
diff --git a/vipra-util/src/main/java/de/vipra/util/ArrayUtils.java b/vipra-util/src/main/java/de/vipra/util/ArrayUtils.java
index ce7ea92b..971bcfb2 100644
--- a/vipra-util/src/main/java/de/vipra/util/ArrayUtils.java
+++ b/vipra-util/src/main/java/de/vipra/util/ArrayUtils.java
@@ -37,4 +37,19 @@ public class ArrayUtils {
 		return maximum;
 	}
 
+	/**
+	 * Adds up all elements of a double array.
+	 *
+	 * @param values
+	 *            the values to sum up
+	 * @return the sum of all elements, 0 if the array is empty
+	 */
+	public static double sum(final double[] values) {
+		double total = 0;
+		for (final double value : values) {
+			total += value;
+		}
+		return total;
+	}
+
 }
diff --git a/vipra-util/src/main/java/de/vipra/util/Constants.java b/vipra-util/src/main/java/de/vipra/util/Constants.java
index 6232b169..7fa98232 100644
--- a/vipra-util/src/main/java/de/vipra/util/Constants.java
+++ b/vipra-util/src/main/java/de/vipra/util/Constants.java
@@ -143,79 +143,91 @@ public class Constants {
 	 * Stopwords list. Extensive list of stopwords used to clean imported
 	 * articles of the most common words before topic modeling is applied.
 	 */
-	public static final List<String> STOPWORDS = Arrays.asList("'ll", "'ve", "a", "able", "about", "above", "abst",
-			"accordance", "according", "accordingly", "across", "act", "actually", "added", "adj", "affected",
-			"affecting", "affects", "after", "afterwards", "again", "against", "ah", "all", "almost", "alone", "along",
-			"already", "also", "although", "always", "am", "among", "amongst", "an", "and", "announce", "another",
-			"any", "anybody", "anyhow", "anymore", "anyone", "anything", "anyway", "anyways", "anywhere", "apparently",
-			"approximately", "are", "area", "areas", "aren", "arent", "arise", "around", "as", "aside", "ask", "asked",
-			"asking", "asks", "at", "auth", "available", "away", "awfully", "b", "back", "backed", "backing", "backs",
-			"be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "began",
-			"begin", "beginning", "beginnings", "begins", "behind", "being", "beings", "believe", "below", "beside",
-			"besides", "best", "better", "between", "beyond", "big", "biol", "both", "brief", "briefly", "but", "by",
-			"c", "ca", "came", "can", "can't", "cannot", "case", "cases", "cause", "causes", "certain", "certainly",
-			"clear", "clearly", "co", "com", "come", "comes", "contain", "containing", "contains", "could", "couldnt",
-			"d", "date", "did", "didn't", "differ", "different", "differently", "do", "does", "doesn't", "doing",
-			"don't", "done", "down", "downed", "downing", "downs", "downwards", "due", "during", "e", "each", "early",
-			"ed", "edu", "effect", "eg", "eight", "eighty", "either", "else", "elsewhere", "end", "ended", "ending",
-			"ends", "enough", "especially", "et", "et-al", "etc", "even", "evenly", "ever", "every", "everybody",
-			"everyone", "everything", "everywhere", "ex", "except", "f", "face", "faces", "fact", "facts", "far",
-			"felt", "few", "ff", "fifth", "find", "finds", "first", "five", "fix", "followed", "following", "follows",
-			"for", "former", "formerly", "forth", "found", "four", "from", "full", "fully", "further", "furthered",
-			"furthering", "furthermore", "furthers", "g", "gave", "general", "generally", "get", "gets", "getting",
-			"give", "given", "gives", "giving", "go", "goes", "going", "gone", "good", "goods", "got", "gotten",
-			"great", "greater", "greatest", "group", "grouped", "grouping", "groups", "h", "had", "happens", "hardly",
-			"has", "hasn't", "have", "haven't", "having", "he", "hed", "hence", "her", "here", "hereafter", "hereby",
-			"herein", "heres", "hereupon", "hers", "herself", "hes", "hi", "hid", "high", "higher", "highest", "him",
-			"himself", "his", "hither", "home", "how", "howbeit", "however", "hundred", "i", "i'll", "i've", "id", "ie",
-			"if", "im", "immediate", "immediately", "importance", "important", "in", "inc", "indeed", "index",
-			"information", "instead", "interest", "interested", "interesting", "interests", "into", "invention",
-			"inward", "is", "isn't", "it", "it'll", "itd", "its", "itself", "j", "just", "k", "keep", "keeps", "kept",
-			"kg", "kind", "km", "knew", "know", "known", "knows", "l", "large", "largely", "last", "lately", "later",
-			"latest", "latter", "latterly", "least", "less", "lest", "let", "lets", "like", "liked", "likely", "line",
+	public static final List<String> STOPWORDS = Arrays.asList("'ll", "'ve", "a", "a's", "able", "about", "above",
+			"abst", "accordance", "according", "accordingly", "across", "act", "actually", "added", "adj", "affected",
+			"affecting", "affects", "after", "afterwards", "again", "against", "ah", "ain't", "all", "allow", "allows",
+			"almost", "alone", "along", "already", "also", "although", "always", "am", "among", "amongst", "amoungst",
+			"amount", "an", "and", "announce", "another", "any", "anybody", "anyhow", "anymore", "anyone", "anything",
+			"anyway", "anyways", "anywhere", "apart", "apparently", "appear", "appreciate", "appropriate",
+			"approximately", "are", "area", "areas", "aren", "aren't", "arent", "arise", "around", "as", "aside", "ask",
+			"asked", "asking", "asks", "associated", "at", "auth", "available", "away", "awfully", "b", "back",
+			"backed", "backing", "backs", "be", "became", "because", "become", "becomes", "becoming", "been", "before",
+			"beforehand", "began", "begin", "beginning", "beginnings", "begins", "behind", "being", "beings", "believe",
+			"below", "beside", "besides", "best", "better", "between", "beyond", "big", "bill", "biol", "both",
+			"bottom", "brief", "briefly", "but", "by", "c", "c'mon", "c's", "ca", "call", "came", "can", "can't",
+			"cannot", "cant", "case", "cases", "cause", "causes", "certain", "certainly", "changes", "clear", "clearly",
+			"co", "com", "come", "comes", "computer", "con", "concerning", "consequently", "consider", "considering",
+			"contain", "containing", "contains", "corresponding", "could", "couldn't", "couldnt", "course", "cry",
+			"currently", "d", "date", "de", "definitely", "describe", "described", "despite", "detail", "did", "didn't",
+			"differ", "different", "differently", "do", "does", "doesn't", "doing", "don't", "done", "down", "downed",
+			"downing", "downs", "downwards", "due", "during", "e", "each", "early", "ed", "edu", "effect", "eg",
+			"eight", "eighty", "either", "eleven", "else", "elsewhere", "empty", "end", "ended", "ending", "ends",
+			"enough", "entirely", "especially", "et", "et-al", "etc", "even", "evenly", "ever", "every", "everybody",
+			"everyone", "everything", "everywhere", "ex", "exactly", "example", "except", "f", "face", "faces", "fact",
+			"facts", "far", "felt", "few", "ff", "fifteen", "fifth", "fifty", "fill", "find", "finds", "fire", "first",
+			"five", "fix", "followed", "following", "follows", "for", "former", "formerly", "forth", "forty", "found",
+			"four", "from", "front", "full", "fully", "further", "furthered", "furthering", "furthermore", "furthers",
+			"g", "gave", "general", "generally", "get", "gets", "getting", "give", "given", "gives", "giving", "go",
+			"goes", "going", "gone", "good", "goods", "got", "gotten", "great", "greater", "greatest", "greetings",
+			"group", "grouped", "grouping", "groups", "h", "had", "hadn't", "happens", "hardly", "has", "hasn't",
+			"hasnt", "have", "haven't", "having", "he", "he'd", "he'll", "he's", "hed", "hello", "help", "hence", "her",
+			"here", "here's", "hereafter", "hereby", "herein", "heres", "hereupon", "hers", "herse", "herself", "hes",
+			"hi", "hid", "high", "higher", "highest", "him", "himse", "himself", "his", "hither", "home", "hopefully",
+			"how", "how's", "howbeit", "however", "hundred", "i", "i'd", "i'll", "i'm", "i've", "id", "ie", "if",
+			"ignored", "im", "immediate", "immediately", "importance", "important", "in", "inasmuch", "inc", "indeed",
+			"index", "indicate", "indicated", "indicates", "information", "inner", "insofar", "instead", "interest",
+			"interested", "interesting", "interests", "into", "invention", "inward", "is", "isn't", "it", "it'd",
+			"it'll", "it's", "itd", "its", "itse", "itself", "j", "just", "k", "keep", "keeps", "kept", "kg", "kind",
+			"km", "knew", "know", "known", "knows", "l", "large", "largely", "last", "lately", "later", "latest",
+			"latter", "latterly", "least", "less", "lest", "let", "let's", "lets", "like", "liked", "likely", "line",
 			"little", "long", "longer", "longest", "look", "looking", "looks", "ltd", "m", "made", "mainly", "make",
 			"makes", "making", "man", "many", "may", "maybe", "me", "mean", "means", "meantime", "meanwhile", "member",
-			"members", "men", "merely", "mg", "might", "million", "miss", "ml", "more", "moreover", "most", "mostly",
-			"mr", "mrs", "much", "mug", "must", "my", "myself", "n", "na", "name", "namely", "nay", "nd", "near",
-			"nearly", "necessarily", "necessary", "need", "needed", "needing", "needs", "neither", "never",
-			"nevertheless", "new", "newer", "newest", "next", "nine", "ninety", "no", "nobody", "non", "none",
-			"nonetheless", "noone", "nor", "normally", "nos", "not", "noted", "nothing", "now", "nowhere", "number",
-			"numbers", "o", "obtain", "obtained", "obviously", "of", "off", "often", "oh", "ok", "okay", "old", "older",
-			"oldest", "omitted", "on", "once", "one", "ones", "only", "onto", "open", "opened", "opening", "opens",
-			"or", "ord", "order", "ordered", "ordering", "orders", "other", "others", "otherwise", "ought", "our",
-			"ours", "ourselves", "out", "outside", "over", "overall", "owing", "own", "p", "page", "pages", "part",
-			"parted", "particular", "particularly", "parting", "parts", "past", "per", "perhaps", "place", "placed",
-			"places", "please", "plus", "point", "pointed", "pointing", "points", "poorly", "possible", "possibly",
-			"potentially", "pp", "predominantly", "present", "presented", "presenting", "presents", "previously",
-			"primarily", "probably", "problem", "problems", "promptly", "proud", "provides", "put", "puts", "q", "que",
-			"quickly", "quite", "qv", "r", "ran", "rather", "rd", "re", "readily", "really", "recent", "recently",
-			"ref", "refs", "regarding", "regardless", "regards", "related", "relatively", "research", "respectively",
-			"resulted", "resulting", "results", "right", "room", "rooms", "run", "s", "said", "same", "saw", "say",
-			"saying", "says", "sec", "second", "seconds", "section", "see", "seeing", "seem", "seemed", "seeming",
-			"seems", "seen", "sees", "self", "selves", "sent", "seven", "several", "shall", "she", "she'll", "shed",
-			"shes", "should", "shouldn't", "show", "showed", "showing", "shown", "showns", "shows", "side", "sides",
-			"significant", "significantly", "similar", "similarly", "since", "six", "slightly", "small", "smaller",
-			"smallest", "so", "some", "somebody", "somehow", "someone", "somethan", "something", "sometime",
-			"sometimes", "somewhat", "somewhere", "soon", "sorry", "specifically", "specified", "specify", "specifying",
-			"state", "states", "still", "stop", "strongly", "sub", "substantially", "successfully", "such",
-			"sufficiently", "suggest", "sup", "sure", "t", "take", "taken", "taking", "tell", "tends", "th", "than",
-			"thank", "thanks", "thanx", "that", "that'll", "that've", "thats", "the", "their", "theirs", "them",
-			"themselves", "then", "thence", "there", "there'll", "there've", "thereafter", "thereby", "thered",
-			"therefore", "therein", "thereof", "therere", "theres", "thereto", "thereupon", "these", "they", "they'll",
-			"they've", "theyd", "theyre", "thing", "things", "think", "thinks", "this", "those", "thou", "though",
-			"thoughh", "thought", "thoughts", "thousand", "three", "throug", "through", "throughout", "thru", "thus",
-			"til", "tip", "to", "today", "together", "too", "took", "toward", "towards", "tried", "tries", "truly",
-			"try", "trying", "ts", "turn", "turned", "turning", "turns", "twice", "two", "u", "un", "under",
+			"members", "men", "merely", "mg", "might", "mill", "million", "mine", "miss", "ml", "more", "moreover",
+			"most", "mostly", "move", "mr", "mrs", "much", "mug", "must", "mustn't", "my", "myse", "myself", "n", "na",
+			"name", "namely", "nay", "nd", "near", "nearly", "necessarily", "necessary", "need", "needed", "needing",
+			"needs", "neither", "never", "nevertheless", "new", "newer", "newest", "next", "nine", "ninety", "no",
+			"nobody", "non", "none", "nonetheless", "noone", "nor", "normally", "nos", "not", "noted", "nothing",
+			"novel", "now", "nowhere", "number", "numbers", "o", "obtain", "obtained", "obviously", "of", "off",
+			"often", "oh", "ok", "okay", "old", "older", "oldest", "omitted", "on", "once", "one", "ones", "only",
+			"onto", "open", "opened", "opening", "opens", "or", "ord", "order", "ordered", "ordering", "orders",
+			"other", "others", "otherwise", "ought", "our", "ours", "ourselves", "out", "outside", "over", "overall",
+			"owing", "own", "p", "page", "pages", "part", "parted", "particular", "particularly", "parting", "parts",
+			"past", "per", "perhaps", "place", "placed", "places", "please", "plus", "point", "pointed", "pointing",
+			"points", "poorly", "possible", "possibly", "potentially", "pp", "predominantly", "present", "presented",
+			"presenting", "presents", "presumably", "previously", "primarily", "probably", "problem", "problems",
+			"promptly", "proud", "provides", "put", "puts", "q", "que", "quickly", "quite", "qv", "r", "ran", "rather",
+			"rd", "re", "readily", "really", "reasonably", "recent", "recently", "ref", "refs", "regarding",
+			"regardless", "regards", "related", "relatively", "research", "respectively", "resulted", "resulting",
+			"results", "right", "room", "rooms", "run", "s", "said", "same", "saw", "say", "saying", "says", "sec",
+			"second", "secondly", "seconds", "section", "see", "seeing", "seem", "seemed", "seeming", "seems", "seen",
+			"sees", "self", "selves", "sensible", "sent", "serious", "seriously", "seven", "several", "shall", "shan't",
+			"she", "she'd", "she'll", "she's", "shed", "shes", "should", "shouldn't", "show", "showed", "showing",
+			"shown", "showns", "shows", "side", "sides", "significant", "significantly", "similar", "similarly",
+			"since", "sincere", "six", "sixty", "slightly", "small", "smaller", "smallest", "so", "some", "somebody",
+			"somehow", "someone", "somethan", "something", "sometime", "sometimes", "somewhat", "somewhere", "soon",
+			"sorry", "specifically", "specified", "specify", "specifying", "state", "states", "still", "stop",
+			"strongly", "sub", "substantially", "successfully", "such", "sufficiently", "suggest", "sup", "sure",
+			"system", "t", "t's", "take", "taken", "taking", "tell", "ten", "tends", "th", "than", "thank", "thanks",
+			"thanx", "that", "that'll", "that's", "that've", "thats", "the", "their", "theirs", "them", "themselves",
+			"then", "thence", "there", "there'll", "there's", "there've", "thereafter", "thereby", "thered",
+			"therefore", "therein", "thereof", "therere", "theres", "thereto", "thereupon", "these", "they", "they'd",
+			"they'll", "they're", "they've", "theyd", "theyre", "thick", "thin", "thing", "things", "think", "thinks",
+			"third", "this", "thorough", "thoroughly", "those", "thou", "though", "thoughh", "thought", "thoughts",
+			"thousand", "three", "throug", "through", "throughout", "thru", "thus", "til", "tip", "to", "today",
+			"together", "too", "took", "top", "toward", "towards", "tried", "tries", "truly", "try", "trying", "ts",
+			"turn", "turned", "turning", "turns", "twelve", "twenty", "twice", "two", "u", "un", "under",
 			"unfortunately", "unless", "unlike", "unlikely", "until", "unto", "up", "upon", "ups", "us", "use", "used",
 			"useful", "usefully", "usefulness", "uses", "using", "usually", "v", "value", "various", "very", "via",
-			"viz", "vol", "vols", "vs", "w", "want", "wanted", "wanting", "wants", "was", "wasnt", "way", "ways", "we",
-			"we'll", "we've", "wed", "welcome", "well", "wells", "went", "were", "werent", "what", "what'll",
-			"whatever", "whats", "when", "whence", "whenever", "where", "whereafter", "whereas", "whereby", "wherein",
-			"wheres", "whereupon", "wherever", "whether", "which", "while", "whim", "whither", "who", "who'll", "whod",
-			"whoever", "whole", "whom", "whomever", "whos", "whose", "why", "widely", "will", "willing", "wish", "with",
-			"within", "without", "wont", "words", "work", "worked", "working", "works", "world", "would", "wouldnt",
-			"www", "x", "y", "year", "years", "yes", "yet", "you", "you'll", "you've", "youd", "young", "younger",
-			"youngest", "your", "youre", "yours", "yourself", "yourselves", "z", "zero");
+			"viz", "vol", "vols", "vs", "w", "want", "wanted", "wanting", "wants", "was", "wasn't", "wasnt", "way",
+			"ways", "we", "we'd", "we'll", "we're", "we've", "wed", "welcome", "well", "wells", "went", "were",
+			"weren't", "werent", "what", "what'll", "what's", "whatever", "whats", "when", "when's", "whence",
+			"whenever", "where", "where's", "whereafter", "whereas", "whereby", "wherein", "wheres", "whereupon",
+			"wherever", "whether", "which", "while", "whim", "whither", "who", "who'll", "who's", "whod", "whoever",
+			"whole", "whom", "whomever", "whos", "whose", "why", "why's", "widely", "will", "willing", "wish", "with",
+			"within", "without", "won't", "wonder", "wont", "words", "work", "worked", "working", "works", "world",
+			"would", "wouldn't", "wouldnt", "www", "x", "y", "year", "years", "yes", "yet", "you", "you'd", "you'll",
+			"you're", "you've", "youd", "young", "younger", "youngest", "your", "youre", "yours", "yourself",
+			"yourselves", "z", "zero");
 
 	/**
 	 * Disallowed chars for words in processed text segments. This regular
diff --git a/vipra-util/src/main/java/de/vipra/util/model/Sequence.java b/vipra-util/src/main/java/de/vipra/util/model/Sequence.java
index 2a0050ae..d81ad458 100644
--- a/vipra-util/src/main/java/de/vipra/util/model/Sequence.java
+++ b/vipra-util/src/main/java/de/vipra/util/model/Sequence.java
@@ -12,8 +12,8 @@ public class Sequence implements Comparable<Sequence>, Serializable {
 
 	private Date startDate;
 	private Date endDate;
-
 	private Integer number;
+	private Double relevance;
 
 	@Embedded
 	private List<TopicWord> words;
@@ -42,6 +42,25 @@ public class Sequence implements Comparable<Sequence>, Serializable {
 		this.number = number;
 	}
 
+	/**
+	 * Returns the relevance stored for this sequence.
+	 *
+	 * @return the relevance, or null if none has been set
+	 */
+	public Double getRelevance() {
+		return this.relevance;
+	}
+
+	/**
+	 * Stores the relevance of this sequence.
+	 *
+	 * @param relevance
+	 *            the new relevance, may be null
+	 */
+	public void setRelevance(final Double relevance) {
+		this.relevance = relevance;
+	}
+
 	public List<TopicWord> getWords() {
 		return words;
 	}
diff --git a/vipra-util/src/main/java/de/vipra/util/model/TopicWord.java b/vipra-util/src/main/java/de/vipra/util/model/TopicWord.java
index 81f453c0..3aad49e9 100644
--- a/vipra-util/src/main/java/de/vipra/util/model/TopicWord.java
+++ b/vipra-util/src/main/java/de/vipra/util/model/TopicWord.java
@@ -3,7 +3,6 @@ package de.vipra.util.model;
 import java.io.Serializable;
 
 import org.mongodb.morphia.annotations.Embedded;
-import org.mongodb.morphia.annotations.Reference;
 
 import com.fasterxml.jackson.annotation.JsonGetter;
 import com.fasterxml.jackson.annotation.JsonIgnore;
@@ -13,7 +12,7 @@ import com.fasterxml.jackson.annotation.JsonSetter;
 @Embedded
 public class TopicWord implements Comparable<TopicWord>, Serializable {
 
-	@Reference
+	@Embedded
 	@JsonIgnore
 	private Word word;
 
diff --git a/vipra-util/src/main/java/de/vipra/util/model/Word.java b/vipra-util/src/main/java/de/vipra/util/model/Word.java
index d1346505..ab0f8703 100644
--- a/vipra-util/src/main/java/de/vipra/util/model/Word.java
+++ b/vipra-util/src/main/java/de/vipra/util/model/Word.java
@@ -1,41 +1,17 @@
 package de.vipra.util.model;
 
 import java.io.Serializable;
-import java.util.Date;
 
 import org.mongodb.morphia.annotations.Entity;
 import org.mongodb.morphia.annotations.Id;
-import org.mongodb.morphia.annotations.Index;
-import org.mongodb.morphia.annotations.Indexes;
-import org.mongodb.morphia.annotations.PostLoad;
-import org.mongodb.morphia.annotations.PostPersist;
-import org.mongodb.morphia.annotations.PrePersist;
-import org.mongodb.morphia.annotations.Transient;
-
-import com.fasterxml.jackson.annotation.JsonIgnore;
-
-import de.vipra.util.an.QueryIgnore;
 
 @SuppressWarnings("serial")
 @Entity(value = "words", noClassnameStored = true)
-@Indexes(@Index("-created"))
 public class Word implements Model<String>, Serializable {
 
 	@Id
 	private String id;
 
-	@QueryIgnore(multi = true)
-	private Date created;
-
-	/**
-	 * The created variable is a helper that marks non-persisted new words in
-	 * the import process. Each word with created = false will be saved before
-	 * topics and topics references are created.
-	 */
-	@Transient
-	@JsonIgnore
-	private boolean isCreated = false;
-
 	public Word() {}
 
 	public Word(final String id) {
@@ -52,34 +28,6 @@ public class Word implements Model<String>, Serializable {
 		this.id = id;
 	}
 
-	public boolean isCreated() {
-		return isCreated;
-	}
-
-	public void setIsCreated(final boolean created) {
-		this.isCreated = created;
-	}
-
-	public Date getCreated() {
-		return created;
-	}
-
-	public void setCreated(final Date created) {
-		this.created = created;
-	}
-
-	@PostLoad
-	@PostPersist
-	private void postLoadPersist() {
-		this.isCreated = true;
-	}
-
-	@PrePersist
-	private void prePersist() {
-		if (this.created == null)
-			this.created = new Date();
-	}
-
 	@Override
 	public boolean equals(final Object o) {
 		if (o == null)
@@ -101,7 +49,7 @@ public class Word implements Model<String>, Serializable {
 
 	@Override
 	public String toString() {
-		return "Word [id=" + id + ", created=" + created + ", isCreated=" + isCreated + "]";
+		return "Word [id=" + id + "]";
 	}
 
 }
-- 
GitLab