From d2d109b42cf4ec15aa6d56159804e0063fb79204 Mon Sep 17 00:00:00 2001 From: Eike Cochu <eike@cochu.com> Date: Sun, 6 Mar 2016 20:41:02 +0100 Subject: [PATCH] updated multiple removed iscreated, created date from word embedding word in topicword, no loss updated stopwords list added sum function to sum double arrays updated dtm analyzer, prep for topic relevance --- .../java/de/vipra/cmd/lda/DTMAnalyzer.java | 54 ++++--- .../main/java/de/vipra/util/ArrayUtils.java | 7 + .../main/java/de/vipra/util/Constants.java | 150 ++++++++++-------- .../java/de/vipra/util/model/Sequence.java | 10 +- .../java/de/vipra/util/model/TopicWord.java | 3 +- .../main/java/de/vipra/util/model/Word.java | 54 +------ 6 files changed, 128 insertions(+), 150 deletions(-) diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java index 25fe7de0..17bca671 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java @@ -133,10 +133,26 @@ public class DTMAnalyzer extends Analyzer { in.close(); p.waitFor(); - // read topic definition files and create topics - final int wordCount = vocab.size(); final int sequencesCount = seqindex.sequenceCount(); + final int articlesCount = index.size(); + + // read topic distributions + + final File gamFile = new File(outDirSeq, "gam.dat"); + in = new BufferedReader(new InputStreamReader(new FileInputStream(gamFile))); + + final double[][] topicDistributions = new double[articlesCount][Constants.K_TOPICS]; + for (int idxArticle = 0; idxArticle < articlesCount; idxArticle++) { + for (int idxTopic = 0; idxTopic < Constants.K_TOPICS; idxTopic++) { + topicDistributions[idxArticle][idxTopic] = Double.parseDouble(in.readLine()); + } + } + + in.close(); + + // read topic definition files and create topics + // collects created topics final List<TopicFull> newTopics = new ArrayList<>(Constants.K_TOPICS); // collects created words @@ -179,22 +195,20 @@ public class DTMAnalyzer extends Analyzer { // collect top words in each sequence for topic name final Set<TopicWord> topTopicWords = new HashSet<>(); - // go through each sequence and gather all words that are above - // the minimum relative word likeliness + // go through each sequence and gather all words for (int idxSeq = 0; idxSeq < sequencesCount; idxSeq++) { // calculate relative cutoff probability - final double minAcceptableSeqLikeliness; - if (Constants.MINIMUM_RELATIVE_PROB > 0) { - final double maxSeqLikeliness = maxSeqLikelinesses[idxSeq]; - minAcceptableSeqLikeliness = (maxSeqLikeliness >= 0 ? 1 : 2 - Constants.MINIMUM_RELATIVE_PROB) - * maxSeqLikeliness; - } + final double maxSeqLikeliness = maxSeqLikelinesses[idxSeq]; + final double minRelativeSeqLikeliness = Constants.MINIMUM_RELATIVE_PROB + * Math.abs(maxSeqLikeliness); // collect words final List<TopicWord> newSeqTopicWords = new ArrayList<>(wordCount); for (int idxWord = 0; idxWord < wordCount; idxWord++) { final double likeliness = likelinesses[idxWord][idxSeq]; - if (!seqRelativeCutoff || likeliness >= minAcceptableSeqLikeliness) { + // check if word acceptable + if (!seqRelativeCutoff || (maxSeqLikeliness >= 0 && likeliness >= minRelativeSeqLikeliness) + || (maxSeqLikeliness < 0 && Math.abs(likeliness) >= minRelativeSeqLikeliness)) { final Word word = vocab.getWord(idxWord); newWords.add(word); final TopicWord topicWord = new TopicWord(word, likeliness); @@ -246,18 +260,10 @@ public class DTMAnalyzer extends Analyzer { // create topic references - final File gamFile = new File(outDirSeq, "gam.dat"); - in = new BufferedReader(new InputStreamReader(new FileInputStream(gamFile))); - + int idxArticle = 0; for (final String articleId : index) { - // normalize topic proportions - double totalTopicProportions = 0; - final double[] topicProportions = new double[Constants.K_TOPICS]; - for (int idxTopic = 0; idxTopic < Constants.K_TOPICS; idxTopic++) { - final double topicProportion = Double.parseDouble(in.readLine()); - topicProportions[idxTopic] = topicProportion; - totalTopicProportions += topicProportion; - } + double[] topicDistribution = topicDistributions[idxArticle++]; + double topicDistributionSum = ArrayUtils.sum(topicDistribution); // create topic references final List<TopicRef> newTopicRefs = new ArrayList<>(Constants.K_TOPICS); @@ -265,7 +271,7 @@ public class DTMAnalyzer extends Analyzer { final TopicRef newTopicRef = new TopicRef(); final TopicFull topicFull = newTopics.get(idxTopic); newTopicRef.setTopic(new Topic(topicFull.getId())); - newTopicRef.setShare(topicProportions[idxTopic] / totalTopicProportions); + newTopicRef.setShare(topicDistribution[idxTopic] / topicDistributionSum); newTopicRefs.add(newTopicRef); } @@ -287,8 +293,6 @@ public class DTMAnalyzer extends Analyzer { } } - in.close(); - } catch (IOException | InterruptedException e) { throw new AnalyzerException(e); } diff --git a/vipra-util/src/main/java/de/vipra/util/ArrayUtils.java b/vipra-util/src/main/java/de/vipra/util/ArrayUtils.java index ce7ea92b..971bcfb2 100644 --- a/vipra-util/src/main/java/de/vipra/util/ArrayUtils.java +++ b/vipra-util/src/main/java/de/vipra/util/ArrayUtils.java @@ -37,4 +37,11 @@ public class ArrayUtils { return maximum; } + public static double sum(double[] values) { + double result = 0; + for (int i = 0; i < values.length; i++) + result += values[i]; + return result; + } + } diff --git a/vipra-util/src/main/java/de/vipra/util/Constants.java b/vipra-util/src/main/java/de/vipra/util/Constants.java index 6232b169..7fa98232 100644 --- a/vipra-util/src/main/java/de/vipra/util/Constants.java +++ b/vipra-util/src/main/java/de/vipra/util/Constants.java @@ -143,79 +143,91 @@ public class Constants { * Stopwords list. Extensive list of stopwords used to clean imported * articles of the most common words before topic modeling is applied. */ - public static final List<String> STOPWORDS = Arrays.asList("'ll", "'ve", "a", "able", "about", "above", "abst", - "accordance", "according", "accordingly", "across", "act", "actually", "added", "adj", "affected", - "affecting", "affects", "after", "afterwards", "again", "against", "ah", "all", "almost", "alone", "along", - "already", "also", "although", "always", "am", "among", "amongst", "an", "and", "announce", "another", - "any", "anybody", "anyhow", "anymore", "anyone", "anything", "anyway", "anyways", "anywhere", "apparently", - "approximately", "are", "area", "areas", "aren", "arent", "arise", "around", "as", "aside", "ask", "asked", - "asking", "asks", "at", "auth", "available", "away", "awfully", "b", "back", "backed", "backing", "backs", - "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "began", - "begin", "beginning", "beginnings", "begins", "behind", "being", "beings", "believe", "below", "beside", - "besides", "best", "better", "between", "beyond", "big", "biol", "both", "brief", "briefly", "but", "by", - "c", "ca", "came", "can", "can't", "cannot", "case", "cases", "cause", "causes", "certain", "certainly", - "clear", "clearly", "co", "com", "come", "comes", "contain", "containing", "contains", "could", "couldnt", - "d", "date", "did", "didn't", "differ", "different", "differently", "do", "does", "doesn't", "doing", - "don't", "done", "down", "downed", "downing", "downs", "downwards", "due", "during", "e", "each", "early", - "ed", "edu", "effect", "eg", "eight", "eighty", "either", "else", "elsewhere", "end", "ended", "ending", - "ends", "enough", "especially", "et", "et-al", "etc", "even", "evenly", "ever", "every", "everybody", - "everyone", "everything", "everywhere", "ex", "except", "f", "face", "faces", "fact", "facts", "far", - "felt", "few", "ff", "fifth", "find", "finds", "first", "five", "fix", "followed", "following", "follows", - "for", "former", "formerly", "forth", "found", "four", "from", "full", "fully", "further", "furthered", - "furthering", "furthermore", "furthers", "g", "gave", "general", "generally", "get", "gets", "getting", - "give", "given", "gives", "giving", "go", "goes", "going", "gone", "good", "goods", "got", "gotten", - "great", "greater", "greatest", "group", "grouped", "grouping", "groups", "h", "had", "happens", "hardly", - "has", "hasn't", "have", "haven't", "having", "he", "hed", "hence", "her", "here", "hereafter", "hereby", - "herein", "heres", "hereupon", "hers", "herself", "hes", "hi", "hid", "high", "higher", "highest", "him", - "himself", "his", "hither", "home", "how", "howbeit", "however", "hundred", "i", "i'll", "i've", "id", "ie", - "if", "im", "immediate", "immediately", "importance", "important", "in", "inc", "indeed", "index", - "information", "instead", "interest", "interested", "interesting", "interests", "into", "invention", - "inward", "is", "isn't", "it", "it'll", "itd", "its", "itself", "j", "just", "k", "keep", "keeps", "kept", - "kg", "kind", "km", "knew", "know", "known", "knows", "l", "large", "largely", "last", "lately", "later", - "latest", "latter", "latterly", "least", "less", "lest", "let", "lets", "like", "liked", "likely", "line", + public static final List<String> STOPWORDS = Arrays.asList("'ll", "'ve", "a", "a's", "able", "about", "above", + "abst", "accordance", "according", "accordingly", "across", "act", "actually", "added", "adj", "affected", + "affecting", "affects", "after", "afterwards", "again", "against", "ah", "ain't", "all", "allow", "allows", + "almost", "alone", "along", "already", "also", "although", "always", "am", "among", "amongst", "amoungst", + "amount", "an", "and", "announce", "another", "any", "anybody", "anyhow", "anymore", "anyone", "anything", + "anyway", "anyways", "anywhere", "apart", "apparently", "appear", "appreciate", "appropriate", + "approximately", "are", "area", "areas", "aren", "aren't", "arent", "arise", "around", "as", "aside", "ask", + "asked", "asking", "asks", "associated", "at", "auth", "available", "away", "awfully", "b", "back", + "backed", "backing", "backs", "be", "became", "because", "become", "becomes", "becoming", "been", "before", + "beforehand", "began", "begin", "beginning", "beginnings", "begins", "behind", "being", "beings", "believe", + "below", "beside", "besides", "best", "better", "between", "beyond", "big", "bill", "biol", "both", + "bottom", "brief", "briefly", "but", "by", "c", "c'mon", "c's", "ca", "call", "came", "can", "can't", + "cannot", "cant", "case", "cases", "cause", "causes", "certain", "certainly", "changes", "clear", "clearly", + "co", "com", "come", "comes", "computer", "con", "concerning", "consequently", "consider", "considering", + "contain", "containing", "contains", "corresponding", "could", "couldn't", "couldnt", "course", "cry", + "currently", "d", "date", "de", "definitely", "describe", "described", "despite", "detail", "did", "didn't", + "differ", "different", "differently", "do", "does", "doesn't", "doing", "don't", "done", "down", "downed", + "downing", "downs", "downwards", "due", "during", "e", "each", "early", "ed", "edu", "effect", "eg", + "eight", "eighty", "either", "eleven", "else", "elsewhere", "empty", "end", "ended", "ending", "ends", + "enough", "entirely", "especially", "et", "et-al", "etc", "even", "evenly", "ever", "every", "everybody", + "everyone", "everything", "everywhere", "ex", "exactly", "example", "except", "f", "face", "faces", "fact", + "facts", "far", "felt", "few", "ff", "fifteen", "fifth", "fify", "fill", "find", "finds", "fire", "first", + "five", "fix", "followed", "following", "follows", "for", "former", "formerly", "forth", "forty", "found", + "four", "from", "front", "full", "fully", "further", "furthered", "furthering", "furthermore", "furthers", + "g", "gave", "general", "generally", "get", "gets", "getting", "give", "given", "gives", "giving", "go", + "goes", "going", "gone", "good", "goods", "got", "gotten", "great", "greater", "greatest", "greetings", + "group", "grouped", "grouping", "groups", "h", "had", "hadn't", "happens", "hardly", "has", "hasn't", + "hasnt", "have", "haven't", "having", "he", "he'd", "he'll", "he's", "hed", "hello", "help", "hence", "her", + "here", "here's", "hereafter", "hereby", "herein", "heres", "hereupon", "hers", "herse", "herself", "hes", + "hi", "hid", "high", "higher", "highest", "him", "himse", "himself", "his", "hither", "home", "hopefully", + "how", "how's", "howbeit", "however", "hundred", "i", "i'd", "i'll", "i'm", "i've", "id", "ie", "if", + "ignored", "im", "immediate", "immediately", "importance", "important", "in", "inasmuch", "inc", "indeed", + "index", "indicate", "indicated", "indicates", "information", "inner", "insofar", "instead", "interest", + "interested", "interesting", "interests", "into", "invention", "inward", "is", "isn't", "it", "it'd", + "it'll", "it's", "itd", "its", "itse", "itself", "j", "just", "k", "keep", "keeps", "kept", "kg", "kind", + "km", "knew", "know", "known", "knows", "l", "large", "largely", "last", "lately", "later", "latest", + "latter", "latterly", "least", "less", "lest", "let", "let's", "lets", "like", "liked", "likely", "line", "little", "long", "longer", "longest", "look", "looking", "looks", "ltd", "m", "made", "mainly", "make", "makes", "making", "man", "many", "may", "maybe", "me", "mean", "means", "meantime", "meanwhile", "member", - "members", "men", "merely", "mg", "might", "million", "miss", "ml", "more", "moreover", "most", "mostly", - "mr", "mrs", "much", "mug", "must", "my", "myself", "n", "na", "name", "namely", "nay", "nd", "near", - "nearly", "necessarily", "necessary", "need", "needed", "needing", "needs", "neither", "never", - "nevertheless", "new", "newer", "newest", "next", "nine", "ninety", "no", "nobody", "non", "none", - "nonetheless", "noone", "nor", "normally", "nos", "not", "noted", "nothing", "now", "nowhere", "number", - "numbers", "o", "obtain", "obtained", "obviously", "of", "off", "often", "oh", "ok", "okay", "old", "older", - "oldest", "omitted", "on", "once", "one", "ones", "only", "onto", "open", "opened", "opening", "opens", - "or", "ord", "order", "ordered", "ordering", "orders", "other", "others", "otherwise", "ought", "our", - "ours", "ourselves", "out", "outside", "over", "overall", "owing", "own", "p", "page", "pages", "part", - "parted", "particular", "particularly", "parting", "parts", "past", "per", "perhaps", "place", "placed", - "places", "please", "plus", "point", "pointed", "pointing", "points", "poorly", "possible", "possibly", - "potentially", "pp", "predominantly", "present", "presented", "presenting", "presents", "previously", - "primarily", "probably", "problem", "problems", "promptly", "proud", "provides", "put", "puts", "q", "que", - "quickly", "quite", "qv", "r", "ran", "rather", "rd", "re", "readily", "really", "recent", "recently", - "ref", "refs", "regarding", "regardless", "regards", "related", "relatively", "research", "respectively", - "resulted", "resulting", "results", "right", "room", "rooms", "run", "s", "said", "same", "saw", "say", - "saying", "says", "sec", "second", "seconds", "section", "see", "seeing", "seem", "seemed", "seeming", - "seems", "seen", "sees", "self", "selves", "sent", "seven", "several", "shall", "she", "she'll", "shed", - "shes", "should", "shouldn't", "show", "showed", "showing", "shown", "showns", "shows", "side", "sides", - "significant", "significantly", "similar", "similarly", "since", "six", "slightly", "small", "smaller", - "smallest", "so", "some", "somebody", "somehow", "someone", "somethan", "something", "sometime", - "sometimes", "somewhat", "somewhere", "soon", "sorry", "specifically", "specified", "specify", "specifying", - "state", "states", "still", "stop", "strongly", "sub", "substantially", "successfully", "such", - "sufficiently", "suggest", "sup", "sure", "t", "take", "taken", "taking", "tell", "tends", "th", "than", - "thank", "thanks", "thanx", "that", "that'll", "that've", "thats", "the", "their", "theirs", "them", - "themselves", "then", "thence", "there", "there'll", "there've", "thereafter", "thereby", "thered", - "therefore", "therein", "thereof", "therere", "theres", "thereto", "thereupon", "these", "they", "they'll", - "they've", "theyd", "theyre", "thing", "things", "think", "thinks", "this", "those", "thou", "though", - "thoughh", "thought", "thoughts", "thousand", "three", "throug", "through", "throughout", "thru", "thus", - "til", "tip", "to", "today", "together", "too", "took", "toward", "towards", "tried", "tries", "truly", - "try", "trying", "ts", "turn", "turned", "turning", "turns", "twice", "two", "u", "un", "under", + "members", "men", "merely", "mg", "might", "mill", "million", "mine", "miss", "ml", "more", "moreover", + "most", "mostly", "move", "mr", "mrs", "much", "mug", "must", "mustn't", "my", "myse", "myself", "n", "na", + "name", "namely", "nay", "nd", "near", "nearly", "necessarily", "necessary", "need", "needed", "needing", + "needs", "neither", "never", "nevertheless", "new", "newer", "newest", "next", "nine", "ninety", "no", + "nobody", "non", "none", "nonetheless", "noone", "nor", "normally", "nos", "not", "noted", "nothing", + "novel", "now", "nowhere", "number", "numbers", "o", "obtain", "obtained", "obviously", "of", "off", + "often", "oh", "ok", "okay", "old", "older", "oldest", "omitted", "on", "once", "one", "ones", "only", + "onto", "open", "opened", "opening", "opens", "or", "ord", "order", "ordered", "ordering", "orders", + "other", "others", "otherwise", "ought", "our", "ours", "ourselves", "out", "outside", "over", "overall", + "owing", "own", "p", "page", "pages", "part", "parted", "particular", "particularly", "parting", "parts", + "past", "per", "perhaps", "place", "placed", "places", "please", "plus", "point", "pointed", "pointing", + "points", "poorly", "possible", "possibly", "potentially", "pp", "predominantly", "present", "presented", + "presenting", "presents", "presumably", "previously", "primarily", "probably", "problem", "problems", + "promptly", "proud", "provides", "put", "puts", "q", "que", "quickly", "quite", "qv", "r", "ran", "rather", + "rd", "re", "readily", "really", "reasonably", "recent", "recently", "ref", "refs", "regarding", + "regardless", "regards", "related", "relatively", "research", "respectively", "resulted", "resulting", + "results", "right", "room", "rooms", "run", "s", "said", "same", "saw", "say", "saying", "says", "sec", + "second", "secondly", "seconds", "section", "see", "seeing", "seem", "seemed", "seeming", "seems", "seen", + "sees", "self", "selves", "sensible", "sent", "serious", "seriously", "seven", "several", "shall", "shan't", + "she", "she'd", "she'll", "she's", "shed", "shes", "should", "shouldn't", "show", "showed", "showing", + "shown", "showns", "shows", "side", "sides", "significant", "significantly", "similar", "similarly", + "since", "sincere", "six", "sixty", "slightly", "small", "smaller", "smallest", "so", "some", "somebody", + "somehow", "someone", "somethan", "something", "sometime", "sometimes", "somewhat", "somewhere", "soon", + "sorry", "specifically", "specified", "specify", "specifying", "state", "states", "still", "stop", + "strongly", "sub", "substantially", "successfully", "such", "sufficiently", "suggest", "sup", "sure", + "system", "t", "t's", "take", "taken", "taking", "tell", "ten", "tends", "th", "than", "thank", "thanks", + "thanx", "that", "that'll", "that's", "that've", "thats", "the", "their", "theirs", "them", "themselves", + "then", "thence", "there", "there'll", "there's", "there've", "thereafter", "thereby", "thered", + "therefore", "therein", "thereof", "therere", "theres", "thereto", "thereupon", "these", "they", "they'd", + "they'll", "they're", "they've", "theyd", "theyre", "thick", "thin", "thing", "things", "think", "thinks", + "third", "this", "thorough", "thoroughly", "those", "thou", "though", "thoughh", "thought", "thoughts", + "thousand", "three", "throug", "through", "throughout", "thru", "thus", "til", "tip", "to", "today", + "together", "too", "took", "top", "toward", "towards", "tried", "tries", "truly", "try", "trying", "ts", + "turn", "turned", "turning", "turns", "twelve", "twenty", "twice", "two", "u", "un", "under", "unfortunately", "unless", "unlike", "unlikely", "until", "unto", "up", "upon", "ups", "us", "use", "used", "useful", "usefully", "usefulness", "uses", "using", "usually", "v", "value", "various", "very", "via", - "viz", "vol", "vols", "vs", "w", "want", "wanted", "wanting", "wants", "was", "wasnt", "way", "ways", "we", - "we'll", "we've", "wed", "welcome", "well", "wells", "went", "were", "werent", "what", "what'll", - "whatever", "whats", "when", "whence", "whenever", "where", "whereafter", "whereas", "whereby", "wherein", - "wheres", "whereupon", "wherever", "whether", "which", "while", "whim", "whither", "who", "who'll", "whod", - "whoever", "whole", "whom", "whomever", "whos", "whose", "why", "widely", "will", "willing", "wish", "with", - "within", "without", "wont", "words", "work", "worked", "working", "works", "world", "would", "wouldnt", - "www", "x", "y", "year", "years", "yes", "yet", "you", "you'll", "you've", "youd", "young", "younger", - "youngest", "your", "youre", "yours", "yourself", "yourselves", "z", "zero"); + "viz", "vol", "vols", "vs", "w", "want", "wanted", "wanting", "wants", "was", "wasn't", "wasnt", "way", + "ways", "we", "we'd", "we'll", "we're", "we've", "wed", "welcome", "well", "wells", "went", "were", + "weren't", "werent", "what", "what'll", "what's", "whatever", "whats", "when", "when's", "whence", + "whenever", "where", "where's", "whereafter", "whereas", "whereby", "wherein", "wheres", "whereupon", + "wherever", "whether", "which", "while", "whim", "whither", "who", "who'll", "who's", "whod", "whoever", + "whole", "whom", "whomever", "whos", "whose", "why", "why's", "widely", "will", "willing", "wish", "with", + "within", "without", "won't", "wonder", "wont", "words", "work", "worked", "working", "works", "world", + "would", "wouldn't", "wouldnt", "www", "x", "y", "year", "years", "yes", "yet", "you", "you'd", "you'll", + "you're", "you've", "youd", "young", "younger", "youngest", "your", "youre", "yours", "yourself", + "yourselves", "z", "zero"); /** * Disallowed chars for words in processed text segments. This regular diff --git a/vipra-util/src/main/java/de/vipra/util/model/Sequence.java b/vipra-util/src/main/java/de/vipra/util/model/Sequence.java index 2a0050ae..d81ad458 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/Sequence.java +++ b/vipra-util/src/main/java/de/vipra/util/model/Sequence.java @@ -12,8 +12,8 @@ public class Sequence implements Comparable<Sequence>, Serializable { private Date startDate; private Date endDate; - private Integer number; + private Double relevance; @Embedded private List<TopicWord> words; @@ -42,6 +42,14 @@ public class Sequence implements Comparable<Sequence>, Serializable { this.number = number; } + public Double getRelevance() { + return relevance; + } + + public void setRelevance(Double relevance) { + this.relevance = relevance; + } + public List<TopicWord> getWords() { return words; } diff --git a/vipra-util/src/main/java/de/vipra/util/model/TopicWord.java b/vipra-util/src/main/java/de/vipra/util/model/TopicWord.java index 81f453c0..3aad49e9 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/TopicWord.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TopicWord.java @@ -3,7 +3,6 @@ package de.vipra.util.model; import java.io.Serializable; import org.mongodb.morphia.annotations.Embedded; -import org.mongodb.morphia.annotations.Reference; import com.fasterxml.jackson.annotation.JsonGetter; import com.fasterxml.jackson.annotation.JsonIgnore; @@ -13,7 +12,7 @@ import com.fasterxml.jackson.annotation.JsonSetter; @Embedded public class TopicWord implements Comparable<TopicWord>, Serializable { - @Reference + @Embedded @JsonIgnore private Word word; diff --git a/vipra-util/src/main/java/de/vipra/util/model/Word.java b/vipra-util/src/main/java/de/vipra/util/model/Word.java index d1346505..ab0f8703 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/Word.java +++ b/vipra-util/src/main/java/de/vipra/util/model/Word.java @@ -1,41 +1,17 @@ package de.vipra.util.model; import java.io.Serializable; -import java.util.Date; import org.mongodb.morphia.annotations.Entity; import org.mongodb.morphia.annotations.Id; -import org.mongodb.morphia.annotations.Index; -import org.mongodb.morphia.annotations.Indexes; -import org.mongodb.morphia.annotations.PostLoad; -import org.mongodb.morphia.annotations.PostPersist; -import org.mongodb.morphia.annotations.PrePersist; -import org.mongodb.morphia.annotations.Transient; - -import com.fasterxml.jackson.annotation.JsonIgnore; - -import de.vipra.util.an.QueryIgnore; @SuppressWarnings("serial") @Entity(value = "words", noClassnameStored = true) -@Indexes(@Index("-created")) public class Word implements Model<String>, Serializable { @Id private String id; - @QueryIgnore(multi = true) - private Date created; - - /** - * The created variable is a helper that marks non-persisted new words in - * the import process. Each word with created = false will be saved before - * topics and topics references are created. - */ - @Transient - @JsonIgnore - private boolean isCreated = false; - public Word() {} public Word(final String id) { @@ -52,34 +28,6 @@ public class Word implements Model<String>, Serializable { this.id = id; } - public boolean isCreated() { - return isCreated; - } - - public void setIsCreated(final boolean created) { - this.isCreated = created; - } - - public Date getCreated() { - return created; - } - - public void setCreated(final Date created) { - this.created = created; - } - - @PostLoad - @PostPersist - private void postLoadPersist() { - this.isCreated = true; - } - - @PrePersist - private void prePersist() { - if (this.created == null) - this.created = new Date(); - } - @Override public boolean equals(final Object o) { if (o == null) @@ -101,7 +49,7 @@ public class Word implements Model<String>, Serializable { @Override public String toString() { - return "Word [id=" + id + ", created=" + created + ", isCreated=" + isCreated + "]"; + return "Word [id=" + id + "]"; } } -- GitLab