diff --git a/jgibblda/.settings/org.eclipse.jdt.core.prefs b/jgibblda/.settings/org.eclipse.jdt.core.prefs index 1913b2950e234e853ad0dcb12bb5014f858e9f40..bf52a73f0cb4d7fde47825bf5e19ce3b293969af 100644 --- a/jgibblda/.settings/org.eclipse.jdt.core.prefs +++ b/jgibblda/.settings/org.eclipse.jdt.core.prefs @@ -62,7 +62,7 @@ org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false org.eclipse.jdt.core.formatter.comment.format_block_comments=true -org.eclipse.jdt.core.formatter.comment.format_header=true +org.eclipse.jdt.core.formatter.comment.format_header=false org.eclipse.jdt.core.formatter.comment.format_html=true org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true org.eclipse.jdt.core.formatter.comment.format_line_comments=true @@ -290,7 +290,7 @@ org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1 org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true org.eclipse.jdt.core.formatter.tabulation.char=tab org.eclipse.jdt.core.formatter.tabulation.size=4 -org.eclipse.jdt.core.formatter.use_on_off_tags=true +org.eclipse.jdt.core.formatter.use_on_off_tags=false org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true org.eclipse.jdt.core.formatter.wrap_before_or_operator_multicatch=true diff --git a/ma-impl.sublime-workspace b/ma-impl.sublime-workspace index b318812cd25c9e29f160a8b16a4c3fe860515f93..44cda50b3a9b778e0a38b047e8bcb0839aa39c1e 100644 --- a/ma-impl.sublime-workspace +++ b/ma-impl.sublime-workspace @@ -275,6 +275,15 @@ }, "buffers": [ + { + "contents": "stemming to lemmatization (corenlp benutzen)\ntop n words for topic name\nbuch: natural language processing with java\ndynamic lda\n\nVISUALISIERUNG!", + "settings": + { + "buffer_size": 144, + "line_ending": "Unix", + "name": "stemming to lemmatization (corenlp benutzen)" + } + } ], "build_system": "", "build_system_choices": @@ -453,14 +462,31 @@ }, "expanded_folders": [ - "/home/eike/repos/master/ma-impl" + "/home/eike/repos/master/ma-impl", + "/home/eike/repos/master/ma-impl/vipra-ui", + "/home/eike/repos/master/ma-impl/vipra-ui/app", + "/home/eike/repos/master/ma-impl/vipra-ui/app/adapters", + "/home/eike/repos/master/ma-impl/vipra-ui/app/components", + "/home/eike/repos/master/ma-impl/vipra-ui/app/routes", + "/home/eike/repos/master/ma-impl/vipra-ui/app/routes/topics", + "/home/eike/repos/master/ma-impl/vipra-ui/app/templates", + "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/articles", + "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/components" ], "file_history": [ + "/home/eike/repos/master/ma-impl/vm/data/test-1.json", + "/home/eike/repos/master/ma-impl/vm/data/test-2.json", + "/home/eike/.local/share/vipra/jgibb/jgibb.twords", + "/home/eike/.local/share/vipra/jgibb/jgibb.tassign", + "/home/eike/repos/master/ma-impl/vipra-ui/app/routes/topics/index.js", + "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/articles/index.hbs", + "/home/eike/repos/master/ma-impl/vipra-ui/app/adapters/application.js", + "/home/eike/.local/share/vipra/jgibb/jgibb", + "/home/eike/repos/master/ma-doc/thesis/thesis.tex", "/home/eike/repos/master/ma-impl/Vagrantfile", "/home/eike/repos/master/ma-impl/vm/bootstrap.sh", "/home/eike/repos/master/ma-impl/vm/webapps/ROOT/index.html", - "/home/eike/repos/master/ma-impl/vipra-ui/app/adapters/application.js", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/articles.hbs", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/topics.hbs", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/index.hbs", @@ -470,7 +496,6 @@ "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/topics/index.hbs", "/home/eike/repos/master/ma-impl/vipra-ui/app/router.js", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/topics/show/index.hbs", - "/home/eike/repos/master/ma-impl/vipra-ui/app/routes/topics/index.js", "/home/eike/repos/master/ma-impl/vipra-ui/app/routes/topics/show.js", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/topics/show/edit.hbs", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/topics/show.hbs", @@ -512,13 +537,10 @@ "/core", "/home/eike/repos/master/ma-impl/vm/config/environment", "/home/eike/repos/master/ma-impl/vm/config/initd-tomcat", - "/home/eike/.local/share/vipra/jgibb/jgibb.tassign", - "/home/eike/.local/share/vipra/jgibb/jgibb.twords", "/home/eike/Repositories/fu/ss15/ma/impl/TODO", "/home/eike/.local/share/vipra/jgibb/vocab", "/home/eike/.local/share/vipra/jgibb/index", "/home/eike/.local/share/vipra/jgibb/jgibb.phi", - "/home/eike/.local/share/vipra/jgibb/jgibb", "/home/eike/Downloads/JGibbLDA-v.1.0/src/jgibblda/Constants.java", "/home/eike/Downloads/JGibbLDA-v.1.0/models/casestudy-en/model-final.others", "/home/eike/Downloads/JGibbLDA-v.1.0/models/casestudy-en/model-final.twords", @@ -580,11 +602,7 @@ "/home/eike/Repositories/fu/ss15/ma/impl/vm/config/environment", "/home/eike/Repositories/fu/ss15/ma/impl/tmbs-frontend/app/templates/articles.hbs", "/home/eike/Repositories/fu/ss15/ma/impl/tmbs-frontend/app/templates/application.hbs", - "/home/eike/Repositories/fu/ss15/ma/impl/vm/config/disable-transparent-hugepages", - "/home/eike/Repositories/fu/ss15/ma/impl/vm/config/spark-env.sh", - "/home/eike/Repositories/fu/ss15/ma/impl/vm/rc.sh", - "/home/eike/Repositories/fu/ss15/ma/impl/vm/config/rc.local", - "/home/eike/Repositories/fu/ss15/ma/impl/vm/env.sh" + "/home/eike/Repositories/fu/ss15/ma/impl/vm/config/disable-transparent-hugepages" ], "find": { @@ -635,6 +653,11 @@ "case_sensitive": false, "find_history": [ + " ", + "\\n\\n", + "\\n", + ",", + "compar", "de.vipra.cmd.model", "indent", "00:00Z\" },", @@ -757,19 +780,17 @@ "right", "select_all", "category.new", - "Account", - "Aktual", - "logged_in?", - "intended", - "redirect_intended", - "amount-in" + "Account" ], "highlight": true, "in_selection": false, "preserve_case": false, - "regex": false, + "regex": true, "replace_history": [ + "\", \"", + " ", + "", "00:00Z\",", "", "Z", @@ -894,10 +915,7 @@ "ul.menu", "<br>", "@append$1", - "survey", - "SurveysController", - "", - "/assets" + "survey" ], "reverse": false, "show_context": true, @@ -908,8 +926,38 @@ "groups": [ { + "selected": 0, "sheets": [ + { + "buffer": 0, + "semi_transient": false, + "settings": + { + "buffer_size": 144, + "regions": + { + }, + "selection": + [ + [ + 99, + 99 + ] + ], + "settings": + { + "auto_name": "stemming to lemmatization (corenlp benutzen)", + "default_dir": "/home/eike/repos/master/ma-impl", + "syntax": "Packages/Text/Plain text.tmLanguage" + }, + "translation.x": 0.0, + "translation.y": 0.0, + "zoom_level": 1.0 + }, + "stack_index": 0, + "type": "text" + } ] } ], @@ -960,7 +1008,7 @@ "project": "ma-impl.sublime-project", "replace": { - "height": 46.0 + "height": 66.0 }, "save_all_on_build": true, "select_file": diff --git a/vipra-cmd/.settings/org.eclipse.jdt.core.prefs b/vipra-cmd/.settings/org.eclipse.jdt.core.prefs index 1913b2950e234e853ad0dcb12bb5014f858e9f40..bf52a73f0cb4d7fde47825bf5e19ce3b293969af 100644 --- a/vipra-cmd/.settings/org.eclipse.jdt.core.prefs +++ b/vipra-cmd/.settings/org.eclipse.jdt.core.prefs @@ -62,7 +62,7 @@ org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false org.eclipse.jdt.core.formatter.comment.format_block_comments=true -org.eclipse.jdt.core.formatter.comment.format_header=true +org.eclipse.jdt.core.formatter.comment.format_header=false org.eclipse.jdt.core.formatter.comment.format_html=true org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true org.eclipse.jdt.core.formatter.comment.format_line_comments=true @@ -290,7 +290,7 @@ org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1 org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true org.eclipse.jdt.core.formatter.tabulation.char=tab org.eclipse.jdt.core.formatter.tabulation.size=4 -org.eclipse.jdt.core.formatter.use_on_off_tags=true +org.eclipse.jdt.core.formatter.use_on_off_tags=false org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true org.eclipse.jdt.core.formatter.wrap_before_or_operator_multicatch=true diff --git a/vipra-cmd/pom.xml b/vipra-cmd/pom.xml index e6d8ba395abdb86d4d2fa41371e6a7415326e90c..5aed56ed1592d2fc7f23fb6262bfb56bdbc0963e 100644 --- a/vipra-cmd/pom.xml +++ b/vipra-cmd/pom.xml @@ -51,6 +51,12 @@ <artifactId>stanford-corenlp</artifactId> <version>3.5.2</version> </dependency> + <dependency> + <groupId>edu.stanford.nlp</groupId> + <artifactId>stanford-corenlp</artifactId> + <version>3.5.2</version> + <classifier>models</classifier> + </dependency> <!-- Lucene --> <dependency> diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/Main.java b/vipra-cmd/src/main/java/de/vipra/cmd/Main.java index 946eb6b9d5d57dacbdfde5c51a1186fbe3766805..4c488805545eb165f9bb8e73868370a9b7e6e88e 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/Main.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/Main.java @@ -93,7 +93,7 @@ public class Main { try { c.run(); } catch (ExecutionException e) { - out.error(e.getMessage()); + out.error(e.getMessage(), e); ConsoleUtils.Choice choice; boolean acceptDefault = cline.hasOption(OPT_DEFAULTS); do { diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbLDAAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbLDAAnalyzer.java index bcfdf9f66f1acd07d554f26af8dbaaa0700bb0f7..684f0f65e536fbc0ee5662b5a4d50eb67fb5d710 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbLDAAnalyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbLDAAnalyzer.java @@ -18,7 +18,7 @@ import de.vipra.util.ConvertStream; import de.vipra.util.StringUtils; import de.vipra.util.ex.ConfigException; import de.vipra.util.model.Topic; -import de.vipra.util.model.TopicCount; +import de.vipra.util.model.TopicRef; import de.vipra.util.model.TopicWord; import jgibblda.Estimator; import jgibblda.Inferencer; @@ -109,7 +109,9 @@ public class JGibbLDAAnalyzer extends LDAAnalyzer { break; } } + Collections.sort(topicWords); topicDef.setWords(topicWords); + topicDef.setName(topicDef.getNameFromWords()); return topicDef; } }; @@ -119,12 +121,12 @@ public class JGibbLDAAnalyzer extends LDAAnalyzer { } @Override - public ConvertStream<List<TopicCount>> getTopics() throws LDAAnalyzerException { + public ConvertStream<List<TopicRef>> getTopics() throws LDAAnalyzerException { File tassign = new File(modelDir, "jgibb.tassign"); try { - return new ConvertStream<List<TopicCount>>(tassign) { + return new ConvertStream<List<TopicRef>>(tassign) { @Override - public List<TopicCount> convert(String line) { + public List<TopicRef> convert(String line) { // count topics Map<String, Integer> countMap = new HashMap<>(); String[] wordList = line.split("\\s+"); @@ -135,9 +137,12 @@ public class JGibbLDAAnalyzer extends LDAAnalyzer { } // turn into list - List<TopicCount> topicCount = new ArrayList<>(countMap.size()); + List<TopicRef> topicCount = new ArrayList<>(countMap.size()); for (Entry<String, Integer> e : countMap.entrySet()) { - topicCount.add(new TopicCount(e.getKey(), e.getValue())); + TopicRef tc = new TopicRef(); + tc.setTopicId(e.getKey()); + tc.setCount(e.getValue()); + topicCount.add(tc); } Collections.sort(topicCount, Collections.reverseOrder()); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java index 90d93345041007a489caeaf277153a898645cb62..dc3e11319d6591d1172d194055219ef05f090a7a 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java @@ -8,7 +8,7 @@ import de.vipra.util.Config.Key; import de.vipra.util.Constants; import de.vipra.util.ConvertStream; import de.vipra.util.model.Topic; -import de.vipra.util.model.TopicCount; +import de.vipra.util.model.TopicRef; public abstract class LDAAnalyzer { @@ -28,7 +28,7 @@ public abstract class LDAAnalyzer { public abstract ConvertStream<Topic> getTopicDefinitions() throws LDAAnalyzerException; - public abstract ConvertStream<List<TopicCount>> getTopics() throws LDAAnalyzerException; + public abstract ConvertStream<List<TopicRef>> getTopics() throws LDAAnalyzerException; public static LDAAnalyzer getAnalyzer(Config config) throws LDAAnalyzerException { LDAAnalyzer analyzer = null; diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ClearCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ClearCommand.java index 26496207dada056996861186df74f92df3ec4ac4..178ec40448a8b5c801f1034fbcb62a2c3e246995 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ClearCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ClearCommand.java @@ -12,7 +12,6 @@ import de.vipra.cmd.ex.ClearException; import de.vipra.cmd.model.ProcessedArticle; import de.vipra.util.Config; import de.vipra.util.ConsoleUtils; -import de.vipra.util.Constants; import de.vipra.util.ex.ConfigException; import de.vipra.util.model.Topic; import de.vipra.util.service.DatabaseService; @@ -34,8 +33,8 @@ public class ClearCommand implements Command { private void clear() throws ClearException, ConfigException { try { config = Config.getConfig(); - dbArticles = DatabaseService.getDatabaseService(config, Constants.Collection.ARTICLES, ProcessedArticle.class); - dbTopics = DatabaseService.getDatabaseService(config, Constants.Collection.TOPICS, Topic.class); + dbArticles = DatabaseService.getDatabaseService(config, ProcessedArticle.class); + dbTopics = DatabaseService.getDatabaseService(config, Topic.class); } catch (Exception e) { throw new ClearException(e); } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/DeleteCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/DeleteCommand.java index d1d7981a3eb11a85525738c73f6f48d41908f163..4795c0cfda1aa997f5244141329ed7eaa3437467 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/DeleteCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/DeleteCommand.java @@ -13,7 +13,6 @@ import de.vipra.cmd.ex.FilebaseException; import de.vipra.cmd.file.Filebase; import de.vipra.cmd.model.ProcessedArticle; import de.vipra.util.Config; -import de.vipra.util.Constants; import de.vipra.util.ex.ConfigException; import de.vipra.util.ex.DatabaseException; import de.vipra.util.service.DatabaseService; @@ -78,8 +77,7 @@ public class DeleteCommand implements Command { public void run() throws ExecutionException { try { config = Config.getConfig(); - dbArticles = DatabaseService.getDatabaseService(config, Constants.Collection.ARTICLES, - ProcessedArticle.class); + dbArticles = DatabaseService.getDatabaseService(config, ProcessedArticle.class); filebase = Filebase.getFilebase(config); } catch (IOException | FilebaseException | ConfigException e) { throw new ExecutionException(e); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java index 438ffaf63cdc77016912817f73a02e654713c0cf..2f85032520412e00a193a14b93a3913f2d73de07 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java @@ -28,14 +28,13 @@ import de.vipra.cmd.model.ProcessedArticle; import de.vipra.cmd.text.ProcessedText; import de.vipra.cmd.text.Processor; import de.vipra.util.Config; -import de.vipra.util.Constants; import de.vipra.util.ConvertStream; import de.vipra.util.StringUtils; import de.vipra.util.Timer; import de.vipra.util.ex.DatabaseException; import de.vipra.util.model.ArticleStats; import de.vipra.util.model.Topic; -import de.vipra.util.model.TopicCount; +import de.vipra.util.model.TopicRef; import de.vipra.util.service.DatabaseService; public class ImportCommand implements Command { @@ -190,21 +189,21 @@ public class ImportCommand implements Command { * @throws LDAAnalyzerException */ private void saveTopicsPerDocument(Map<String, String> topicIndexMap) throws LDAAnalyzerException { - ConvertStream<List<TopicCount>> topics = analyzer.getTopics(); + ConvertStream<List<TopicRef>> topics = analyzer.getTopics(); FilebaseIndex index = filebase.getIndex(); Iterator<String> indexIter = index.iterator(); - Iterator<List<TopicCount>> topicIter = topics.iterator(); + Iterator<List<TopicRef>> topicIter = topics.iterator(); while (indexIter.hasNext() && topicIter.hasNext()) { String id = indexIter.next(); - List<TopicCount> topicCount = topicIter.next(); - for (TopicCount tc : topicCount) { - String oid = topicIndexMap.get(tc.getId()); + List<TopicRef> topicCount = topicIter.next(); + for (TopicRef tc : topicCount) { + String oid = topicIndexMap.get(tc.getTopicId()); if (oid != null) - tc.setId(topicIndexMap.get(tc.getId())); + tc.setTopicId(topicIndexMap.get(tc.getTopicId())); else - log.error("no object id for topic index " + tc.getId()); + log.error("no object id for topic index " + tc.getTopicId()); } ProcessedArticle a = dbArticles.getSingle(id); if (a != null) @@ -223,9 +222,8 @@ public class ImportCommand implements Command { public void run() throws ExecutionException { try { config = Config.getConfig(); - dbArticles = DatabaseService.getDatabaseService(config, Constants.Collection.ARTICLES, - ProcessedArticle.class); - dbTopics = DatabaseService.getDatabaseService(config, Constants.Collection.TOPICS, Topic.class); + dbArticles = DatabaseService.getDatabaseService(config, ProcessedArticle.class); + dbTopics = DatabaseService.getDatabaseService(config, Topic.class); filebase = Filebase.getFilebase(config); preprocessor = Processor.getPreprocessor(config); analyzer = LDAAnalyzer.getAnalyzer(config); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/StatsCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/StatsCommand.java index 8410d7848d626a094f22cef4252a4ff40b8c8489..774fd43291690fbb0d065166f1ec7f43be745e03 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/StatsCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/StatsCommand.java @@ -10,7 +10,6 @@ import de.vipra.cmd.ExecutionException; import de.vipra.cmd.ex.FilebaseException; import de.vipra.cmd.file.Filebase; import de.vipra.util.Config; -import de.vipra.util.Constants; import de.vipra.util.StringUtils; import de.vipra.util.ex.ConfigException; import de.vipra.util.model.Topic; @@ -38,7 +37,7 @@ public class StatsCommand implements Command { try { config = Config.getConfig(); filebase = Filebase.getFilebase(config); - dbTopics = DatabaseService.getDatabaseService(config, Constants.Collection.TOPICS, Topic.class); + dbTopics = DatabaseService.getDatabaseService(config, Topic.class); stats(); } catch (IOException | ConfigException | FilebaseException e) { diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/CoreNLPProcessor.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/CoreNLPProcessor.java new file mode 100644 index 0000000000000000000000000000000000000000..d0534bca8a96cb1943cb90951155fcb20a169a83 --- /dev/null +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/CoreNLPProcessor.java @@ -0,0 +1,47 @@ +package de.vipra.cmd.text; + +import java.util.List; +import java.util.Properties; + +import de.vipra.cmd.ex.PreprocessorException; +import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation; +import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; +import edu.stanford.nlp.ling.CoreLabel; +import edu.stanford.nlp.pipeline.Annotation; +import edu.stanford.nlp.pipeline.StanfordCoreNLP; +import edu.stanford.nlp.util.CoreMap; +import edu.stanford.nlp.util.StringUtils; + +public class CoreNLPProcessor extends Processor { + + private StanfordCoreNLP nlp; + + public CoreNLPProcessor(List<String> stopWordsList) { + super("Stanford CoreNLP Processor"); + + Properties props = new Properties(); + props.setProperty("customAnnotatorClass.stopwords", StopwordsAnnotator.class.getCanonicalName()); + props.setProperty("annotators", "tokenize, ssplit, stopwords, pos, lemma"); + props.setProperty("stopwords", StringUtils.join(stopWordsList)); + + nlp = new StanfordCoreNLP(props); + } + + @Override + public ProcessedText preprocess(String input) throws PreprocessorException { + Annotation doc = new Annotation(input); + nlp.annotate(doc); + StringBuilder sb = new StringBuilder(); + List<CoreMap> sentences = doc.get(SentencesAnnotation.class); + for (CoreMap sentence : sentences) { + List<CoreLabel> words = sentence.get(TokensAnnotation.class); + for (CoreLabel word : words) { + Boolean b = word.get(StopwordsAnnotator.class); + if (b == null || !b) + sb.append(word.word()).append(" "); + } + } + return new ProcessedText(sb.toString().trim()); + } + +} diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/Processor.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/Processor.java index 8dfd50aa161ce9151610269579d6e096c9eb24af..fcf6521d6cf66f89724d0e972971f7a94bc1e57e 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/text/Processor.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/Processor.java @@ -23,18 +23,17 @@ public abstract class Processor { public abstract ProcessedText preprocess(String input) throws PreprocessorException; public static Processor getPreprocessor(Config config) { - List<String> stopWords = Arrays.asList(config.getString(Key.STOPWORDS).toLowerCase().split(",")); - if (stopWords.size() == 0) { - stopWords = Constants.STOPWORDS; - } + List<String> stopWords = Constants.STOPWORDS; switch (Constants.Processor.fromString(config.getString(Key.PREPROCESSOR))) { case CUSTOM: return new CustomProcessor(stopWords); + case CORENLP: + return new CoreNLPProcessor(stopWords); case LUCENE: - case DEFAULT: - default: return new LuceneProcessor(stopWords); + default: + return null; } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/StopwordsAnnotator.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/StopwordsAnnotator.java new file mode 100644 index 0000000000000000000000000000000000000000..5415cde706b16540f3dd50c475dac74f998aa689 --- /dev/null +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/StopwordsAnnotator.java @@ -0,0 +1,51 @@ +package de.vipra.cmd.text; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Properties; +import java.util.Set; + +import edu.stanford.nlp.ling.CoreAnnotation; +import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; +import edu.stanford.nlp.ling.CoreLabel; +import edu.stanford.nlp.pipeline.Annotation; +import edu.stanford.nlp.pipeline.Annotator; + +public class StopwordsAnnotator implements Annotator, CoreAnnotation<Boolean> { + + public static final String NAME = "stopwords"; + + private Set<String> stopWords; + + public StopwordsAnnotator(String input, Properties props) { + stopWords = new HashSet<String>(Arrays.asList(props.getProperty(NAME).split(" "))); + } + + @Override + public void annotate(Annotation annotation) { + List<CoreLabel> tokens = annotation.get(TokensAnnotation.class); + for (CoreLabel token : tokens) { + if (stopWords.contains(token.word().toLowerCase())) { + token.set(StopwordsAnnotator.class, true); + } + } + } + + @Override + public Set<Requirement> requirementsSatisfied() { + return Collections.singleton(new Requirement(NAME)); + } + + @Override + public Set<Requirement> requires() { + return TOKENIZE_AND_SSPLIT; + } + + @Override + public Class<Boolean> getType() { + return Boolean.class; + } + +} diff --git a/vipra-rest/.settings/org.eclipse.jdt.core.prefs b/vipra-rest/.settings/org.eclipse.jdt.core.prefs index c07c252bc5d05edd0375e06b1d52f0a921ae65a0..bf52a73f0cb4d7fde47825bf5e19ce3b293969af 100644 --- a/vipra-rest/.settings/org.eclipse.jdt.core.prefs +++ b/vipra-rest/.settings/org.eclipse.jdt.core.prefs @@ -14,7 +14,7 @@ org.eclipse.jdt.core.compiler.source=1.8 org.eclipse.jdt.core.formatter.align_type_members_on_columns=false org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16 org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation=0 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16 +org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=48 org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16 org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16 org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16 @@ -22,7 +22,7 @@ org.eclipse.jdt.core.formatter.alignment_for_assignment=0 org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16 org.eclipse.jdt.core.formatter.alignment_for_compact_if=16 org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80 -org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0 +org.eclipse.jdt.core.formatter.alignment_for_enum_constants=49 org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16 org.eclipse.jdt.core.formatter.alignment_for_method_declaration=0 org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16 @@ -31,7 +31,7 @@ org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16 org.eclipse.jdt.core.formatter.alignment_for_resources_in_try=80 org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16 org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16 +org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=48 org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16 org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16 org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16 @@ -91,7 +91,7 @@ org.eclipse.jdt.core.formatter.indent_empty_lines=false org.eclipse.jdt.core.formatter.indent_statements_compare_to_block=true org.eclipse.jdt.core.formatter.indent_statements_compare_to_body=true org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases=true -org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=false +org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=true org.eclipse.jdt.core.formatter.indentation.size=4 org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_field=insert org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable=insert diff --git a/vipra-rest/.settings/org.eclipse.jdt.ui.prefs b/vipra-rest/.settings/org.eclipse.jdt.ui.prefs index fe89f28bca5900b59ebe3dc0ed0745189fa9e153..5713c654549b40f76104760ab2f6cd031d71f011 100644 --- a/vipra-rest/.settings/org.eclipse.jdt.ui.prefs +++ b/vipra-rest/.settings/org.eclipse.jdt.ui.prefs @@ -1,2 +1,3 @@ eclipse.preferences.version=1 +formatter_profile=_vipra formatter_settings_version=12 diff --git a/vipra-rest/pom.xml b/vipra-rest/pom.xml index 415ff18fa35cb35bb8c3476dfef36167207fc665..8b81b7ee714fb55b749b29339048e3261b561fb8 100644 --- a/vipra-rest/pom.xml +++ b/vipra-rest/pom.xml @@ -33,7 +33,6 @@ </dependency> <dependency> <groupId>org.glassfish.jersey.media</groupId> - <!--<artifactId>jersey-media-moxy</artifactId> --> <artifactId>jersey-media-json-jackson</artifactId> <version>${jerseyVersion}</version> </dependency> @@ -83,6 +82,13 @@ <scope>runtime</scope> </dependency> + <!-- Caching --> + <dependency> + <groupId>org.ehcache</groupId> + <artifactId>ehcache</artifactId> + <version>3.0.0.m4</version> + </dependency> + <!-- MongoDB Database Adapter --> <dependency> <groupId>org.mongodb</groupId> diff --git a/vipra-rest/src/main/java/de/vipra/rest/provider/InitializationListener.java b/vipra-rest/src/main/java/de/vipra/rest/provider/InitializationListener.java new file mode 100644 index 0000000000000000000000000000000000000000..6555abaf5eb276c376b1c2b26e6b0f9aa404660f --- /dev/null +++ b/vipra-rest/src/main/java/de/vipra/rest/provider/InitializationListener.java @@ -0,0 +1,33 @@ +package de.vipra.rest.provider; + +import javax.servlet.ServletContext; +import javax.servlet.ServletContextEvent; +import javax.servlet.ServletContextListener; + +import org.ehcache.CacheManager; +import org.ehcache.CacheManagerBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class InitializationListener implements ServletContextListener { + + public static final Logger log = LoggerFactory.getLogger(InitializationListener.class); + + @Override + public void contextDestroyed(ServletContextEvent sce) { + log.info("jersey servlet context destroyed"); + ServletContext ctx = sce.getServletContext(); + CacheManager manager = (CacheManager) ctx.getAttribute("cacheManager"); + if (manager != null) + manager.close(); + } + + @Override + public void contextInitialized(ServletContextEvent sce) { + log.info("jersey servlet context initialized"); + ServletContext ctx = sce.getServletContext(); + CacheManager manager = CacheManagerBuilder.newCacheManagerBuilder().build(true); + ctx.setAttribute("cachemanager", manager); + } + +} diff --git a/vipra-rest/src/main/java/de/vipra/rest/provider/ObjectMapperProvider.java b/vipra-rest/src/main/java/de/vipra/rest/provider/ObjectMapperProvider.java index b8cc2188dca150d22fb5a6627d080160de53b900..9289222c737362b445d2e4318b2686381e424c40 100644 --- a/vipra-rest/src/main/java/de/vipra/rest/provider/ObjectMapperProvider.java +++ b/vipra-rest/src/main/java/de/vipra/rest/provider/ObjectMapperProvider.java @@ -13,10 +13,8 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.SerializationFeature; import com.fasterxml.jackson.databind.module.SimpleModule; -import de.vipra.rest.serializer.ArticleDeserializer; -import de.vipra.rest.serializer.ArticleSerializer; -import de.vipra.rest.serializer.TopicDeserializer; -import de.vipra.rest.serializer.TopicSerializer; +import de.vipra.rest.serializer.GenericDeserializer; +import de.vipra.rest.serializer.GenericSerializer; import de.vipra.util.Constants; import de.vipra.util.model.Article; import de.vipra.util.model.Topic; @@ -39,10 +37,10 @@ public class ObjectMapperProvider implements ContextResolver<ObjectMapper> { public static ObjectMapper createDefaultMapper() { SimpleModule module = new SimpleModule(); - module.addSerializer(Article.class, new ArticleSerializer()); - module.addDeserializer(Article.class, new ArticleDeserializer()); - module.addSerializer(Topic.class, new TopicSerializer()); - module.addDeserializer(Topic.class, new TopicDeserializer()); + module.addSerializer(Article.class, new GenericSerializer<Article>(Article.class)); + module.addDeserializer(Article.class, new GenericDeserializer<Article>(Article.class)); + module.addSerializer(Topic.class, new GenericSerializer<Topic>(Topic.class)); + module.addDeserializer(Topic.class, new GenericDeserializer<Topic>(Topic.class)); final ObjectMapper mapper = new ObjectMapper(); mapper.enable(SerializationFeature.INDENT_OUTPUT); diff --git a/vipra-rest/src/main/java/de/vipra/rest/provider/WrapperResponseFilter.java b/vipra-rest/src/main/java/de/vipra/rest/provider/WrapperResponseFilter.java deleted file mode 100644 index a7215196448954528232bbcc69411c86b2bf667b..0000000000000000000000000000000000000000 --- a/vipra-rest/src/main/java/de/vipra/rest/provider/WrapperResponseFilter.java +++ /dev/null @@ -1,18 +0,0 @@ -package de.vipra.rest.provider; - -import java.io.IOException; - -import javax.ws.rs.container.ContainerRequestContext; -import javax.ws.rs.container.ContainerResponseContext; -import javax.ws.rs.container.ContainerResponseFilter; - -public class WrapperResponseFilter implements ContainerResponseFilter { - - @SuppressWarnings("unused") - @Override - public void filter(ContainerRequestContext arg0, ContainerResponseContext arg1) throws IOException { - Object entity = arg1.getEntity(); - int status = arg1.getStatus(); - } - -} diff --git a/vipra-rest/src/main/java/de/vipra/rest/resource/ArticleResource.java b/vipra-rest/src/main/java/de/vipra/rest/resource/ArticleResource.java index 379d5f253b2609021ab5ab1dddda526311eed1e0..26cfc36e8a67f5924793743ad04323b7d0f78d31 100644 --- a/vipra-rest/src/main/java/de/vipra/rest/resource/ArticleResource.java +++ b/vipra-rest/src/main/java/de/vipra/rest/resource/ArticleResource.java @@ -18,6 +18,10 @@ import javax.ws.rs.core.Context; import javax.ws.rs.core.Response; import javax.ws.rs.core.UriInfo; +import org.ehcache.Cache; +import org.ehcache.CacheManager; +import org.ehcache.config.CacheConfigurationBuilder; + import de.vipra.rest.APIMediaType; import de.vipra.rest.Messages; import de.vipra.rest.PATCH; @@ -36,12 +40,20 @@ public class ArticleResource { @Context UriInfo uri; + Cache<String, Article> articleCache; + final ArticleService service; public ArticleResource(@Context ServletContext servletContext) throws ConfigException, IOException { Config config = Config.getConfig(); Mongo mongo = Mongo.getInstance(config); service = new ArticleService(mongo); + + CacheManager manager = (CacheManager) servletContext.getAttribute("cachemanager"); + articleCache = manager.getCache("articlecache", String.class, Article.class); + if (articleCache == null) + articleCache = manager.createCache("articlecache", + CacheConfigurationBuilder.newCacheConfigurationBuilder().buildConfig(String.class, Article.class)); } @GET @@ -65,7 +77,16 @@ public class ArticleResource { String.format(Messages.BAD_REQUEST, "id cannot be empty"))); return Response.status(Response.Status.BAD_REQUEST).entity(res).build(); } - Article article = service.getSingle(id); + + // caching + Article article = articleCache.get(id); + if (article == null) { + article = service.getSingle(id); + if (article != null) + articleCache.put(id, article); + } + + // checking if (article != null) { res.setData(article); return Response.ok().entity(res).tag(res.tag()).build(); @@ -104,16 +125,17 @@ public class ArticleResource { "item could not be created due to an internal server error")); return Response.serverError().entity(res).build(); } + articleCache.remove(id); int del = deleted > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) deleted; switch (del) { - case 0: - res.addError(new APIError(Response.Status.NOT_FOUND, "Article not found", - String.format(Messages.NOT_FOUND, "article", id))); - return Response.status(Response.Status.NOT_FOUND).entity(res).build(); - case 1: - return Response.noContent().build(); - default: - return Response.serverError().build(); + case 0: + res.addError(new APIError(Response.Status.NOT_FOUND, "Article not found", + String.format(Messages.NOT_FOUND, "article", id))); + return Response.status(Response.Status.NOT_FOUND).entity(res).build(); + case 1: + return Response.noContent().build(); + default: + return Response.serverError().build(); } } @@ -126,6 +148,7 @@ public class ArticleResource { Wrapper<Article> res = new Wrapper<>(); try { service.updateSingle(article); + articleCache.put(id, article); res.setData(article); return Response.ok().entity(res).tag(res.tag()).build(); } catch (DatabaseException e) { diff --git a/vipra-rest/src/main/java/de/vipra/rest/resource/PingResource.java b/vipra-rest/src/main/java/de/vipra/rest/resource/PingResource.java new file mode 100644 index 0000000000000000000000000000000000000000..028c1cafc565077b56471b27eed997bb7c5ad2c4 --- /dev/null +++ b/vipra-rest/src/main/java/de/vipra/rest/resource/PingResource.java @@ -0,0 +1,18 @@ +package de.vipra.rest.resource; + +import javax.ws.rs.GET; +import javax.ws.rs.Path; +import javax.ws.rs.Produces; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; + +@Path("ping") +public class PingResource { + + @GET + @Produces(MediaType.TEXT_PLAIN) + public Response ping() { + return Response.ok().entity("running").build(); + } + +} diff --git a/vipra-rest/src/main/java/de/vipra/rest/resource/TopicResource.java b/vipra-rest/src/main/java/de/vipra/rest/resource/TopicResource.java index 52b04d76f5483fbb042ad0c5ae09d5a9c3db50da..3d4a05979986abdfee303f960914f9c3ca1db928 100644 --- a/vipra-rest/src/main/java/de/vipra/rest/resource/TopicResource.java +++ b/vipra-rest/src/main/java/de/vipra/rest/resource/TopicResource.java @@ -90,7 +90,7 @@ public class TopicResource { return Response.serverError().entity(res).build(); } } - + @PATCH @Consumes(APIMediaType.APPLICATION_JSONAPI) @Produces(APIMediaType.APPLICATION_JSONAPI) diff --git a/vipra-rest/src/main/java/de/vipra/rest/serializer/ArticleDeserializer.java b/vipra-rest/src/main/java/de/vipra/rest/serializer/ArticleDeserializer.java deleted file mode 100644 index 4f4fddf584fd7298958d3574079bf04038593d25..0000000000000000000000000000000000000000 --- a/vipra-rest/src/main/java/de/vipra/rest/serializer/ArticleDeserializer.java +++ /dev/null @@ -1,46 +0,0 @@ -package de.vipra.rest.serializer; - -import static de.vipra.rest.serializer.JsonHelper.getString; -import static de.vipra.rest.serializer.JsonHelper.stringToDate; - -import java.io.IOException; - -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.DeserializationContext; -import com.fasterxml.jackson.databind.JsonDeserializer; -import com.fasterxml.jackson.databind.JsonNode; - -import de.vipra.util.model.Article; - -public class ArticleDeserializer extends JsonDeserializer<Article> { - - @Override - public Article deserialize(JsonParser p, DeserializationContext ctxt) throws IOException, JsonProcessingException { - Article article = null; - - JsonNode node = p.readValueAsTree(); - if (node != null) { - article = new Article(); - if (node.has("id")) - article.setId(getString(node, "id")); - - if (node.has("attributes")) { - JsonNode attrs = node.get("attributes"); - if (attrs.has("title")) - article.setTitle(getString(attrs, "title")); - if (attrs.has("text")) - article.setText(getString(attrs, "text")); - if (attrs.has("url")) - article.setUrl(getString(attrs, "url")); - if (attrs.has("date")) - article.setDate(stringToDate(getString(attrs, "date"))); - // TODO implement stats deserializer - // TODO implement topics deserializer - } - } - - return article; - } - -} \ No newline at end of file diff --git a/vipra-rest/src/main/java/de/vipra/rest/serializer/ArticleSerializer.java b/vipra-rest/src/main/java/de/vipra/rest/serializer/ArticleSerializer.java deleted file mode 100644 index c09049ec12c3952c03abcbf102d92a48de13bc80..0000000000000000000000000000000000000000 --- a/vipra-rest/src/main/java/de/vipra/rest/serializer/ArticleSerializer.java +++ /dev/null @@ -1,41 +0,0 @@ -package de.vipra.rest.serializer; - -import static de.vipra.rest.serializer.JsonHelper.dateToString; - -import java.io.IOException; - -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.JsonSerializer; -import com.fasterxml.jackson.databind.SerializerProvider; - -import de.vipra.util.model.Article; - -public class ArticleSerializer extends JsonSerializer<Article> { - - @Override - public void serialize(Article value, JsonGenerator gen, SerializerProvider serializers) - throws IOException, JsonProcessingException { - gen.writeStartObject(); - gen.writeStringField("id", value.getId().toString()); - gen.writeStringField("type", "article"); - - gen.writeObjectFieldStart("attributes"); - if (value.getTitle() != null) - gen.writeStringField("title", value.getTitle()); - if (value.getText() != null) - gen.writeStringField("text", value.getText()); - if (value.getUrl() != null) - gen.writeStringField("url", value.getUrl()); - if (value.getDate() != null) - gen.writeStringField("date", dateToString(value.getDate())); - if (value.getStats() != null) - gen.writeObjectField("stats", value.getStats()); - if (value.getTopics() != null) - gen.writeObjectField("topics", value.getTopics()); - gen.writeEndObject(); - - gen.writeEndObject(); - } - -} \ No newline at end of file diff --git a/vipra-rest/src/main/java/de/vipra/rest/serializer/GenericDeserializer.java b/vipra-rest/src/main/java/de/vipra/rest/serializer/GenericDeserializer.java new file mode 100644 index 0000000000000000000000000000000000000000..710073d2687bba690a903960188d9dfd147cfad6 --- /dev/null +++ b/vipra-rest/src/main/java/de/vipra/rest/serializer/GenericDeserializer.java @@ -0,0 +1,112 @@ +package de.vipra.rest.serializer; + +import java.io.IOException; +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.core.JsonParseException; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.JsonToken; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonDeserializer; + +import de.vipra.util.an.JsonField; +import de.vipra.util.an.JsonIgnore; +import de.vipra.util.an.JsonWrap; +import de.vipra.util.model.Model; + +public class GenericDeserializer<T extends Model> extends JsonDeserializer<T> { + + private final Class<T> clazz; + private final Set<String> nestingPrefixes = new HashSet<>(); + private final Map<String, Field> allFields = new HashMap<>(); + + public GenericDeserializer(Class<T> clazz) { + this.clazz = clazz; + + Field[] fields = clazz.getDeclaredFields(); + for (Field field : fields) { + if (Modifier.isPrivate(field.getModifiers())) { + field.setAccessible(true); + + JsonIgnore ji = field.getDeclaredAnnotation(JsonIgnore.class); + if (ji != null && ji.value()) + continue; + + String name = field.getName(); + + JsonWrap jw = field.getDeclaredAnnotation(JsonWrap.class); + if (jw != null) + name = jw.value() + "." + name; + + JsonField jf = field.getDeclaredAnnotation(JsonField.class); + if (jf != null) + name = jf.value(); + + JsonProperty jp = field.getDeclaredAnnotation(JsonProperty.class); + if (jp != null) + name = jp.value(); + + allFields.put(name, field); + + String[] parts = name.split("\\."); + if (parts.length > 1) { + String currentPrefix = parts[0]; + nestingPrefixes.add(currentPrefix); + for (int i = 1; i < parts.length - 1; i++) { + currentPrefix += "." + parts[i]; + nestingPrefixes.add(currentPrefix); + } + } + } + } + } + + @Override + public T deserialize(JsonParser p, DeserializationContext ctxt) throws IOException, JsonProcessingException { + T value = null; + + try { + value = clazz.newInstance(); + } catch (InstantiationException | IllegalAccessException e) { + throw new IOException("could not instantiate object. Default ctor is missing."); + } + + while (p.nextToken() != JsonToken.END_OBJECT) { + String name = p.getCurrentName(); + Field field = allFields.get(name); + p.nextToken(); + if (field != null) { + try { + field.set(value, p.readValueAs(field.getType())); + } catch (IllegalArgumentException | IllegalAccessException e) {} + } else if (nestingPrefixes.contains(name)) { + getNested(p, value, name + "."); + } + } + + return value; + } + + public void getNested(JsonParser p, T value, String prefix) throws JsonParseException, IOException { + while (p.nextToken() != JsonToken.END_OBJECT) { + String name = prefix + p.getCurrentName(); + Field field = allFields.get(name); + p.nextToken(); + if (field != null) { + try { + field.set(value, p.readValueAs(field.getType())); + } catch (IllegalArgumentException | IllegalAccessException e) {} + } else if (nestingPrefixes.contains(name)) { + getNested(p, value, name + "."); + } + } + } + +} diff --git a/vipra-rest/src/main/java/de/vipra/rest/serializer/GenericSerializer.java b/vipra-rest/src/main/java/de/vipra/rest/serializer/GenericSerializer.java new file mode 100644 index 0000000000000000000000000000000000000000..85311f4a35b009239972ebd794fdbfe7153e81e1 --- /dev/null +++ b/vipra-rest/src/main/java/de/vipra/rest/serializer/GenericSerializer.java @@ -0,0 +1,125 @@ +package de.vipra.rest.serializer; + +import java.io.IOException; +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.bson.types.ObjectId; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonSerializer; +import com.fasterxml.jackson.databind.SerializerProvider; + +import de.vipra.util.an.JsonField; +import de.vipra.util.an.JsonType; +import de.vipra.util.an.JsonWrap; +import de.vipra.util.model.Model; + +public class GenericSerializer<T extends Model> extends JsonSerializer<T> { + + private final String typeName; + private final List<Entry<String, Field>> allFields; + + public GenericSerializer(Class<T> clazz) { + JsonType aField = clazz.getDeclaredAnnotation(JsonType.class); + if (aField != null) + this.typeName = aField.value().toLowerCase(); + else + this.typeName = clazz.getSimpleName().toLowerCase(); + + Map<String, Field> foundFields = new HashMap<>(); + + Field[] fields = clazz.getDeclaredFields(); + for (Field field : fields) { + if (Modifier.isPrivate(field.getModifiers())) { + field.setAccessible(true); + + JsonIgnore ji = field.getDeclaredAnnotation(JsonIgnore.class); + if (ji != null && ji.value()) + continue; + + String name = field.getName(); + + JsonWrap jw = field.getDeclaredAnnotation(JsonWrap.class); + if (jw != null) + name = jw.value() + "." + name; + + JsonField jf = field.getDeclaredAnnotation(JsonField.class); + if (jf != null) + name = jf.value(); + + JsonProperty jp = field.getDeclaredAnnotation(JsonProperty.class); + if (jp != null) + name = jp.value(); + + foundFields.put(name, field); + } + } + + this.allFields = new ArrayList<>(foundFields.entrySet()); + + Collections.sort(this.allFields, new Comparator<Entry<String, Field>>() { + @Override + public int compare(Entry<String, Field> o1, Entry<String, Field> o2) { + return o1.getKey().compareTo(o2.getKey()); + } + }); + } + + @Override + public void serialize(T value, JsonGenerator gen, SerializerProvider serializers) + throws IOException, JsonProcessingException { + + Map<String, Object> map = new HashMap<>(); + pathAdd(map, "type", typeName); + + for (Entry<String, Field> entry : allFields) { + Object v = null; + try { + v = entry.getValue().get(value); + } catch (IllegalArgumentException | IllegalAccessException e) { + e.printStackTrace(); + } + + if (v != null) { + if (v instanceof ObjectId) + v = ((ObjectId) v).toString(); + + pathAdd(map, entry.getKey(), v); + } + } + + serializers.defaultSerializeValue(map, gen); + } + + @SuppressWarnings("unchecked") + private static void pathAdd(Map<String, Object> map, String path, Object value) { + Map<String, Object> current = map; + String[] parts = path.split("\\."); + String name = parts.length > 0 ? parts[parts.length - 1] : path; + + for (int i = 0; i < parts.length - 1; i++) { + Object o = current.get(parts[i]); + if (o == null || !(o instanceof Map)) { + HashMap<String, Object> newMap = new HashMap<>(); + current.put(parts[i], newMap); + current = newMap; + } else { + current = (Map<String, Object>) o; + } + } + + current.put(name, value); + } + +} diff --git a/vipra-rest/src/main/java/de/vipra/rest/serializer/JsonHelper.java b/vipra-rest/src/main/java/de/vipra/rest/serializer/JsonHelper.java deleted file mode 100644 index ebda14f7a65edbcdd8e1909fb60358071dd2e17b..0000000000000000000000000000000000000000 --- a/vipra-rest/src/main/java/de/vipra/rest/serializer/JsonHelper.java +++ /dev/null @@ -1,71 +0,0 @@ -package de.vipra.rest.serializer; - -import java.text.DateFormat; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.Date; - -import com.fasterxml.jackson.databind.JsonNode; - -import de.vipra.util.Constants; - -public class JsonHelper { - - public static <T> T get(JsonNode node, String name, T defaultValue, Class<T> type) { - if (node == null) { - return defaultValue; - } - node = node.get(name); - if (node == null) { - return defaultValue; - } - switch (type.getSimpleName()) { - case "String": - return type.cast(node.asText()); - case "Integer": - return type.cast(node.asInt()); - case "Long": - return type.cast(node.asLong()); - } - return null; - } - - public static String getString(JsonNode node, String name, String defaultValue) { - return get(node, name, defaultValue, String.class); - } - - public static String getString(JsonNode node, String name) { - return getString(node, name, null); - } - - public static long getLong(JsonNode node, String name, long defaultValue) { - return get(node, name, defaultValue, Long.class); - } - - public static long getLong(JsonNode node, String name) { - return getLong(node, name, 0L); - } - - public static int getInt(JsonNode node, String name, int defaultValue) { - return get(node, name, defaultValue, Integer.class); - } - - public static int getInt(JsonNode node, String name) { - return getInt(node, name, 0); - } - - public static String dateToString(Date date) { - DateFormat df = new SimpleDateFormat(Constants.DATETIME_FORMAT); - return df.format(date); - } - - public static Date stringToDate(String source) { - DateFormat df = new SimpleDateFormat(Constants.DATETIME_FORMAT); - try { - return df.parse(source); - } catch (ParseException e) { - return null; - } - } - -} \ No newline at end of file diff --git a/vipra-rest/src/main/java/de/vipra/rest/serializer/TopicDeserializer.java b/vipra-rest/src/main/java/de/vipra/rest/serializer/TopicDeserializer.java deleted file mode 100644 index cb908e17e9723feaf895ee8c2da7f22df66a5a93..0000000000000000000000000000000000000000 --- a/vipra-rest/src/main/java/de/vipra/rest/serializer/TopicDeserializer.java +++ /dev/null @@ -1,43 +0,0 @@ -package de.vipra.rest.serializer; - -import static de.vipra.rest.serializer.JsonHelper.getInt; -import static de.vipra.rest.serializer.JsonHelper.getString; - -import java.io.IOException; - -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.DeserializationContext; -import com.fasterxml.jackson.databind.JsonDeserializer; -import com.fasterxml.jackson.databind.JsonNode; - -import de.vipra.util.model.Topic; - -public class TopicDeserializer extends JsonDeserializer<Topic> { - - @Override - public Topic deserialize(JsonParser p, DeserializationContext ctxt) throws IOException, JsonProcessingException { - Topic topic = null; - - JsonNode node = p.readValueAsTree(); - if (node != null) { - topic = new Topic(); - if (node.has("id")) - topic.setId(getString(node, "id")); - - if (node.has("attributes")) { - JsonNode attrs = node.get("attributes"); - if (attrs.has("name")) - topic.setName(getString(attrs, "name")); - if (attrs.has("index")) - topic.setIndex(getInt(attrs, "index")); - if (attrs.has("words")) { - // TODO implement - } - } - } - - return topic; - } - -} diff --git a/vipra-rest/src/main/java/de/vipra/rest/serializer/TopicSerializer.java b/vipra-rest/src/main/java/de/vipra/rest/serializer/TopicSerializer.java deleted file mode 100644 index 4b9c58cc4649aa2a4d0ca558e8dcb4ecc5335c75..0000000000000000000000000000000000000000 --- a/vipra-rest/src/main/java/de/vipra/rest/serializer/TopicSerializer.java +++ /dev/null @@ -1,32 +0,0 @@ -package de.vipra.rest.serializer; - -import java.io.IOException; - -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.JsonSerializer; -import com.fasterxml.jackson.databind.SerializerProvider; - -import de.vipra.util.model.Topic; - -public class TopicSerializer extends JsonSerializer<Topic> { - - @Override - public void serialize(Topic value, JsonGenerator gen, SerializerProvider serializer) - throws IOException, JsonProcessingException { - gen.writeStartObject(); - gen.writeStringField("id", value.getId().toString()); - gen.writeStringField("type", "topic"); - - gen.writeObjectFieldStart("attributes"); - gen.writeNumberField("index", value.getIndex()); - if (value.getName() != null) - gen.writeStringField("name", value.getName()); - if (value.getWords() != null) - gen.writeObjectField("words", value.getWords()); - gen.writeEndObject(); - - gen.writeEndObject(); - } - -} diff --git a/vipra-rest/src/main/webapp/WEB-INF/web.xml b/vipra-rest/src/main/webapp/WEB-INF/web.xml index 1c086c6bb5c1a3213510bb932d8319404d3a2a7d..82cb399f584552f08f0afe0a51a90ded7c14711b 100644 --- a/vipra-rest/src/main/webapp/WEB-INF/web.xml +++ b/vipra-rest/src/main/webapp/WEB-INF/web.xml @@ -1,8 +1,7 @@ <?xml version="1.0" encoding="UTF-8"?> -<web-app xmlns="http://xmlns.jcp.org/xml/ns/javaee" - xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://xmlns.jcp.org/xml/ns/javaee http://xmlns.jcp.org/xml/ns/javaee/web-app_3_1.xsd" - version="3.1"> +<web-app xmlns="http://xmlns.jcp.org/xml/ns/javaee" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://xmlns.jcp.org/xml/ns/javaee http://xmlns.jcp.org/xml/ns/javaee/web-app_3_1.xsd" + version="3.1"> <servlet> <servlet-name>jersey</servlet-name> <servlet-class>org.glassfish.jersey.servlet.ServletContainer</servlet-class> @@ -15,4 +14,7 @@ <servlet-name>jersey</servlet-name> <url-pattern>/*</url-pattern> </servlet-mapping> + <listener> + <listener-class>de.vipra.rest.provider.InitializationListener</listener-class> + </listener> </web-app> \ No newline at end of file diff --git a/vipra-ui/app/adapters/application.js b/vipra-ui/app/adapters/application.js index c14dc91b50c796761e9b28c764ce301880937f35..0d442b61c170d4fa40db3aae849cc2e0cc783dc3 100644 --- a/vipra-ui/app/adapters/application.js +++ b/vipra-ui/app/adapters/application.js @@ -1,7 +1,7 @@ import DS from 'ember-data'; export default DS.JSONAPIAdapter.extend({ - host: `http://${window.location.hostname}:8000`, + host: `http://${window.location.hostname}:8080`, namespace: 'vipra-rest', updateRecord(store, type, snapshot) { var data = {}; diff --git a/vipra-ui/app/routes/topics/index.js b/vipra-ui/app/routes/topics/index.js index 3d419f443ad90df4c82b79ce87abc09bbe481a77..ef14ad4bccdd995300f8c0ec7ec1f248a840a00c 100644 --- a/vipra-ui/app/routes/topics/index.js +++ b/vipra-ui/app/routes/topics/index.js @@ -1,7 +1,7 @@ import Ember from 'ember'; export default Ember.Route.extend({ - model(params) { + model() { return Ember.RSVP.hash({ topics: this.store.findAll('topic') }); diff --git a/vipra-ui/app/templates/articles/index.hbs b/vipra-ui/app/templates/articles/index.hbs index 86df0159b9c403a4a892787ff0344a30d9265d07..9b8d3390bf86c001a7e35f1f79ef5acd5d1d78b2 100644 --- a/vipra-ui/app/templates/articles/index.hbs +++ b/vipra-ui/app/templates/articles/index.hbs @@ -2,6 +2,6 @@ <h2>Found articles</h2> -{{debounced-input placeholder='Filter' size='50' valueBinding='filter' debounce='150'}} +{{debounced-input placeholder='Filter' size='50' value=filter debounce='150'}} {{articles-list items=model.articles filter=filter}} \ No newline at end of file diff --git a/vipra-util/.settings/org.eclipse.jdt.core.prefs b/vipra-util/.settings/org.eclipse.jdt.core.prefs index 52003d93b882e0735573b1ca1241484bf74cfb2f..84a81ceba42c0cea2774a52031b93584974a6b42 100644 --- a/vipra-util/.settings/org.eclipse.jdt.core.prefs +++ b/vipra-util/.settings/org.eclipse.jdt.core.prefs @@ -61,7 +61,7 @@ org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false org.eclipse.jdt.core.formatter.comment.format_block_comments=true -org.eclipse.jdt.core.formatter.comment.format_header=true +org.eclipse.jdt.core.formatter.comment.format_header=false org.eclipse.jdt.core.formatter.comment.format_html=true org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true org.eclipse.jdt.core.formatter.comment.format_line_comments=true @@ -289,7 +289,7 @@ org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1 org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true org.eclipse.jdt.core.formatter.tabulation.char=tab org.eclipse.jdt.core.formatter.tabulation.size=4 -org.eclipse.jdt.core.formatter.use_on_off_tags=true +org.eclipse.jdt.core.formatter.use_on_off_tags=false org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true org.eclipse.jdt.core.formatter.wrap_before_or_operator_multicatch=true diff --git a/vipra-util/src/main/java/de/vipra/util/Config.java b/vipra-util/src/main/java/de/vipra/util/Config.java index 70892015d160d666c80fd39cc717950bcf67b07e..134f5668e3db1092452d476a85899faf7f9b5c79 100644 --- a/vipra-util/src/main/java/de/vipra/util/Config.java +++ b/vipra-util/src/main/java/de/vipra/util/Config.java @@ -17,8 +17,8 @@ public class Config { DBPORT("db.port", Constants.DEFAULT_PORT), DBNAME("db.name", Constants.DEFAULT_DB), DATADIR("fs.datadir", null), - PREPROCESSOR("an.preprocessor", Constants.DEFAULT_PROCESSOR.name), - ANALYZER("an.analyzer", Constants.DEFAULT_ANALYZER.name), + PREPROCESSOR("an.preprocessor", Constants.Processor.DEFAULT.name), + ANALYZER("an.analyzer", Constants.Analyzer.DEFAULT.name), STOPWORDS("an.stopwords", ""); private final String name; diff --git a/vipra-util/src/main/java/de/vipra/util/ConsoleUtils.java b/vipra-util/src/main/java/de/vipra/util/ConsoleUtils.java index 03eda7359824a89b7bfd093e6ddc9f07954ba60b..a5eefb8f09426027b32a35f51de94dfa01b25690 100644 --- a/vipra-util/src/main/java/de/vipra/util/ConsoleUtils.java +++ b/vipra-util/src/main/java/de/vipra/util/ConsoleUtils.java @@ -15,20 +15,22 @@ public class ConsoleUtils { public static final Logger log = LoggerFactory.getLogger(ConsoleUtils.class); public static enum Choice { - ABORT("abort"), - CONTINUE("continue"), - RETRY("retry"); + ABORT("[a]bort", "a"), + CONTINUE("[c]ontinue", "c"), + RETRY("[r]etry", "r"); public final String choice; + public final String shortChoice; - Choice(String choice) { + Choice(String choice, String shortChoice) { this.choice = choice; + this.shortChoice = shortChoice; } public static Choice fromString(String text) { if (text != null) { for (Choice b : Choice.values()) { - if (text.equalsIgnoreCase(b.choice)) { + if (text.equalsIgnoreCase(b.choice) || text.equalsIgnoreCase(b.shortChoice)) { return b; } } diff --git a/vipra-util/src/main/java/de/vipra/util/Constants.java b/vipra-util/src/main/java/de/vipra/util/Constants.java index 1fe21bf34434a02a9f456d56153d3d46d2a6c6ef..c5eea30ef8a97ba510884f3d40ba5f676078ab3e 100644 --- a/vipra-util/src/main/java/de/vipra/util/Constants.java +++ b/vipra-util/src/main/java/de/vipra/util/Constants.java @@ -7,41 +7,125 @@ import java.util.List; public class Constants { + /* + * FILEBASE + */ + public static final String FB_DIR = "vipra"; public static final Charset FB_ENCODING = StandardCharsets.UTF_8; + /* + * FILES + */ + public static final String CONFIG_FILE = "config.properties"; public static final String INDEX_FILE = "index"; public static final String VOCAB_FILE = "vocab"; + /* + * DATABASE + */ + public static final String DEFAULT_HOST = "localhost"; public static final int DEFAULT_PORT = 27017; public static final String DEFAULT_DB = "test"; + /** + * The global date time format. Will be used for conversion from and to + * database and frontend dates. + */ public static final String DATETIME_FORMAT = "yyyy-MM-dd'T'HH:mm:ss'Z'"; + /** + * Disallowed chars for words in processed text segments. This regular + * expression is used to strip text of characters that should not be + * processed. + */ public static final String CHARS_DISALLOWED = "[^a-zA-Z0-9]"; - public static final Processor DEFAULT_PROCESSOR = Processor.LUCENE; - public static final Analyzer DEFAULT_ANALYZER = Analyzer.JGIBB; - - public static final List<String> STOPWORDS = Arrays.asList("a", "an", "and", "are", "as", "at", "be", "but", "by", - "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", - "then", "there", "these", "they", "this", "to", "was", "will", "with"); - - public static enum Collection { - ARTICLES("articles"), - TOPICS("topics"); - - public final String name; - - private Collection(String name) { - this.name = name; - } - } + /** + * The number of words to be used to generate a topic name. The top n words + * (sorted by likeliness) are used to generate a name for unnamed topics. + */ + public static final int AUTO_TOPIC_WORDS = 4; + + public static final List<String> STOPWORDS = Arrays.asList("'ll", "'ve", "a", "able", "about", "above", "abst", + "accordance", "according", "accordingly", "across", "act", "actually", "added", "adj", "affected", + "affecting", "affects", "after", "afterwards", "again", "against", "ah", "all", "almost", "alone", "along", + "already", "also", "although", "always", "am", "among", "amongst", "an", "and", "announce", "another", + "any", "anybody", "anyhow", "anymore", "anyone", "anything", "anyway", "anyways", "anywhere", "apparently", + "approximately", "are", "area", "areas", "aren", "arent", "arise", "around", "as", "aside", "ask", "asked", + "asking", "asks", "at", "auth", "available", "away", "awfully", "b", "back", "backed", "backing", "backs", + "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "began", + "begin", "beginning", "beginnings", "begins", "behind", "being", "beings", "believe", "below", "beside", + "besides", "best", "better", "between", "beyond", "big", "biol", "both", "brief", "briefly", "but", "by", + "c", "ca", "came", "can", "can't", "cannot", "case", "cases", "cause", "causes", "certain", "certainly", + "clear", "clearly", "co", "com", "come", "comes", "contain", "containing", "contains", "could", "couldnt", + "d", "date", "did", "didn't", "differ", "different", "differently", "do", "does", "doesn't", "doing", + "don't", "done", "down", "downed", "downing", "downs", "downwards", "due", "during", "e", "each", "early", + "ed", "edu", "effect", "eg", "eight", "eighty", "either", "else", "elsewhere", "end", "ended", "ending", + "ends", "enough", "especially", "et", "et-al", "etc", "even", "evenly", "ever", "every", "everybody", + "everyone", "everything", "everywhere", "ex", "except", "f", "face", "faces", "fact", "facts", "far", + "felt", "few", "ff", "fifth", "find", "finds", "first", "five", "fix", "followed", "following", "follows", + "for", "former", "formerly", "forth", "found", "four", "from", "full", "fully", "further", "furthered", + "furthering", "furthermore", "furthers", "g", "gave", "general", "generally", "get", "gets", "getting", + "give", "given", "gives", "giving", "go", "goes", "going", "gone", "good", "goods", "got", "gotten", + "great", "greater", "greatest", "group", "grouped", "grouping", "groups", "h", "had", "happens", "hardly", + "has", "hasn't", "have", "haven't", "having", "he", "hed", "hence", "her", "here", "hereafter", "hereby", + "herein", "heres", "hereupon", "hers", "herself", "hes", "hi", "hid", "high", "higher", "highest", "him", + "himself", "his", "hither", "home", "how", "howbeit", "however", "hundred", "i", "i'll", "i've", "id", "ie", + "if", "im", "immediate", "immediately", "importance", "important", "in", "inc", "indeed", "index", + "information", "instead", "interest", "interested", "interesting", "interests", "into", "invention", + "inward", "is", "isn't", "it", "it'll", "itd", "its", "itself", "j", "just", "k", "keep", "keeps", "kept", + "kg", "kind", "km", "knew", "know", "known", "knows", "l", "large", "largely", "last", "lately", "later", + "latest", "latter", "latterly", "least", "less", "lest", "let", "lets", "like", "liked", "likely", "line", + "little", "long", "longer", "longest", "look", "looking", "looks", "ltd", "m", "made", "mainly", "make", + "makes", "making", "man", "many", "may", "maybe", "me", "mean", "means", "meantime", "meanwhile", "member", + "members", "men", "merely", "mg", "might", "million", "miss", "ml", "more", "moreover", "most", "mostly", + "mr", "mrs", "much", "mug", "must", "my", "myself", "n", "na", "name", "namely", "nay", "nd", "near", + "nearly", "necessarily", "necessary", "need", "needed", "needing", "needs", "neither", "never", + "nevertheless", "new", "newer", "newest", "next", "nine", "ninety", "no", "nobody", "non", "none", + "nonetheless", "noone", "nor", "normally", "nos", "not", "noted", "nothing", "now", "nowhere", "number", + "numbers", "o", "obtain", "obtained", "obviously", "of", "off", "often", "oh", "ok", "okay", "old", "older", + "oldest", "omitted", "on", "once", "one", "ones", "only", "onto", "open", "opened", "opening", "opens", + "or", "ord", "order", "ordered", "ordering", "orders", "other", "others", "otherwise", "ought", "our", + "ours", "ourselves", "out", "outside", "over", "overall", "owing", "own", "p", "page", "pages", "part", + "parted", "particular", "particularly", "parting", "parts", "past", "per", "perhaps", "place", "placed", + "places", "please", "plus", "point", "pointed", "pointing", "points", "poorly", "possible", "possibly", + "potentially", "pp", "predominantly", "present", "presented", "presenting", "presents", "previously", + "primarily", "probably", "problem", "problems", "promptly", "proud", "provides", "put", "puts", "q", "que", + "quickly", "quite", "qv", "r", "ran", "rather", "rd", "re", "readily", "really", "recent", "recently", + "ref", "refs", "regarding", "regardless", "regards", "related", "relatively", "research", "respectively", + "resulted", "resulting", "results", "right", "room", "rooms", "run", "s", "said", "same", "saw", "say", + "saying", "says", "sec", "second", "seconds", "section", "see", "seeing", "seem", "seemed", "seeming", + "seems", "seen", "sees", "self", "selves", "sent", "seven", "several", "shall", "she", "she'll", "shed", + "shes", "should", "shouldn't", "show", "showed", "showing", "shown", "showns", "shows", "side", "sides", + "significant", "significantly", "similar", "similarly", "since", "six", "slightly", "small", "smaller", + "smallest", "so", "some", "somebody", "somehow", "someone", "somethan", "something", "sometime", + "sometimes", "somewhat", "somewhere", "soon", "sorry", "specifically", "specified", "specify", "specifying", + "state", "states", "still", "stop", "strongly", "sub", "substantially", "successfully", "such", + "sufficiently", "suggest", "sup", "sure", "t", "take", "taken", "taking", "tell", "tends", "th", "than", + "thank", "thanks", "thanx", "that", "that'll", "that've", "thats", "the", "their", "theirs", "them", + "themselves", "then", "thence", "there", "there'll", "there've", "thereafter", "thereby", "thered", + "therefore", "therein", "thereof", "therere", "theres", "thereto", "thereupon", "these", "they", "they'll", + "they've", "theyd", "theyre", "thing", "things", "think", "thinks", "this", "those", "thou", "though", + "thoughh", "thought", "thoughts", "thousand", "three", "throug", "through", "throughout", "thru", "thus", + "til", "tip", "to", "today", "together", "too", "took", "toward", "towards", "tried", "tries", "truly", + "try", "trying", "ts", "turn", "turned", "turning", "turns", "twice", "two", "u", "un", "under", + "unfortunately", "unless", "unlike", "unlikely", "until", "unto", "up", "upon", "ups", "us", "use", "used", + "useful", "usefully", "usefulness", "uses", "using", "usually", "v", "value", "various", "very", "via", + "viz", "vol", "vols", "vs", "w", "want", "wanted", "wanting", "wants", "was", "wasnt", "way", "ways", "we", + "we'll", "we've", "wed", "welcome", "well", "wells", "went", "were", "werent", "what", "what'll", + "whatever", "whats", "when", "whence", "whenever", "where", "whereafter", "whereas", "whereby", "wherein", + "wheres", "whereupon", "wherever", "whether", "which", "while", "whim", "whither", "who", "who'll", "whod", + "whoever", "whole", "whom", "whomever", "whos", "whose", "why", "widely", "will", "willing", "wish", "with", + "within", "without", "wont", "words", "work", "worked", "working", "works", "world", "would", "wouldnt", + "www", "x", "y", "year", "years", "yes", "yet", "you", "you'll", "you've", "youd", "young", "younger", + "youngest", "your", "youre", "yours", "yourself", "yourselves", "z", "zero"); public static enum Processor { CUSTOM("custom"), + CORENLP("corenlp"), LUCENE("lucene"), DEFAULT(LUCENE); diff --git a/vipra-util/src/main/java/de/vipra/util/an/JsonField.java b/vipra-util/src/main/java/de/vipra/util/an/JsonField.java new file mode 100644 index 0000000000000000000000000000000000000000..058f5301bfa97eca60a9a7f930051accc0c4154a --- /dev/null +++ b/vipra-util/src/main/java/de/vipra/util/an/JsonField.java @@ -0,0 +1,14 @@ +package de.vipra.util.an; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.FIELD) +public @interface JsonField { + + public String value() default ""; + +} diff --git a/vipra-util/src/main/java/de/vipra/util/an/JsonIgnore.java b/vipra-util/src/main/java/de/vipra/util/an/JsonIgnore.java new file mode 100644 index 0000000000000000000000000000000000000000..bd89d6f11dc38327f62c74b6963ddf54e2be9249 --- /dev/null +++ b/vipra-util/src/main/java/de/vipra/util/an/JsonIgnore.java @@ -0,0 +1,14 @@ +package de.vipra.util.an; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.TYPE) +public @interface JsonIgnore { + + public boolean value() default false; + +} diff --git a/vipra-util/src/main/java/de/vipra/util/an/JsonType.java b/vipra-util/src/main/java/de/vipra/util/an/JsonType.java new file mode 100644 index 0000000000000000000000000000000000000000..e3a30187c8c6ffa0ec4d83d0a53dd92fe302bcb5 --- /dev/null +++ b/vipra-util/src/main/java/de/vipra/util/an/JsonType.java @@ -0,0 +1,14 @@ +package de.vipra.util.an; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.TYPE) +public @interface JsonType { + + public String value(); + +} diff --git a/vipra-util/src/main/java/de/vipra/util/an/JsonWrap.java b/vipra-util/src/main/java/de/vipra/util/an/JsonWrap.java new file mode 100644 index 0000000000000000000000000000000000000000..bfeea312be1f171fa8c2918b9411b492698bb196 --- /dev/null +++ b/vipra-util/src/main/java/de/vipra/util/an/JsonWrap.java @@ -0,0 +1,14 @@ +package de.vipra.util.an; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.FIELD) +public @interface JsonWrap { + + public String value() default ""; + +} diff --git a/vipra-util/src/main/java/de/vipra/util/model/Article.java b/vipra-util/src/main/java/de/vipra/util/model/Article.java index e969f707ef5aebb5393bc8b9554f5eb99c89520b..17531569ba14608ae5d199653a8b0ec11fa3c418 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/Article.java +++ b/vipra-util/src/main/java/de/vipra/util/model/Article.java @@ -2,32 +2,60 @@ package de.vipra.util.model; import java.io.File; import java.io.IOException; +import java.io.Serializable; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Date; import java.util.List; import org.bson.types.ObjectId; +import org.mongodb.morphia.annotations.Embedded; import org.mongodb.morphia.annotations.Entity; import org.mongodb.morphia.annotations.Id; +import org.mongodb.morphia.annotations.PrePersist; import de.vipra.util.Constants; import de.vipra.util.FileUtils; import de.vipra.util.MongoUtils; import de.vipra.util.StringUtils; +import de.vipra.util.an.JsonField; -@Entity(value="articles", noClassnameStored=true) -public class Article extends Model { +@Entity(value = "articles", noClassnameStored = true) +public class Article extends Model implements Serializable { + + private static final long serialVersionUID = -3357348905924854240L; @Id private ObjectId id; + + @JsonField("attributes.title") private String title; + + @JsonField("attributes.text") private String text; + + @JsonField("attributes.url") private String url; + + @JsonField("attributes.date") private Date date; + + @JsonField("attributes.complete") private boolean complete; + + @Embedded + @JsonField("attributes.stats") private ArticleStats stats; - private List<TopicCount> topics; + + @Embedded + @JsonField("attributes.topics") + private List<TopicRef> topics; + + @JsonField("attributes.created") + private Date created = new Date(); + + @JsonField("attributes.modified") + private Date modified; public ObjectId getId() { return id; @@ -36,7 +64,7 @@ public class Article extends Model { public void setId(ObjectId id) { this.id = id; } - + public void setId(String id) { this.id = MongoUtils.objectId(id); } @@ -96,14 +124,30 @@ public class Article extends Model { } catch (ParseException e) {} } - public List<TopicCount> getTopics() { + public List<TopicRef> getTopics() { return topics; } - public void setTopics(List<TopicCount> topics) { + public void setTopics(List<TopicRef> topics) { this.topics = topics; } + public Date getCreated() { + return created; + } + + public void setCreated(Date created) { + this.created = created; + } + + public Date getModified() { + return modified; + } + + public void setModified(Date modified) { + this.modified = modified; + } + @Override public void fromFile(File file) throws IOException { List<String> lines = FileUtils.readFile(file); @@ -116,4 +160,15 @@ public class Article extends Model { return getTitle() + "\n" + getText(); } + @PrePersist + public void prePersist() { + this.modified = new Date(); + } + + @Override + public String toString() { + return Article.class.getSimpleName() + "[id:" + id + ", title:" + title + ", url:" + url + ", date:" + date + + ", created:" + created + ", modified:" + modified + "]"; + } + } \ No newline at end of file diff --git a/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java b/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java index c120cf8d55b13b000d10b1879ebb3f02e8ca1939..579d5621a7498824adf6ea19ebf9acbc8c749e8a 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java +++ b/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java @@ -1,18 +1,34 @@ package de.vipra.util.model; +import java.io.Serializable; import java.util.HashMap; import java.util.Map; +import org.bson.types.ObjectId; import org.mongodb.morphia.annotations.Embedded; +import org.mongodb.morphia.annotations.Entity; +import org.mongodb.morphia.annotations.Id; -@Embedded -public class ArticleStats { +@Entity +public class ArticleStats implements Serializable { + private static final long serialVersionUID = -4712841724990200627L; + + @Id + private ObjectId id; private long wordCount; private long uniqueWordCount; @Embedded private Map<String, TermFrequency> uniqueWords; + public ObjectId getId() { + return id; + } + + public void setId(ObjectId id) { + this.id = id; + } + public long getWordCount() { return wordCount; } @@ -68,4 +84,10 @@ public class ArticleStats { return stats; } + @Override + public String toString() { + return ArticleStats.class.getSimpleName() + "[id:" + id + ", wordCount:" + wordCount + ", uniqueWordCount:" + + uniqueWordCount + "]"; + } + } diff --git a/vipra-util/src/main/java/de/vipra/util/model/Model.java b/vipra-util/src/main/java/de/vipra/util/model/Model.java index 666b7dc5e76314a7444bbdd2d890570771cab803..70baf2257b93c900ef71832336926302149075bd 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/Model.java +++ b/vipra-util/src/main/java/de/vipra/util/model/Model.java @@ -2,6 +2,7 @@ package de.vipra.util.model; import java.io.File; import java.io.IOException; +import java.io.Serializable; import java.net.URI; import java.net.URISyntaxException; @@ -10,7 +11,9 @@ import org.bson.types.ObjectId; import de.vipra.util.Constants; -public abstract class Model { +public abstract class Model implements Serializable { + + private static final long serialVersionUID = -1991594352707918633L; public URI uri(URI base) { try { @@ -25,9 +28,9 @@ public abstract class Model { } public abstract ObjectId getId(); - + public abstract void setId(ObjectId id); - + public abstract void setId(String id); public abstract void fromFile(File file) throws IOException; diff --git a/vipra-util/src/main/java/de/vipra/util/model/TermFrequency.java b/vipra-util/src/main/java/de/vipra/util/model/TermFrequency.java index adc0f061703b734a4597734abee4849bd66a8c98..75099a27a65c8fe9e7bbb79fd8302e8809359c0f 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/TermFrequency.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TermFrequency.java @@ -1,9 +1,13 @@ package de.vipra.util.model; +import java.io.Serializable; + import org.mongodb.morphia.annotations.Embedded; @Embedded -public class TermFrequency { +public class TermFrequency implements Serializable { + + private static final long serialVersionUID = 4042573510472738071L; private long termFrequency = 0; private double normalizedTermFrequency = 0; @@ -32,4 +36,10 @@ public class TermFrequency { setTermFrequency(getTermFrequency() + 1); } + @Override + public String toString() { + return TermFrequency.class.getSimpleName() + "[termFrequency:" + termFrequency + ", normalizedTermFrequency:" + + normalizedTermFrequency + "]"; + } + } diff --git a/vipra-util/src/main/java/de/vipra/util/model/Topic.java b/vipra-util/src/main/java/de/vipra/util/model/Topic.java index 24758b3738315476116e23aae29ba6b3a387ff9a..1d9488cd2e2235e8979d82a6fe304e1c1da5aa5b 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/Topic.java +++ b/vipra-util/src/main/java/de/vipra/util/model/Topic.java @@ -2,26 +2,47 @@ package de.vipra.util.model; import java.io.File; import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Date; import java.util.List; import org.bson.types.ObjectId; import org.mongodb.morphia.annotations.Embedded; import org.mongodb.morphia.annotations.Entity; import org.mongodb.morphia.annotations.Id; +import org.mongodb.morphia.annotations.PrePersist; +import de.vipra.util.Constants; import de.vipra.util.MongoUtils; +import de.vipra.util.StringUtils; +import de.vipra.util.an.JsonWrap; import de.vipra.util.ex.NotImplementedException; -@Entity(value="topics", noClassnameStored=true) -public class Topic extends Model { +@Entity(value = "topics", noClassnameStored = true) +public class Topic extends Model implements Serializable { + + private static final long serialVersionUID = 7121629487498450992L; @Id private ObjectId id; + + @JsonWrap("attributes") private int index; + + @JsonWrap("attributes") private String name; + @Embedded + @JsonWrap("attributes") private List<TopicWord> words; + @JsonWrap("attributes") + private Date created = new Date(); + + @JsonWrap("attributes") + private Date modified; + public Topic() {} public Topic(List<TopicWord> words) { @@ -64,6 +85,35 @@ public class Topic extends Model { this.words = words; } + public Date getCreated() { + return created; + } + + public void setCreated(Date created) { + this.created = created; + } + + public Date getModified() { + return modified; + } + + public void setModified(Date modified) { + this.modified = modified; + } + + public String getNameFromWords() { + String name = null; + if (words != null && words.size() > 0) { + int size = Math.min(Constants.AUTO_TOPIC_WORDS, words.size()); + List<String> topWords = new ArrayList<>(size); + for (int i = 0; i < size; i++) { + topWords.add(words.get(i).getWord()); + } + name = StringUtils.join(topWords); + } + return name; + } + @Override public void fromFile(File file) throws IOException { throw new NotImplementedException(); @@ -74,4 +124,15 @@ public class Topic extends Model { throw new NotImplementedException(); } + @Override + public String toString() { + return Topic.class.getSimpleName() + "[id:" + id + ", name:" + name + ", created:" + created + ", modified:" + + modified + "]"; + } + + @PrePersist + public void prePersist() { + this.modified = new Date(); + } + } diff --git a/vipra-util/src/main/java/de/vipra/util/model/TopicCount.java b/vipra-util/src/main/java/de/vipra/util/model/TopicCount.java deleted file mode 100644 index 18c0fb204db0c7a59b556df6e8b9f1479a086c30..0000000000000000000000000000000000000000 --- a/vipra-util/src/main/java/de/vipra/util/model/TopicCount.java +++ /dev/null @@ -1,39 +0,0 @@ -package de.vipra.util.model; - -import org.mongodb.morphia.annotations.Embedded; - -@Embedded -public class TopicCount implements Comparable<TopicCount> { - - private String id; - private int count; - - public TopicCount() {} - - public TopicCount(String id, int count) { - this.id = id; - this.count = count; - } - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public int getCount() { - return count; - } - - public void setCount(int count) { - this.count = count; - } - - @Override - public int compareTo(TopicCount arg0) { - return count - arg0.getCount(); - } - -} diff --git a/vipra-util/src/main/java/de/vipra/util/model/TopicRef.java b/vipra-util/src/main/java/de/vipra/util/model/TopicRef.java new file mode 100644 index 0000000000000000000000000000000000000000..e6b6df22bf13665dbc7b82990bbf34b58002fda1 --- /dev/null +++ b/vipra-util/src/main/java/de/vipra/util/model/TopicRef.java @@ -0,0 +1,46 @@ +package de.vipra.util.model; + +import java.io.Serializable; + +import org.mongodb.morphia.annotations.Embedded; +import org.mongodb.morphia.annotations.Reference; + +@Embedded +public class TopicRef implements Comparable<TopicRef>, Serializable { + + private static final long serialVersionUID = 3301635858822787398L; + + private String topicId; + @Reference(lazy = true) + private Topic topic; + private int count; + + public String getTopicId() { + return topicId; + } + + public void setTopicId(String id) { + this.topicId = id; + this.topic = new Topic(); + this.topic.setId(id); + } + + public int getCount() { + return count; + } + + public void setCount(int count) { + this.count = count; + } + + @Override + public int compareTo(TopicRef arg0) { + return count - arg0.getCount(); + } + + @Override + public String toString() { + return TopicRef.class.getSimpleName() + "[topicId:" + topicId + ",count:" + count + "]"; + } + +} diff --git a/vipra-util/src/main/java/de/vipra/util/model/TopicWord.java b/vipra-util/src/main/java/de/vipra/util/model/TopicWord.java index b5d61665083eb32a458e3acbc69ec27134f49319..8a1f1f20c20d1f6f950e578b460102d27aaf2a72 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/TopicWord.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TopicWord.java @@ -1,9 +1,13 @@ package de.vipra.util.model; +import java.io.Serializable; + import org.mongodb.morphia.annotations.Embedded; @Embedded -public class TopicWord { +public class TopicWord implements Comparable<TopicWord>, Serializable { + + private static final long serialVersionUID = -5409441821591159243L; private String word; private double likeliness; @@ -31,4 +35,19 @@ public class TopicWord { this.likeliness = likeliness; } + @Override + public int compareTo(TopicWord o) { + double l = likeliness - o.getLikeliness(); + if (l > 0) + return 1; + if (l < 0) + return -1; + return 0; + } + + @Override + public String toString() { + return TopicWord.class.getSimpleName() + "[word:" + word + ", likeliness:" + likeliness + "]"; + } + } diff --git a/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java b/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java index 29dbc66c2e71fb0313b478e0fbd98a2519fe505d..6172a5e2142ceab036ea990dfed87e82e66e207c 100644 --- a/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java +++ b/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java @@ -6,7 +6,6 @@ import org.mongodb.morphia.Datastore; import org.mongodb.morphia.query.Query; import de.vipra.util.Config; -import de.vipra.util.Constants; import de.vipra.util.Mongo; import de.vipra.util.MongoUtils; import de.vipra.util.ex.ConfigException; @@ -60,8 +59,8 @@ public class DatabaseService<T extends Model> implements Service<T, DatabaseExce return datastore.getCount(clazz); } - public static <T extends Model> DatabaseService<T> getDatabaseService(Config config, - Constants.Collection collection, Class<T> clazz) throws ConfigException { + public static <T extends Model> DatabaseService<T> getDatabaseService(Config config, Class<T> clazz) + throws ConfigException { Mongo mongo = Mongo.getInstance(config); return new DatabaseService<T>(mongo, clazz); }