From 085a1da8aca8d650c4133e32192abdb901dcfeee Mon Sep 17 00:00:00 2001 From: Eike Cochu <eike@cochu.com> Date: Thu, 21 Jan 2016 21:44:20 +0100 Subject: [PATCH] updated rest service added response shortcuts batch creating topics and words on import using words themselves as database ids to skip prior creation added words resource added rest service pagination set default for topics and topic words in topic modeling moved wordmap to proper package added 10-articles test json added exception handling to rest requests added jackson to utils package for annotations removed jsonfield,jsonignore annotations, already exists in jackson --- jgibblda/.project | 6 + ma-impl.sublime-workspace | 41 +++--- vipra-cmd/.project | 6 + .../de/vipra/cmd/lda/JGibbLDAAnalyzer.java | 26 ++-- .../java/de/vipra/cmd/lda/LDAAnalyzer.java | 2 +- .../de/vipra/cmd/option/ClearCommand.java | 7 +- .../de/vipra/cmd/option/DeleteCommand.java | 6 +- .../de/vipra/cmd/option/ImportCommand.java | 136 ++++++++---------- .../de/vipra/cmd/option/StatsCommand.java | 3 +- .../de/vipra/cmd/text/CoreNLPProcessor.java | 2 +- .../de/vipra/cmd/text/StopwordsAnnotator.java | 2 +- vipra-rest/.project | 6 + vipra-rest/pom.xml | 15 +- .../java/de/vipra/rest/model/Wrapper.java | 107 +++++++++++++- .../rest/provider/ObjectMapperProvider.java | 4 + .../vipra/rest/resource/ArticleResource.java | 118 ++++++++------- .../de/vipra/rest/resource/TopicResource.java | 94 +++++++----- .../de/vipra/rest/resource/WordResource.java | 124 ++++++++++++++++ .../rest/serializer/GenericDeserializer.java | 13 +- .../rest/serializer/GenericSerializer.java | 20 +-- .../{articles-list.js => items-list.js} | 2 + vipra-ui/app/components/topics-list.js | 15 -- vipra-ui/app/models/article.js | 2 + vipra-ui/app/models/topic.js | 2 + vipra-ui/app/models/word.js | 9 ++ vipra-ui/app/router.js | 3 + vipra-ui/app/routes/words/index.js | 9 ++ vipra-ui/app/routes/words/show.js | 9 ++ vipra-ui/app/templates/articles/index.hbs | 2 +- .../templates/components/articles-list.hbs | 5 - .../app/templates/components/items-list.hbs | 5 + .../app/templates/components/topics-list.hbs | 5 - vipra-ui/app/templates/index.hbs | 6 +- vipra-ui/app/templates/topics/index.hbs | 2 +- vipra-ui/app/templates/topics/show/index.hbs | 21 ++- vipra-ui/app/templates/words.hbs | 6 + vipra-ui/app/templates/words/index.hbs | 3 + vipra-ui/app/templates/words/show.hbs | 1 + vipra-ui/tests/unit/routes/words-test.js | 11 ++ vipra-util/.project | 6 + vipra-util/pom.xml | 13 ++ .../main/java/de/vipra/util/Constants.java | 56 ++++++-- .../main/java/de/vipra/util/NumberUtils.java | 10 ++ .../de/vipra/util/{model => }/WordMap.java | 49 ++----- .../main/java/de/vipra/util/an/JsonField.java | 14 -- .../java/de/vipra/util/an/JsonIgnore.java | 14 -- .../java/de/vipra/util/model/Article.java | 11 +- .../de/vipra/util/model/ArticleStats.java | 2 + .../java/de/vipra/util/model/FileModel.java | 21 +++ .../main/java/de/vipra/util/model/Model.java | 36 +---- .../main/java/de/vipra/util/model/Topic.java | 20 +-- .../java/de/vipra/util/model/TopicFull.java | 29 ++-- .../java/de/vipra/util/model/TopicRef.java | 3 - .../java/de/vipra/util/model/TopicWord.java | 18 +++ .../main/java/de/vipra/util/model/Word.java | 59 +++++--- .../vipra/util/service/DatabaseService.java | 43 ++++-- .../java/de/vipra/util/service/Service.java | 16 +-- 57 files changed, 834 insertions(+), 442 deletions(-) create mode 100644 vipra-rest/src/main/java/de/vipra/rest/resource/WordResource.java rename vipra-ui/app/components/{articles-list.js => items-list.js} (96%) delete mode 100644 vipra-ui/app/components/topics-list.js create mode 100644 vipra-ui/app/models/word.js create mode 100644 vipra-ui/app/routes/words/index.js create mode 100644 vipra-ui/app/routes/words/show.js delete mode 100644 vipra-ui/app/templates/components/articles-list.hbs create mode 100644 vipra-ui/app/templates/components/items-list.hbs delete mode 100644 vipra-ui/app/templates/components/topics-list.hbs create mode 100644 vipra-ui/app/templates/words.hbs create mode 100644 vipra-ui/app/templates/words/index.hbs create mode 100644 vipra-ui/app/templates/words/show.hbs create mode 100644 vipra-ui/tests/unit/routes/words-test.js create mode 100644 vipra-util/src/main/java/de/vipra/util/NumberUtils.java rename vipra-util/src/main/java/de/vipra/util/{model => }/WordMap.java (56%) delete mode 100644 vipra-util/src/main/java/de/vipra/util/an/JsonField.java delete mode 100644 vipra-util/src/main/java/de/vipra/util/an/JsonIgnore.java create mode 100644 vipra-util/src/main/java/de/vipra/util/model/FileModel.java diff --git a/jgibblda/.project b/jgibblda/.project index 9686b2ae..2f6739e9 100644 --- a/jgibblda/.project +++ b/jgibblda/.project @@ -25,6 +25,11 @@ <arguments> </arguments> </buildCommand> + <buildCommand> + <name>net.sourceforge.metrics.builder</name> + <arguments> + </arguments> + </buildCommand> </buildSpec> <natures> <nature>org.eclipse.jem.workbench.JavaEMFNature</nature> @@ -32,5 +37,6 @@ <nature>org.eclipse.m2e.core.maven2Nature</nature> <nature>org.eclipse.jdt.core.javanature</nature> <nature>org.eclipse.wst.common.project.facet.core.nature</nature> + <nature>net.sourceforge.metrics.nature</nature> </natures> </projectDescription> diff --git a/ma-impl.sublime-workspace b/ma-impl.sublime-workspace index 1178f378..c8f2f55d 100644 --- a/ma-impl.sublime-workspace +++ b/ma-impl.sublime-workspace @@ -454,32 +454,46 @@ "expanded_folders": [ "/home/eike/repos/master/ma-impl", - "/home/eike/repos/master/ma-impl/vipra-cmd", "/home/eike/repos/master/ma-impl/vipra-ui", "/home/eike/repos/master/ma-impl/vipra-ui/app", - "/home/eike/repos/master/ma-impl/vipra-ui/app/adapters", "/home/eike/repos/master/ma-impl/vipra-ui/app/components", + "/home/eike/repos/master/ma-impl/vipra-ui/app/models", "/home/eike/repos/master/ma-impl/vipra-ui/app/routes", "/home/eike/repos/master/ma-impl/vipra-ui/app/routes/topics", + "/home/eike/repos/master/ma-impl/vipra-ui/app/routes/words", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/articles", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/components", - "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/topics", - "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/topics/show" + "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/topics/show", + "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/words", + "/home/eike/repos/master/ma-impl/vm/data" ], "file_history": [ + "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/words/show.hbs", + "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/articles/index.hbs", + "/home/eike/repos/master/ma-impl/vipra-ui/app/components/items-list.js", + "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/components/items-list.hbs", + "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/words/index.hbs", + "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/topics/show/index.hbs", + "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/words.hbs", + "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/index.hbs", + "/home/eike/repos/master/ma-impl/vipra-ui/app/router.js", + "/home/eike/repos/master/ma-impl/vipra-ui/app/routes/words", + "/home/eike/repos/master/ma-impl/vipra-ui/app/routes/words/index.js", + "/home/eike/repos/master/ma-impl/vipra-ui/app/models/word.js", + "/home/eike/repos/master/ma-impl/vipra-ui/app/models/topic.js", + "/home/eike/repos/master/ma-impl/vipra-ui/app/models/article.js", + "/home/eike/repos/master/ma-impl/vm/data/test-10.json", "/home/eike/repos/master/ma-impl/vipra-ui/app/components/topic-link.js", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/articles/show.hbs", "/home/eike/.local/share/vipra/jgibb/jgibb.twords", "/home/eike/.local/share/vipra/jgibb/jgibb.tassign", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/topics/index.hbs", - "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/topics/show/index.hbs", "/home/eike/Downloads/FRITZ.Box 7490 113.06.30_17.01.16_2147.export", "/home/eike/repos/master/ma-impl/vm/data/test-1.json", "/home/eike/repos/master/ma-impl/vm/data/test-2.json", "/home/eike/repos/master/ma-impl/vipra-ui/app/routes/topics/index.js", - "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/articles/index.hbs", "/home/eike/repos/master/ma-impl/vipra-ui/app/adapters/application.js", "/home/eike/.local/share/vipra/jgibb/jgibb", "/home/eike/repos/master/ma-doc/thesis/thesis.tex", @@ -488,11 +502,8 @@ "/home/eike/repos/master/ma-impl/vm/webapps/ROOT/index.html", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/articles.hbs", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/topics.hbs", - "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/index.hbs", - "/home/eike/repos/master/ma-impl/vipra-ui/app/models/topic.js", "/home/eike/repos/master/ma-impl/vipra-ui/app/routes/topics/show/edit.js", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/components/topics-list.hbs", - "/home/eike/repos/master/ma-impl/vipra-ui/app/router.js", "/home/eike/repos/master/ma-impl/vipra-ui/app/routes/topics/show.js", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/topics/show/edit.hbs", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/topics/show.hbs", @@ -510,7 +521,6 @@ "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/articles/list.hbs", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/components/articles-list.hbs", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/topics/list.hbs", - "/home/eike/repos/master/ma-impl/vipra-ui/app/models/article.js", "/home/eike/repos/master/ma-impl/vipra-ui/app/components/filter-list.js", "/home/eike/repos/master/ma-impl/vipra-ui/app/helpers/topic-numi.js", "/home/eike/repos/master/ma-impl/vipra-ui/app/helpers/topicname.js", @@ -587,16 +597,7 @@ "/home/eike/Repositories/fu/ss15/ma/impl/vipra-ui/app/serializers/application.js", "/home/eike/Repositories/fu/ss15/ma/impl/vipra-ui/bower.json", "/home/eike/Repositories/fu/ss15/ma/impl/vipra-ui/bower_components/ember/.bower.json", - "/home/eike/Repositories/fu/ss15/ma/impl/vipra-ui/package.json", - "/home/eike/Repositories/fu/ss15/ma/impl/vipra-ui/app/app.js", - "/home/eike/Repositories/fu/ss15/ma/doc/thesis/thesis.tex", - "/home/eike/Repositories/fu/ss15/ma/impl/Vagrantfile", - "/home/eike/Repositories/fu/ss15/ma/impl/tmbs-processor-backend/src/main/scala/de/cochu/tmbs/processor/MongoDBTest.scala", - "/home/eike/Repositories/fu/ss15/ma/impl/.gitignore", - "/home/eike/Repositories/fu/ss15/ma/impl/vm/config/initd-tomcat", - "/home/eike/Repositories/fu/ss15/ma/impl/vm/config/environment", - "/home/eike/Repositories/fu/ss15/ma/impl/tmbs-frontend/app/templates/articles.hbs", - "/home/eike/Repositories/fu/ss15/ma/impl/tmbs-frontend/app/templates/application.hbs" + "/home/eike/Repositories/fu/ss15/ma/impl/vipra-ui/package.json" ], "find": { diff --git a/vipra-cmd/.project b/vipra-cmd/.project index e6b85466..b44a107f 100644 --- a/vipra-cmd/.project +++ b/vipra-cmd/.project @@ -25,6 +25,11 @@ <arguments> </arguments> </buildCommand> + <buildCommand> + <name>net.sourceforge.metrics.builder</name> + <arguments> + </arguments> + </buildCommand> </buildSpec> <natures> <nature>org.eclipse.jem.workbench.JavaEMFNature</nature> @@ -32,5 +37,6 @@ <nature>org.eclipse.jdt.core.javanature</nature> <nature>org.eclipse.m2e.core.maven2Nature</nature> <nature>org.eclipse.wst.common.project.facet.core.nature</nature> + <nature>net.sourceforge.metrics.nature</nature> </natures> </projectDescription> diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbLDAAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbLDAAnalyzer.java index df84fbe2..217f210e 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbLDAAnalyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbLDAAnalyzer.java @@ -14,14 +14,16 @@ import org.slf4j.LoggerFactory; import de.vipra.cmd.ex.LDAAnalyzerException; import de.vipra.util.Config; +import de.vipra.util.Constants; import de.vipra.util.ConvertStream; +import de.vipra.util.NumberUtils; import de.vipra.util.StringUtils; +import de.vipra.util.WordMap; import de.vipra.util.ex.ConfigException; import de.vipra.util.model.TopicFull; import de.vipra.util.model.TopicRef; import de.vipra.util.model.TopicWord; import de.vipra.util.model.Word; -import de.vipra.util.model.WordMap; import jgibblda.Estimator; import jgibblda.Inferencer; import jgibblda.LDACmdOption; @@ -30,6 +32,7 @@ import jgibblda.Model; public class JGibbLDAAnalyzer extends LDAAnalyzer { public static final Logger log = LoggerFactory.getLogger(JGibbLDAAnalyzer.class); + public static final String NAME = "jgibb"; private File dataDir; private File modelDir; @@ -51,15 +54,17 @@ public class JGibbLDAAnalyzer extends LDAAnalyzer { throw new LDAAnalyzerException(e); } - modelDir = new File(dataDir, "jgibb"); + modelDir = new File(dataDir, NAME); options.dir = modelDir.getAbsolutePath(); - options.estc = new File(modelDir, "jgibb.tassign").exists(); + options.estc = new File(modelDir, NAME + ".tassign").exists(); options.est = !options.estc; + options.K = Constants.K_TOPICS; + options.twords = Constants.K_TOPIC_WORDS; - modelFile = new File(modelDir, "jgibb"); + modelFile = new File(modelDir, NAME); options.dfile = modelFile.getName(); - options.modelName = "jgibb"; + options.modelName = NAME; this.wordMap = wordMap; } @@ -87,7 +92,7 @@ public class JGibbLDAAnalyzer extends LDAAnalyzer { @Override public ConvertStream<TopicFull> getTopicDefinitions() throws LDAAnalyzerException { - File twords = new File(modelDir, "jgibb.twords"); + File twords = new File(modelDir, NAME + ".twords"); try { return new ConvertStream<TopicFull>(twords) { @Override @@ -113,7 +118,8 @@ public class JGibbLDAAnalyzer extends LDAAnalyzer { String[] parts = nextLine.trim().split("\\s+"); try { Word word = wordMap.get(parts[0]); - double likeliness = Double.parseDouble(parts[1]); + double likeliness = NumberUtils.roundToPrecision(Double.parseDouble(parts[1]), + Constants.LIKELINESS_PRECISION); TopicWord topicWord = new TopicWord(word, likeliness); topicWords.add(topicWord); } catch (NumberFormatException e) { @@ -126,8 +132,8 @@ public class JGibbLDAAnalyzer extends LDAAnalyzer { break; } } - Collections.sort(topicWords); - topicDef.setTopicWords(topicWords); + Collections.sort(topicWords, Collections.reverseOrder()); + topicDef.setWords(topicWords); topicDef.setName(TopicFull.getNameFromWords(topicWords)); return topicDef; } @@ -139,7 +145,7 @@ public class JGibbLDAAnalyzer extends LDAAnalyzer { @Override public ConvertStream<List<TopicRef>> getTopics() throws LDAAnalyzerException { - File tassign = new File(modelDir, "jgibb.tassign"); + File tassign = new File(modelDir, NAME + ".tassign"); try { return new ConvertStream<List<TopicRef>>(tassign) { @Override diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java index 6a34f1ce..d5ce849f 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java @@ -7,9 +7,9 @@ import de.vipra.util.Config; import de.vipra.util.Config.Key; import de.vipra.util.Constants; import de.vipra.util.ConvertStream; +import de.vipra.util.WordMap; import de.vipra.util.model.TopicFull; import de.vipra.util.model.TopicRef; -import de.vipra.util.model.WordMap; public abstract class LDAAnalyzer { diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ClearCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ClearCommand.java index d271eabc..b56dd596 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ClearCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ClearCommand.java @@ -4,6 +4,7 @@ import java.io.File; import java.io.IOException; import org.apache.commons.io.FileUtils; +import org.bson.types.ObjectId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -24,9 +25,9 @@ public class ClearCommand implements Command { private boolean defaults; private Config config; - private DatabaseService<ProcessedArticle> dbArticles; - private DatabaseService<TopicFull> dbTopics; - private DatabaseService<Word> dbWords; + private DatabaseService<ProcessedArticle, ObjectId> dbArticles; + private DatabaseService<TopicFull, ObjectId> dbTopics; + private DatabaseService<Word, String> dbWords; public ClearCommand(boolean defaults) { this.defaults = defaults; diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/DeleteCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/DeleteCommand.java index 4795c0cf..ff033a49 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/DeleteCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/DeleteCommand.java @@ -5,6 +5,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import org.bson.types.ObjectId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -13,6 +14,7 @@ import de.vipra.cmd.ex.FilebaseException; import de.vipra.cmd.file.Filebase; import de.vipra.cmd.model.ProcessedArticle; import de.vipra.util.Config; +import de.vipra.util.MongoUtils; import de.vipra.util.ex.ConfigException; import de.vipra.util.ex.DatabaseException; import de.vipra.util.service.DatabaseService; @@ -24,7 +26,7 @@ public class DeleteCommand implements Command { private ArrayList<String> ids = new ArrayList<>(); private Config config; - private DatabaseService<ProcessedArticle> dbArticles; + private DatabaseService<ProcessedArticle, ObjectId> dbArticles; private Filebase filebase; /** @@ -53,7 +55,7 @@ public class DeleteCommand implements Command { try { // 1. delete mongodb entry - dbArticles.deleteSingle(id); + dbArticles.deleteSingle(MongoUtils.objectId(id)); } catch (DatabaseException e) { errors.add(e); } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java index 1a54d02d..7cefa773 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java @@ -11,6 +11,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import org.bson.types.ObjectId; import org.json.simple.JSONArray; import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; @@ -20,7 +21,6 @@ import org.slf4j.LoggerFactory; import de.vipra.cmd.ExecutionException; import de.vipra.cmd.ex.ImportException; -import de.vipra.cmd.ex.LDAAnalyzerException; import de.vipra.cmd.file.Filebase; import de.vipra.cmd.file.FilebaseIndex; import de.vipra.cmd.lda.LDAAnalyzer; @@ -29,14 +29,15 @@ import de.vipra.cmd.text.ProcessedText; import de.vipra.cmd.text.Processor; import de.vipra.util.Config; import de.vipra.util.ConvertStream; +import de.vipra.util.MongoUtils; import de.vipra.util.StringUtils; import de.vipra.util.Timer; +import de.vipra.util.WordMap; import de.vipra.util.ex.DatabaseException; import de.vipra.util.model.ArticleStats; import de.vipra.util.model.TopicFull; import de.vipra.util.model.TopicRef; import de.vipra.util.model.Word; -import de.vipra.util.model.WordMap; import de.vipra.util.service.DatabaseService; public class ImportCommand implements Command { @@ -47,9 +48,9 @@ public class ImportCommand implements Command { private ArrayList<File> files = new ArrayList<>(); private JSONParser parser = new JSONParser(); private Config config; - private DatabaseService<ProcessedArticle> dbArticles; - private DatabaseService<TopicFull> dbTopics; - private DatabaseService<Word> dbWords; + private DatabaseService<ProcessedArticle, ObjectId> dbArticles; + private DatabaseService<TopicFull, ObjectId> dbTopics; + private DatabaseService<Word, String> dbWords; private Filebase filebase; private Processor preprocessor; private WordMap wordMap; @@ -161,68 +162,6 @@ public class ImportCommand implements Command { return imported; } - /** - * Saves topic definitions into a database collection. Topic definitions - * contain the words assigned to that topic and the likeliness, that a word - * belongs to that topic. - * - * @throws LDAAnalyzerException - * @throws DatabaseException - */ - private Map<String, String> saveTopicDefinitions() throws LDAAnalyzerException, DatabaseException { - ConvertStream<TopicFull> topics = analyzer.getTopicDefinitions(); - Map<String, String> topicIndexMap = new HashMap<>(); - - // recreate topics in database - // create one topic at a time for less memory usage - dbTopics.drop(); - for (TopicFull topic : topics) { - dbTopics.createSingle(topic); - topicIndexMap.put(Integer.toString(topic.getIndex()), topic.getId().toString()); - } - - return topicIndexMap; - } - - /** - * The analyzer saves the topics assigned to document words in the - * "*.tassign" file. This file is read line by line, each line is a single - * document. The line number corresponds to the line number in the index - * file, which holds the object id of that article. The topics are extracted - * and stored in the document. - * - * @throws LDAAnalyzerException - */ - private void saveTopicsPerDocument(Map<String, String> topicIndexMap) throws LDAAnalyzerException { - ConvertStream<List<TopicRef>> topics = analyzer.getTopics(); - FilebaseIndex index = filebase.getIndex(); - - Iterator<String> indexIter = index.iterator(); - Iterator<List<TopicRef>> topicIter = topics.iterator(); - - while (indexIter.hasNext() && topicIter.hasNext()) { - String id = indexIter.next(); - List<TopicRef> topicCount = topicIter.next(); - for (TopicRef tc : topicCount) { - String oid = topicIndexMap.get(tc.getTopicId()); - if (oid != null) - tc.setTopicId(topicIndexMap.get(tc.getTopicId())); - else - log.error("no object id for topic index " + tc.getTopicId()); - } - ProcessedArticle a = dbArticles.getSingle(id); - if (a != null) - a.setTopics(topicCount); - else - log.error("no article found in db for id " + id); - try { - dbArticles.updateSingle(a); - } catch (DatabaseException e) { - log.error("could not update article: " + a.getTitle() + " (" + a.getId() + ")"); - } - } - } - @Override public void run() throws ExecutionException { try { @@ -242,31 +181,80 @@ public class ImportCommand implements Command { Timer timer = new Timer(); timer.start(); - // import files into database and filebase + /* + * import files into database and filebase + */ out.info("file import"); long imported = importFiles(files); timer.lap("import"); - // write filebase + /* + * write filebase + */ out.info("writing file index"); filebase.close(); timer.lap("filebase write"); - // do topic modeling + /* + * do topic modeling + */ out.info("topic modeling"); analyzer.analyze(); timer.lap("topic modeling"); - // save topic model + /* + * save topic model + */ out.info("saving topic definitions"); - Map<String, String> topicIndexMap = saveTopicDefinitions(); + int batchSize = 100; + ConvertStream<TopicFull> topicDefs = analyzer.getTopicDefinitions(); + Map<String, String> topicIndexMap = new HashMap<>(); + dbTopics.drop(); + List<TopicFull> newTopicDefs = new ArrayList<>(batchSize); + Iterator<TopicFull> it = topicDefs.iterator(); + while (it.hasNext()) { + newTopicDefs.add(it.next()); + if (newTopicDefs.size() == batchSize || !it.hasNext()) { + dbTopics.createMultiple(newTopicDefs); + for (TopicFull newTopicDef : newTopicDefs) + topicIndexMap.put(Integer.toString(newTopicDef.getIndex()), newTopicDef.getId().toString()); + } + } timer.lap("saving topics"); - // save topic refs + /* + * save topic refs + */ out.info("saving document topics"); - saveTopicsPerDocument(topicIndexMap); + ConvertStream<List<TopicRef>> topics = analyzer.getTopics(); + FilebaseIndex index = filebase.getIndex(); + Iterator<String> indexIter = index.iterator(); + Iterator<List<TopicRef>> topicIter = topics.iterator(); + while (indexIter.hasNext() && topicIter.hasNext()) { + List<TopicRef> topicCount = topicIter.next(); + for (TopicRef tc : topicCount) { + String oid = topicIndexMap.get(tc.getTopicId()); + tc.setTopicId(oid); + if (oid == null) + log.error("no object id for topic index " + tc.getTopicId()); + } + String id = indexIter.next(); + ProcessedArticle a = dbArticles.getSingle(MongoUtils.objectId(id)); + if (a != null) + a.setTopics(topicCount); + else + log.error("no article found in db for id " + id); + try { + dbArticles.updateSingle(a); + } catch (DatabaseException e) { + log.error("could not update article: " + a.getTitle() + " (" + a.getId() + ")"); + } + } timer.lap("saving topic refs"); + /* + * run information + */ out.info("imported " + imported + " new " + StringUtils.quantity(imported, "article")); long newWords = wordMap.getNewWords(); out.info("imported " + newWords + " new " + StringUtils.quantity(newWords, "word")); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/StatsCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/StatsCommand.java index 4793f14f..d8ed55cf 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/StatsCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/StatsCommand.java @@ -3,6 +3,7 @@ package de.vipra.cmd.option; import java.io.File; import java.io.IOException; +import org.bson.types.ObjectId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -22,7 +23,7 @@ public class StatsCommand implements Command { private Config config; private Filebase filebase; - private DatabaseService<TopicFull> dbTopics; + private DatabaseService<TopicFull, ObjectId> dbTopics; private void stats() { File modelFile = filebase.getModelFile(); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/CoreNLPProcessor.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/CoreNLPProcessor.java index afc32c13..87f70cb1 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/text/CoreNLPProcessor.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/CoreNLPProcessor.java @@ -30,7 +30,7 @@ public class CoreNLPProcessor extends Processor { @Override public ProcessedText preprocess(String input) throws PreprocessorException { - Annotation doc = new Annotation(input); + Annotation doc = new Annotation(input.toLowerCase()); nlp.annotate(doc); StringBuilder sb = new StringBuilder(); List<CoreMap> sentences = doc.get(SentencesAnnotation.class); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/StopwordsAnnotator.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/StopwordsAnnotator.java index 5415cde7..838bbf03 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/text/StopwordsAnnotator.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/StopwordsAnnotator.java @@ -27,7 +27,7 @@ public class StopwordsAnnotator implements Annotator, CoreAnnotation<Boolean> { public void annotate(Annotation annotation) { List<CoreLabel> tokens = annotation.get(TokensAnnotation.class); for (CoreLabel token : tokens) { - if (stopWords.contains(token.word().toLowerCase())) { + if (stopWords.contains(token.word())) { token.set(StopwordsAnnotator.class, true); } } diff --git a/vipra-rest/.project b/vipra-rest/.project index 6c0bf9d8..569e5873 100644 --- a/vipra-rest/.project +++ b/vipra-rest/.project @@ -31,6 +31,11 @@ <arguments> </arguments> </buildCommand> + <buildCommand> + <name>net.sourceforge.metrics.builder</name> + <arguments> + </arguments> + </buildCommand> </buildSpec> <natures> <nature>org.eclipse.jem.workbench.JavaEMFNature</nature> @@ -39,5 +44,6 @@ <nature>org.eclipse.m2e.core.maven2Nature</nature> <nature>org.eclipse.wst.common.project.facet.core.nature</nature> <nature>org.eclipse.wst.jsdt.core.jsNature</nature> + <nature>net.sourceforge.metrics.nature</nature> </natures> </projectDescription> diff --git a/vipra-rest/pom.xml b/vipra-rest/pom.xml index ff073cf9..9606dffe 100644 --- a/vipra-rest/pom.xml +++ b/vipra-rest/pom.xml @@ -18,6 +18,7 @@ <jettyVersion>9.3.6.v20151106</jettyVersion> <servletVersion>3.1.0</servletVersion> <log4jVersion>2.4.1</log4jVersion> + <jacksonVersion>2.7.0</jacksonVersion> </properties> <build> @@ -34,7 +35,19 @@ <dependency> <groupId>org.glassfish.jersey.media</groupId> <artifactId>jersey-media-json-jackson</artifactId> - <version>2.22.1</version> + <version>${jerseyVersion}</version> + </dependency> + + <!-- Jackson --> + <dependency> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-databind</artifactId> + <version>${jacksonVersion}</version> + </dependency> + <dependency> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-annotations</artifactId> + <version>${jacksonVersion}</version> </dependency> <!-- Servlet API --> diff --git a/vipra-rest/src/main/java/de/vipra/rest/model/Wrapper.java b/vipra-rest/src/main/java/de/vipra/rest/model/Wrapper.java index 5c43b033..d18f8d75 100644 --- a/vipra-rest/src/main/java/de/vipra/rest/model/Wrapper.java +++ b/vipra-rest/src/main/java/de/vipra/rest/model/Wrapper.java @@ -1,12 +1,23 @@ package de.vipra.rest.model; +import java.net.URI; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; + +import javax.ws.rs.core.Response; +import javax.ws.rs.core.Response.ResponseBuilder; +import javax.ws.rs.core.Response.Status; +import javax.ws.rs.core.UriBuilder; + +import de.vipra.rest.Messages; public class Wrapper<T> { private T data; private List<APIError> errors; + private Map<String, String> links; private final APIVersion jsonapi = new APIVersion(); public Wrapper() {} @@ -38,12 +49,25 @@ public class Wrapper<T> { } public void addError(APIError error) { - if (errors == null) { + if (errors == null) errors = new ArrayList<>(); - } errors.add(error); } + public Map<String, String> getLinks() { + return links; + } + + public void setLinks(Map<String, String> links) { + this.links = links; + } + + public void addLink(String key, String link) { + if (links == null) + links = new HashMap<>(); + links.put(key, link); + } + public APIVersion getJsonapi() { return jsonapi; } @@ -52,4 +76,83 @@ public class Wrapper<T> { return data != null ? Integer.toString(data.hashCode()) : null; } + public void addPaginationLinks(URI base, Integer skip, Integer limit, long count) { + if (skip == null || limit == null || limit == 0) + return; + + if (skip < 0) { + addError(new APIError(Response.Status.BAD_REQUEST, "Wrong skip number", + String.format(Messages.BAD_REQUEST, "skip number must be greater or equal to 0"))); + return; + } + + if (limit < 0) { + addError(new APIError(Response.Status.BAD_REQUEST, "Wrong limit size", + String.format(Messages.BAD_REQUEST, "when using skip, limit size must be greater or equal to 1"))); + return; + } + + addLink("first", UriBuilder.fromUri(base).queryParam("skip", 0).queryParam("limit", limit).build().toString()); + addLink("last", UriBuilder.fromUri(base).queryParam("skip", (count / limit) * limit).queryParam("limit", limit) + .build().toString()); + + if (skip > 0) { + int diff = skip % limit; + if (diff == 0) + diff = limit; + int prevSkip = Math.max(0, skip - diff); + addLink("prev", UriBuilder.fromUri(base).queryParam("skip", prevSkip).queryParam("limit", limit).build() + .toString()); + } + + if (skip + limit < count) { + int diff = limit - skip % limit; + if (diff == 0) + diff = limit; + int nextSkip = skip + diff; + addLink("next", UriBuilder.fromUri(base).queryParam("skip", nextSkip).queryParam("limit", limit).build() + .toString()); + } + } + + public boolean hasErrors() { + return errors != null && errors.size() > 0; + } + + public Response ok() { + return Response.ok().entity(this).tag(tag()).build(); + } + + public Response ok(T data) { + this.data = data; + return ok(); + } + + public Response badRequest() { + return Response.status(Status.BAD_REQUEST).entity(this).build(); + } + + public Response serverError() { + return Response.serverError().entity(this).build(); + } + + public Response created(URI loc) { + return Response.created(loc).entity(this).build(); + } + + public Response notFound() { + return Response.status(Status.NOT_FOUND).entity(this).build(); + } + + public Response noContent() { + return Response.noContent().build(); + } + + public Response status(Status status, boolean withEntity) { + ResponseBuilder r = Response.status(status); + if (withEntity) + r.entity(this); + return r.build(); + } + } diff --git a/vipra-rest/src/main/java/de/vipra/rest/provider/ObjectMapperProvider.java b/vipra-rest/src/main/java/de/vipra/rest/provider/ObjectMapperProvider.java index e7f5577a..2b1060cf 100644 --- a/vipra-rest/src/main/java/de/vipra/rest/provider/ObjectMapperProvider.java +++ b/vipra-rest/src/main/java/de/vipra/rest/provider/ObjectMapperProvider.java @@ -21,6 +21,7 @@ import de.vipra.rest.serializer.ObjectIdSerializer; import de.vipra.util.Constants; import de.vipra.util.model.Article; import de.vipra.util.model.TopicFull; +import de.vipra.util.model.Word; @Provider public class ObjectMapperProvider implements ContextResolver<ObjectMapper> { @@ -46,6 +47,9 @@ public class ObjectMapperProvider implements ContextResolver<ObjectMapper> { module.addSerializer(TopicFull.class, new GenericSerializer<TopicFull>(TopicFull.class)); module.addDeserializer(TopicFull.class, new GenericDeserializer<TopicFull>(TopicFull.class)); + module.addSerializer(Word.class, new GenericSerializer<Word>(Word.class)); + module.addDeserializer(Word.class, new GenericDeserializer<Word>(Word.class)); + module.addSerializer(ObjectId.class, new ObjectIdSerializer()); module.addDeserializer(ObjectId.class, new ObjectIdDeserializer()); diff --git a/vipra-rest/src/main/java/de/vipra/rest/resource/ArticleResource.java b/vipra-rest/src/main/java/de/vipra/rest/resource/ArticleResource.java index bf182eaa..79d0ccc0 100644 --- a/vipra-rest/src/main/java/de/vipra/rest/resource/ArticleResource.java +++ b/vipra-rest/src/main/java/de/vipra/rest/resource/ArticleResource.java @@ -1,6 +1,10 @@ package de.vipra.rest.resource; import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; import java.util.List; import javax.servlet.ServletContext; @@ -18,16 +22,17 @@ import javax.ws.rs.core.Context; import javax.ws.rs.core.Response; import javax.ws.rs.core.UriInfo; +import org.bson.types.ObjectId; import org.ehcache.Cache; import org.ehcache.CacheManager; import org.ehcache.config.CacheConfigurationBuilder; import de.vipra.rest.APIMediaType; import de.vipra.rest.Messages; -import de.vipra.rest.PATCH; import de.vipra.rest.model.APIError; import de.vipra.rest.model.Wrapper; import de.vipra.util.Config; +import de.vipra.util.MongoUtils; import de.vipra.util.ex.ConfigException; import de.vipra.util.ex.DatabaseException; import de.vipra.util.model.Article; @@ -39,8 +44,8 @@ public class ArticleResource { @Context UriInfo uri; - final Cache<String, Article> articleCache; - final DatabaseService<Article> service; + final Cache<String, Article> cache; + final DatabaseService<Article, ObjectId> service; public ArticleResource(@Context ServletContext servletContext) throws ConfigException, IOException { Config config = Config.getConfig(); @@ -51,40 +56,56 @@ public class ArticleResource { if (articleCache == null) articleCache = manager.createCache("articlecache", CacheConfigurationBuilder.newCacheConfigurationBuilder().buildConfig(String.class, Article.class)); - this.articleCache = articleCache; + this.cache = articleCache; } @GET @Produces(APIMediaType.APPLICATION_JSONAPI) - public Response getArticles(@QueryParam("skip") @DefaultValue("0") int skip, - @QueryParam("limit") @DefaultValue("0") int limit, - @QueryParam("sort") @DefaultValue("date") String sortBy) { - List<Article> articles = service.getMultiple(skip, limit, sortBy); - Wrapper<List<Article>> res = new Wrapper<>(articles); - return Response.ok().entity(res).tag(res.tag()).build(); + public Response getArticles(@QueryParam("skip") Integer skip, @QueryParam("limit") Integer limit, + @QueryParam("sort") @DefaultValue("date") String sortBy, @QueryParam("fields") String fields) { + Wrapper<List<Article>> res = new Wrapper<>(); + + if (skip != null && limit != null) + res.addPaginationLinks(uri.getAbsolutePath(), skip, limit, service.count()); + + if (res.hasErrors()) + return res.badRequest(); + + try { + List<Article> articles = service.getMultiple(skip, limit, sortBy, getFields(fields)); + return res.ok(articles); + } catch (Exception e) { + res.addError(new APIError(Response.Status.BAD_REQUEST, "Error", e.getMessage())); + return res.badRequest(); + } } @GET @Produces(APIMediaType.APPLICATION_JSONAPI) @Consumes(APIMediaType.APPLICATION_JSONAPI) @Path("{id}") - public Response getArticle(@PathParam("id") String id) { + public Response getArticle(@PathParam("id") String id, @QueryParam("fields") String fields) { Wrapper<Article> res = new Wrapper<>(); if (id == null || id.trim().length() == 0) { res.addError(new APIError(Response.Status.BAD_REQUEST, "ID is empty", String.format(Messages.BAD_REQUEST, "id cannot be empty"))); - return Response.status(Response.Status.BAD_REQUEST).entity(res).build(); + return res.badRequest(); } - Article article = getSingle(id); + Article article; + try { + article = getSingle(id, getFields(fields)); + } catch (Exception e) { + res.addError(new APIError(Response.Status.BAD_REQUEST, "Error", e.getMessage())); + return res.badRequest(); + } if (article != null) { - res.setData(article); - return Response.ok().entity(res).tag(res.tag()).build(); + return res.ok(article); } else { - String msg = String.format(Messages.NOT_FOUND, "article", id); - res.addError(new APIError(Response.Status.NOT_FOUND, "Resource not found", msg)); - return Response.status(Response.Status.NOT_FOUND).entity(res).build(); + res.addError(new APIError(Response.Status.NOT_FOUND, "Resource not found", + String.format(Messages.NOT_FOUND, "article", id))); + return res.notFound(); } } @@ -96,11 +117,12 @@ public class ArticleResource { try { article = service.createSingle(article); res = new Wrapper<>(article); - return Response.created(article.uri(uri.getAbsolutePath())).entity(res).tag(res.tag()).build(); - } catch (DatabaseException e) { + URI newUri = new URL(uri.getAbsolutePath().toURL(), article.getId().toString()).toURI(); + return res.created(newUri); + } catch (DatabaseException | MalformedURLException | URISyntaxException e) { res = new Wrapper<>(new APIError(Response.Status.INTERNAL_SERVER_ERROR, "item could not be created", "item could not be created due to an internal server error")); - return Response.serverError().entity(res).build(); + return res.serverError(); } } @@ -110,23 +132,23 @@ public class ArticleResource { Wrapper<Article> res = new Wrapper<>(); long deleted; try { - deleted = service.deleteSingle(id); + deleted = service.deleteSingle(MongoUtils.objectId(id)); } catch (DatabaseException e) { res = new Wrapper<>(new APIError(Response.Status.INTERNAL_SERVER_ERROR, "item could not be deleted", "item could not be created due to an internal server error")); - return Response.serverError().entity(res).build(); + return res.serverError(); } - articleCache.remove(id); + cache.remove(id); int del = deleted > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) deleted; switch (del) { case 0: res.addError(new APIError(Response.Status.NOT_FOUND, "Article not found", String.format(Messages.NOT_FOUND, "article", id))); - return Response.status(Response.Status.NOT_FOUND).entity(res).build(); + return res.notFound(); case 1: - return Response.noContent().build(); + return res.noContent(); default: - return Response.serverError().build(); + return res.serverError(); } } @@ -139,35 +161,35 @@ public class ArticleResource { Wrapper<Article> res = new Wrapper<>(); try { service.updateSingle(article); - articleCache.put(id, article); - res.setData(article); - return Response.ok().entity(res).tag(res.tag()).build(); + cache.put(id, article); + return res.ok(article); } catch (DatabaseException e) { res = new Wrapper<>(new APIError(Response.Status.INTERNAL_SERVER_ERROR, "item could not be updated", "item could not be updated due to an internal server error")); - return Response.serverError().entity(res).build(); + return res.serverError(); } } - @PATCH - @Consumes(APIMediaType.APPLICATION_JSONAPI) - @Produces(APIMediaType.APPLICATION_JSONAPI) - @Path("{id}") - public Response updateArticle(@PathParam("id") String id, Wrapper<Article> wrapper) { - Article newArticle = wrapper.getData(); - Article article = getSingle(id); - // TODO implement - return null; + private Article getSingle(String id, String[] fields) { + if (fields.length == 0) { + Article article = cache.get(id); + if (article == null) { + article = service.getSingle(MongoUtils.objectId(id)); + if (article != null) + cache.put(id, article); + } + return article; + } else + return service.getSingle(MongoUtils.objectId(id), fields); } - private Article getSingle(String id) { - Article article = articleCache.get(id); - if (article == null) { - article = service.getSingle(id); - if (article != null) - articleCache.put(id, article); - } - return article; + private String[] getFields(String fields) { + if (fields == null) + return null; + fields = fields.trim(); + if (fields.length() == 0) + return null; + return fields.split(","); } } diff --git a/vipra-rest/src/main/java/de/vipra/rest/resource/TopicResource.java b/vipra-rest/src/main/java/de/vipra/rest/resource/TopicResource.java index 44cc2964..bf471644 100644 --- a/vipra-rest/src/main/java/de/vipra/rest/resource/TopicResource.java +++ b/vipra-rest/src/main/java/de/vipra/rest/resource/TopicResource.java @@ -16,16 +16,17 @@ import javax.ws.rs.core.Context; import javax.ws.rs.core.Response; import javax.ws.rs.core.UriInfo; +import org.bson.types.ObjectId; import org.ehcache.Cache; import org.ehcache.CacheManager; import org.ehcache.config.CacheConfigurationBuilder; import de.vipra.rest.APIMediaType; import de.vipra.rest.Messages; -import de.vipra.rest.PATCH; import de.vipra.rest.model.APIError; import de.vipra.rest.model.Wrapper; import de.vipra.util.Config; +import de.vipra.util.MongoUtils; import de.vipra.util.ex.ConfigException; import de.vipra.util.ex.DatabaseException; import de.vipra.util.model.TopicFull; @@ -37,8 +38,8 @@ public class TopicResource { @Context UriInfo uri; - final Cache<String, TopicFull> topicCache; - final DatabaseService<TopicFull> service; + final Cache<String, TopicFull> cache; + final DatabaseService<TopicFull, ObjectId> service; public TopicResource(@Context ServletContext servletContext) throws ConfigException, IOException { Config config = Config.getConfig(); @@ -49,39 +50,56 @@ public class TopicResource { if (topicCache == null) topicCache = manager.createCache("topiccache", CacheConfigurationBuilder.newCacheConfigurationBuilder() .buildConfig(String.class, TopicFull.class)); - this.topicCache = topicCache; + this.cache = topicCache; } @GET @Produces(APIMediaType.APPLICATION_JSONAPI) - public Response getTopics(@QueryParam("skip") @DefaultValue("0") int skip, - @QueryParam("limit") @DefaultValue("0") int limit) { - List<TopicFull> topics = service.getMultiple(skip, limit, null); - Wrapper<List<TopicFull>> res = new Wrapper<>(topics); - return Response.ok().entity(res).tag(res.tag()).build(); + public Response getTopics(@QueryParam("skip") Integer skip, @QueryParam("limit") Integer limit, + @QueryParam("sort") @DefaultValue("date") String sortBy, @QueryParam("fields") String fields) { + Wrapper<List<TopicFull>> res = new Wrapper<>(); + + if (skip != null && limit != null) + res.addPaginationLinks(uri.getAbsolutePath(), skip, limit, service.count()); + + if (res.hasErrors()) + return Response.status(Response.Status.BAD_REQUEST).entity(res).build(); + + try { + List<TopicFull> topics = service.getMultiple(skip, limit, sortBy, getFields(fields)); + return res.ok(topics); + } catch (Exception e) { + res.addError(new APIError(Response.Status.BAD_REQUEST, "Error", e.getMessage())); + return Response.status(Response.Status.BAD_REQUEST).entity(res).build(); + } } @GET @Produces(APIMediaType.APPLICATION_JSONAPI) @Consumes(APIMediaType.APPLICATION_JSONAPI) @Path("{id}") - public Response getTopic(@PathParam("id") String id) { + public Response getTopic(@PathParam("id") String id, @QueryParam("fields") String fields) { Wrapper<TopicFull> res = new Wrapper<>(); if (id == null || id.trim().length() == 0) { res.addError(new APIError(Response.Status.BAD_REQUEST, "ID is empty", String.format(Messages.BAD_REQUEST, "id cannot be empty"))); - return Response.status(Response.Status.BAD_REQUEST).entity(res).build(); + return res.badRequest(); } - TopicFull topic = getSingle(id); + TopicFull topic; + try { + topic = getSingle(id, getFields(fields)); + } catch (Exception e) { + res.addError(new APIError(Response.Status.BAD_REQUEST, "Error", e.getMessage())); + return res.badRequest(); + } - if (topic != null) { - res.setData(topic); - return Response.ok().entity(res).tag(res.tag()).build(); - } else { + if (topic != null) + return res.ok(topic); + else { res.addError(new APIError(Response.Status.NOT_FOUND, "Resource not found", String.format(Messages.NOT_FOUND, "topic", id))); - return Response.status(Response.Status.NOT_FOUND).entity(res).build(); + return res.notFound(); } } @@ -94,35 +112,35 @@ public class TopicResource { Wrapper<TopicFull> res = new Wrapper<>(); try { service.updateSingle(topic); - topicCache.put(id, topic); - res.setData(topic); - return Response.ok().entity(res).tag(res.tag()).build(); + cache.put(id, topic); + return res.ok(topic); } catch (DatabaseException e) { res = new Wrapper<>(new APIError(Response.Status.INTERNAL_SERVER_ERROR, "item could not be updated", "item could not be updated due to an internal server error")); - return Response.serverError().entity(res).build(); + return res.serverError(); } } - @PATCH - @Consumes(APIMediaType.APPLICATION_JSONAPI) - @Produces(APIMediaType.APPLICATION_JSONAPI) - @Path("{id}") - public Response updateTopic(@PathParam("id") String id, Wrapper<TopicFull> wrapper) { - TopicFull newTopic = wrapper.getData(); - TopicFull topic = getSingle(id); - // TODO implement - return null; + private TopicFull getSingle(String id, String[] fields) { + if (fields.length == 0) { + TopicFull topic = cache.get(id); + if (topic == null) { + topic = service.getSingle(MongoUtils.objectId(id)); + if (topic != null) + cache.put(id, topic); + } + return topic; + } else + return service.getSingle(MongoUtils.objectId(id), fields); } - private TopicFull getSingle(String id) { - TopicFull topic = topicCache.get(id); - if (topic == null) { - topic = service.getSingle(id); - if (topic != null) - topicCache.put(id, topic); - } - return topic; + private String[] getFields(String fields) { + if (fields == null) + return null; + fields = fields.trim(); + if (fields.length() == 0) + return null; + return fields.split(","); } } diff --git a/vipra-rest/src/main/java/de/vipra/rest/resource/WordResource.java b/vipra-rest/src/main/java/de/vipra/rest/resource/WordResource.java new file mode 100644 index 00000000..b765eafb --- /dev/null +++ b/vipra-rest/src/main/java/de/vipra/rest/resource/WordResource.java @@ -0,0 +1,124 @@ +package de.vipra.rest.resource; + +import java.io.IOException; +import java.util.List; + +import javax.servlet.ServletContext; +import javax.ws.rs.Consumes; +import javax.ws.rs.DefaultValue; +import javax.ws.rs.GET; +import javax.ws.rs.Path; +import javax.ws.rs.PathParam; +import javax.ws.rs.Produces; +import javax.ws.rs.QueryParam; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.Response; +import javax.ws.rs.core.UriInfo; + +import org.ehcache.Cache; +import org.ehcache.CacheManager; +import org.ehcache.config.CacheConfigurationBuilder; + +import de.vipra.rest.APIMediaType; +import de.vipra.rest.Messages; +import de.vipra.rest.model.APIError; +import de.vipra.rest.model.Wrapper; +import de.vipra.util.Config; +import de.vipra.util.ex.ConfigException; +import de.vipra.util.model.Word; +import de.vipra.util.service.DatabaseService; + +@Path("words") +public class WordResource { + + @Context + UriInfo uri; + + final Cache<String, Word> cache; + final DatabaseService<Word, String> service; + + public WordResource(@Context ServletContext servletContext) throws ConfigException, IOException { + Config config = Config.getConfig(); + service = DatabaseService.getDatabaseService(config, Word.class); + + CacheManager manager = (CacheManager) servletContext.getAttribute("cachemanager"); + Cache<String, Word> wordCache = manager.getCache("wordcache", String.class, Word.class); + if (wordCache == null) + wordCache = manager.createCache("wordcache", + CacheConfigurationBuilder.newCacheConfigurationBuilder().buildConfig(String.class, Word.class)); + this.cache = wordCache; + } + + @GET + @Produces(APIMediaType.APPLICATION_JSONAPI) + public Response getWords(@QueryParam("skip") Integer skip, @QueryParam("limit") Integer limit, + @QueryParam("sort") @DefaultValue("word") String sortBy, @QueryParam("fields") String fields) { + Wrapper<List<Word>> res = new Wrapper<>(); + + if (skip != null && limit != null) + res.addPaginationLinks(uri.getAbsolutePath(), skip, limit, service.count()); + + if (res.hasErrors()) + return res.badRequest(); + + try { + List<Word> words = service.getMultiple(skip, limit, sortBy, getFields(fields)); + return res.ok(words); + } catch (Exception e) { + res.addError(new APIError(Response.Status.BAD_REQUEST, "Error", e.getMessage())); + return res.badRequest(); + } + } + + @GET + @Produces(APIMediaType.APPLICATION_JSONAPI) + @Consumes(APIMediaType.APPLICATION_JSONAPI) + @Path("{id}") + public Response getWord(@PathParam("id") String id, @QueryParam("fields") String fields) { + Wrapper<Word> res = new Wrapper<>(); + if (id == null || id.trim().length() == 0) { + res.addError(new APIError(Response.Status.BAD_REQUEST, "ID is empty", + String.format(Messages.BAD_REQUEST, "id cannot be empty"))); + return res.badRequest(); + } + + Word word; + try { + word = getSingle(id, getFields(fields)); + } catch (Exception e) { + res.addError(new APIError(Response.Status.BAD_REQUEST, "Error", e.getMessage())); + return res.badRequest(); + } + + if (word != null) + return res.ok(word); + else { + String msg = String.format(Messages.NOT_FOUND, "word", id); + res.addError(new APIError(Response.Status.NOT_FOUND, "Resource not found", msg)); + return res.notFound(); + } + } + + private Word getSingle(String id, String[] fields) { + if (fields.length > 0) { + Word word = cache.get(id); + if (word == null) { + word = service.getSingle(id); + if (word != null) + cache.put(id, word); + } + return word; + } else + return service.getSingle(id, fields); + } + + private String[] getFields(String fields) { + if (fields == null) + return null; + fields = fields.trim(); + if (fields.length() == 0) + return null; + return fields.split(","); + } + +} diff --git a/vipra-rest/src/main/java/de/vipra/rest/serializer/GenericDeserializer.java b/vipra-rest/src/main/java/de/vipra/rest/serializer/GenericDeserializer.java index ff9ea7d9..5945e5cf 100644 --- a/vipra-rest/src/main/java/de/vipra/rest/serializer/GenericDeserializer.java +++ b/vipra-rest/src/main/java/de/vipra/rest/serializer/GenericDeserializer.java @@ -17,7 +17,6 @@ import com.fasterxml.jackson.core.JsonToken; import com.fasterxml.jackson.databind.DeserializationContext; import com.fasterxml.jackson.databind.JsonDeserializer; -import de.vipra.util.an.JsonField; import de.vipra.util.an.JsonWrap; import de.vipra.util.model.Model; @@ -42,18 +41,14 @@ public class GenericDeserializer<T extends Model> extends JsonDeserializer<T> { String name = field.getName(); - JsonWrap jw = field.getDeclaredAnnotation(JsonWrap.class); - if (jw != null) - name = jw.value() + "." + name; - - JsonField jf = field.getDeclaredAnnotation(JsonField.class); - if (jf != null) - name = jf.value(); - JsonProperty jp = field.getDeclaredAnnotation(JsonProperty.class); if (jp != null) name = jp.value(); + JsonWrap jw = field.getDeclaredAnnotation(JsonWrap.class); + if (jw != null) + name = jw.value() + "." + name; + allFields.put(name, field); String[] parts = name.split("\\."); diff --git a/vipra-rest/src/main/java/de/vipra/rest/serializer/GenericSerializer.java b/vipra-rest/src/main/java/de/vipra/rest/serializer/GenericSerializer.java index b7c896c0..7d76f86e 100644 --- a/vipra-rest/src/main/java/de/vipra/rest/serializer/GenericSerializer.java +++ b/vipra-rest/src/main/java/de/vipra/rest/serializer/GenericSerializer.java @@ -11,13 +11,13 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; +import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonSerializer; import com.fasterxml.jackson.databind.SerializerProvider; -import de.vipra.util.an.JsonField; import de.vipra.util.an.JsonType; import de.vipra.util.an.JsonWrap; import de.vipra.util.model.Model; @@ -42,26 +42,20 @@ public class GenericSerializer<T extends Model> extends JsonSerializer<T> { if (Modifier.isPrivate(modifiers) && !Modifier.isStatic(modifiers)) { field.setAccessible(true); - com.fasterxml.jackson.annotation.JsonIgnore ji1 = field - .getDeclaredAnnotation(com.fasterxml.jackson.annotation.JsonIgnore.class); - de.vipra.util.an.JsonIgnore ji2 = field.getDeclaredAnnotation(de.vipra.util.an.JsonIgnore.class); - if ((ji1 != null && ji1.value()) || (ji2 != null && ji2.value())) + JsonIgnore ji = field.getDeclaredAnnotation(JsonIgnore.class); + if (ji != null && ji.value()) continue; String name = field.getName(); - JsonWrap jw = field.getDeclaredAnnotation(JsonWrap.class); - if (jw != null) - name = jw.value() + "." + name; - - JsonField jf = field.getDeclaredAnnotation(JsonField.class); - if (jf != null) - name = jf.value(); - JsonProperty jp = field.getDeclaredAnnotation(JsonProperty.class); if (jp != null) name = jp.value(); + JsonWrap jw = field.getDeclaredAnnotation(JsonWrap.class); + if (jw != null) + name = jw.value() + "." + name; + foundFields.put(name, field); } } diff --git a/vipra-ui/app/components/articles-list.js b/vipra-ui/app/components/items-list.js similarity index 96% rename from vipra-ui/app/components/articles-list.js rename to vipra-ui/app/components/items-list.js index 88005348..859d9ed8 100644 --- a/vipra-ui/app/components/articles-list.js +++ b/vipra-ui/app/components/items-list.js @@ -2,6 +2,8 @@ import Ember from 'ember'; export default Ember.Component.extend({ + route: "", + filteredItems: Ember.computed('items', 'filter', function() { var keyword = this.get('filter'); var filtered = this.get('items'); diff --git a/vipra-ui/app/components/topics-list.js b/vipra-ui/app/components/topics-list.js deleted file mode 100644 index 88005348..00000000 --- a/vipra-ui/app/components/topics-list.js +++ /dev/null @@ -1,15 +0,0 @@ -import Ember from 'ember'; - -export default Ember.Component.extend({ - - filteredItems: Ember.computed('items', 'filter', function() { - var keyword = this.get('filter'); - var filtered = this.get('items'); - if (keyword) { - keyword = keyword.toLowerCase().trim(); - filtered = this.get('items').filter((item) => item.get('_name').toLowerCase().includes(keyword)); - } - return filtered; - }) - -}); \ No newline at end of file diff --git a/vipra-ui/app/models/article.js b/vipra-ui/app/models/article.js index 0c23451f..fc38e095 100644 --- a/vipra-ui/app/models/article.js +++ b/vipra-ui/app/models/article.js @@ -7,6 +7,8 @@ export default DS.Model.extend({ date: DS.attr('date'), stats: DS.attr(), topics: DS.attr(), + created: DS.attr('date'), + modified: DS.attr('date'), _name: function() { var title = this.get('title'); diff --git a/vipra-ui/app/models/topic.js b/vipra-ui/app/models/topic.js index 73627bf9..f1aa4bc5 100644 --- a/vipra-ui/app/models/topic.js +++ b/vipra-ui/app/models/topic.js @@ -4,6 +4,8 @@ export default DS.Model.extend({ name: DS.attr(), index: DS.attr(), words: DS.attr(), + created: DS.attr('date'), + modified: DS.attr('date'), _name: function() { var name = this.get('name'); diff --git a/vipra-ui/app/models/word.js b/vipra-ui/app/models/word.js new file mode 100644 index 00000000..a18091a1 --- /dev/null +++ b/vipra-ui/app/models/word.js @@ -0,0 +1,9 @@ +import DS from 'ember-data'; + +export default DS.Model.extend({ + word: DS.attr(), + + _name: function() { + return this.get('id'); + }.property('id') +}); \ No newline at end of file diff --git a/vipra-ui/app/router.js b/vipra-ui/app/router.js index e856b110..1d31b063 100644 --- a/vipra-ui/app/router.js +++ b/vipra-ui/app/router.js @@ -14,6 +14,9 @@ Router.map(function() { this.route('edit'); }); }); + this.route('words', function() { + this.route('show', { path: '/:word_id' }); + }); this.route('not-found', { path: '/*:' }); }); diff --git a/vipra-ui/app/routes/words/index.js b/vipra-ui/app/routes/words/index.js new file mode 100644 index 00000000..e83ce4c6 --- /dev/null +++ b/vipra-ui/app/routes/words/index.js @@ -0,0 +1,9 @@ +import Ember from 'ember'; + +export default Ember.Route.extend({ + model() { + return Ember.RSVP.hash({ + words: this.store.findAll('word') + }); + } +}); \ No newline at end of file diff --git a/vipra-ui/app/routes/words/show.js b/vipra-ui/app/routes/words/show.js new file mode 100644 index 00000000..e545d25b --- /dev/null +++ b/vipra-ui/app/routes/words/show.js @@ -0,0 +1,9 @@ +import Ember from 'ember'; + +export default Ember.Route.extend({ + model(params) { + return Ember.RSVP.hash({ + word: this.store.find('word', params.word_id) + }); + } +}); \ No newline at end of file diff --git a/vipra-ui/app/templates/articles/index.hbs b/vipra-ui/app/templates/articles/index.hbs index 9b8d3390..e975dcaa 100644 --- a/vipra-ui/app/templates/articles/index.hbs +++ b/vipra-ui/app/templates/articles/index.hbs @@ -4,4 +4,4 @@ {{debounced-input placeholder='Filter' size='50' value=filter debounce='150'}} -{{articles-list items=model.articles filter=filter}} \ No newline at end of file +{{items-list items=model.articles filter=filter route='articles.show'}} \ No newline at end of file diff --git a/vipra-ui/app/templates/components/articles-list.hbs b/vipra-ui/app/templates/components/articles-list.hbs deleted file mode 100644 index c4c3d1af..00000000 --- a/vipra-ui/app/templates/components/articles-list.hbs +++ /dev/null @@ -1,5 +0,0 @@ -<ol> - {{#each filteredItems as |article|}} - <li>{{#link-to 'articles.show' article.id}}{{text-marker text=article.title mark=filter}}{{/link-to}}</li> - {{/each}} -</ol> \ No newline at end of file diff --git a/vipra-ui/app/templates/components/items-list.hbs b/vipra-ui/app/templates/components/items-list.hbs new file mode 100644 index 00000000..8402078b --- /dev/null +++ b/vipra-ui/app/templates/components/items-list.hbs @@ -0,0 +1,5 @@ +<ol> + {{#each filteredItems as |item|}} + <li>{{#link-to route item.id}}{{text-marker text=item._name mark=filter}}{{/link-to}}</li> + {{/each}} +</ol> \ No newline at end of file diff --git a/vipra-ui/app/templates/components/topics-list.hbs b/vipra-ui/app/templates/components/topics-list.hbs deleted file mode 100644 index 5ab07c25..00000000 --- a/vipra-ui/app/templates/components/topics-list.hbs +++ /dev/null @@ -1,5 +0,0 @@ -<ol> - {{#each filteredItems as |topic|}} - <li>{{#link-to 'topics.show' topic.id}}{{text-marker text=topic._name mark=filter}}{{/link-to}}</li> - {{/each}} -</ol> \ No newline at end of file diff --git a/vipra-ui/app/templates/index.hbs b/vipra-ui/app/templates/index.hbs index e0aaedcb..c7e27e93 100644 --- a/vipra-ui/app/templates/index.hbs +++ b/vipra-ui/app/templates/index.hbs @@ -1,5 +1,5 @@ <h1>Vipra</h1> -{{#link-to 'articles'}}Articles{{/link-to}} - -{{#link-to 'topics'}}Topics{{/link-to}} \ No newline at end of file +{{#link-to 'articles'}}Articles{{/link-to}}<br> +{{#link-to 'topics'}}Topics{{/link-to}}<br> +{{#link-to 'words'}}Words{{/link-to}} \ No newline at end of file diff --git a/vipra-ui/app/templates/topics/index.hbs b/vipra-ui/app/templates/topics/index.hbs index a8a4eece..9b46b3c2 100644 --- a/vipra-ui/app/templates/topics/index.hbs +++ b/vipra-ui/app/templates/topics/index.hbs @@ -2,4 +2,4 @@ {{debounced-input placeholder='Filter' size='50' value=filter debounce='150'}} -{{topics-list items=model.topics filter=filter}} \ No newline at end of file +{{items-list items=model.topics filter=filter route='topics.show'}} \ No newline at end of file diff --git a/vipra-ui/app/templates/topics/show/index.hbs b/vipra-ui/app/templates/topics/show/index.hbs index e7f21034..c2787301 100644 --- a/vipra-ui/app/templates/topics/show/index.hbs +++ b/vipra-ui/app/templates/topics/show/index.hbs @@ -1,3 +1,22 @@ {{#link-to 'topics.show.edit'}}Edit{{/link-to}} -<h2>{{model.topic._name}}</h2> \ No newline at end of file +<h2>{{model.topic._name}}</h2> + +<h3>Words</h3> + +<table> + <thead> + <tr> + <th>Word</th> + <th>Likeliness</th> + </tr> + </thead> + <tbody> + {{#each model.topic.words as |word|}} + <tr> + <td>{{word.word}}</td> + <td>{{word.likeliness}}</td> + </tr> + {{/each}} + </tbody> +</table> \ No newline at end of file diff --git a/vipra-ui/app/templates/words.hbs b/vipra-ui/app/templates/words.hbs new file mode 100644 index 00000000..13d6081c --- /dev/null +++ b/vipra-ui/app/templates/words.hbs @@ -0,0 +1,6 @@ +<h1>Words</h1> +{{#link-to 'index'}}Top{{/link-to}} +{{#link-to 'words'}}All{{/link-to}} +<hr> + +{{outlet}} \ No newline at end of file diff --git a/vipra-ui/app/templates/words/index.hbs b/vipra-ui/app/templates/words/index.hbs new file mode 100644 index 00000000..3f0dc91a --- /dev/null +++ b/vipra-ui/app/templates/words/index.hbs @@ -0,0 +1,3 @@ +<h2>Found words</h2> + +{{items-list items=model.words filter=filter route='words.show'}} \ No newline at end of file diff --git a/vipra-ui/app/templates/words/show.hbs b/vipra-ui/app/templates/words/show.hbs new file mode 100644 index 00000000..2778b0d5 --- /dev/null +++ b/vipra-ui/app/templates/words/show.hbs @@ -0,0 +1 @@ +<h2>{{model.word.id}}</h2> \ No newline at end of file diff --git a/vipra-ui/tests/unit/routes/words-test.js b/vipra-ui/tests/unit/routes/words-test.js new file mode 100644 index 00000000..ce62d399 --- /dev/null +++ b/vipra-ui/tests/unit/routes/words-test.js @@ -0,0 +1,11 @@ +import { moduleFor, test } from 'ember-qunit'; + +moduleFor('route:words', 'Unit | Route | words', { + // Specify the other units that are required for this test. + // needs: ['controller:foo'] +}); + +test('it exists', function(assert) { + let route = this.subject(); + assert.ok(route); +}); diff --git a/vipra-util/.project b/vipra-util/.project index 3eb2ee19..8f910748 100644 --- a/vipra-util/.project +++ b/vipra-util/.project @@ -25,6 +25,11 @@ <arguments> </arguments> </buildCommand> + <buildCommand> + <name>net.sourceforge.metrics.builder</name> + <arguments> + </arguments> + </buildCommand> </buildSpec> <natures> <nature>org.eclipse.jem.workbench.JavaEMFNature</nature> @@ -32,5 +37,6 @@ <nature>org.eclipse.m2e.core.maven2Nature</nature> <nature>org.eclipse.jdt.core.javanature</nature> <nature>org.eclipse.wst.common.project.facet.core.nature</nature> + <nature>net.sourceforge.metrics.nature</nature> </natures> </projectDescription> diff --git a/vipra-util/pom.xml b/vipra-util/pom.xml index daa885aa..78096310 100644 --- a/vipra-util/pom.xml +++ b/vipra-util/pom.xml @@ -11,6 +11,7 @@ <maven.compiler.target>1.8</maven.compiler.target> <maven.compiler.source>1.8</maven.compiler.source> <log4jVersion>2.4.1</log4jVersion> + <jacksonVersion>2.7.0</jacksonVersion> </properties> <dependencies> @@ -61,5 +62,17 @@ <artifactId>elasticsearch</artifactId> <version>2.1.0</version> </dependency> + + <!-- Jackson --> + <dependency> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-databind</artifactId> + <version>${jacksonVersion}</version> + </dependency> + <dependency> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-annotations</artifactId> + <version>${jacksonVersion}</version> + </dependency> </dependencies> </project> \ No newline at end of file diff --git a/vipra-util/src/main/java/de/vipra/util/Constants.java b/vipra-util/src/main/java/de/vipra/util/Constants.java index 22072d5d..90aff81b 100644 --- a/vipra-util/src/main/java/de/vipra/util/Constants.java +++ b/vipra-util/src/main/java/de/vipra/util/Constants.java @@ -30,25 +30,38 @@ public class Constants { public static final int DEFAULT_PORT = 27017; public static final String DEFAULT_DB = "test"; + /* + * TOPIC MODELING + */ + /** - * The global date time format. Will be used for conversion from and to - * database and frontend dates. + * The number of words to be used to generate a topic name. The top n words + * (sorted by likeliness) are used to generate a name for unnamed topics. */ - public static final String DATETIME_FORMAT = "yyyy-MM-dd'T'HH:mm:ss'Z'"; + public static final int AUTO_TOPIC_WORDS = 4; /** - * Disallowed chars for words in processed text segments. This regular - * expression is used to strip text of characters that should not be - * processed. + * Number of topics to discover with topic modeling, if the selected topic + * modeling library supports this parameter. */ - public static final String CHARS_DISALLOWED = "[^a-zA-Z0-9 ]"; + public static final int K_TOPICS = 20; /** - * The number of words to be used to generate a topic name. The top n words - * (sorted by likeliness) are used to generate a name for unnamed topics. + * Number of words in a discovered topic, if the selected topic modeling + * library supports this parameter. */ - public static final int AUTO_TOPIC_WORDS = 4; + public static final int K_TOPIC_WORDS = 50; + /** + * Precision of likeliness numbers. Likeliness is calculated for words to + * belong to topics. + */ + public static final int LIKELINESS_PRECISION = 6; + + /** + * Stopwords list. Extensive list of stopwords used to clean imported + * articles of the most common words before topic modeling is applied. + */ public static final List<String> STOPWORDS = Arrays.asList("'ll", "'ve", "a", "able", "about", "above", "abst", "accordance", "according", "accordingly", "across", "act", "actually", "added", "adj", "affected", "affecting", "affects", "after", "afterwards", "again", "against", "ah", "all", "almost", "alone", "along", @@ -123,6 +136,26 @@ public class Constants { "www", "x", "y", "year", "years", "yes", "yet", "you", "you'll", "you've", "youd", "young", "younger", "youngest", "your", "youre", "yours", "yourself", "yourselves", "z", "zero"); + /** + * Disallowed chars for words in processed text segments. This regular + * expression is used to strip text of characters that should not be + * processed. + */ + public static final String CHARS_DISALLOWED = "[^a-zA-Z0-9 ]"; + + /* + * OTHER + */ + + /** + * The global date time format. Will be used for conversion from and to + * database and frontend dates. + */ + public static final String DATETIME_FORMAT = "yyyy-MM-dd'T'HH:mm:ss'Z'"; + + /** + * The text processors available, including the default text processor + */ public static enum Processor { CUSTOM("custom"), CORENLP("corenlp"), @@ -151,6 +184,9 @@ public class Constants { } } + /** + * The topic modeling analyzers available, including the default analyzer. + */ public static enum Analyzer { JGIBB("jgibb"), DEFAULT(JGIBB); diff --git a/vipra-util/src/main/java/de/vipra/util/NumberUtils.java b/vipra-util/src/main/java/de/vipra/util/NumberUtils.java new file mode 100644 index 00000000..4206e9fd --- /dev/null +++ b/vipra-util/src/main/java/de/vipra/util/NumberUtils.java @@ -0,0 +1,10 @@ +package de.vipra.util; + +public class NumberUtils { + + public static double roundToPrecision(double d, int precision) { + double p = Math.pow(10, precision); + return Math.round(d * p) / p; + } + +} diff --git a/vipra-util/src/main/java/de/vipra/util/model/WordMap.java b/vipra-util/src/main/java/de/vipra/util/WordMap.java similarity index 56% rename from vipra-util/src/main/java/de/vipra/util/model/WordMap.java rename to vipra-util/src/main/java/de/vipra/util/WordMap.java index 76351d34..69be359d 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/WordMap.java +++ b/vipra-util/src/main/java/de/vipra/util/WordMap.java @@ -1,63 +1,46 @@ -package de.vipra.util.model; +package de.vipra.util; import java.util.ArrayList; import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.Map.Entry; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import de.vipra.util.ex.DatabaseException; +import de.vipra.util.model.Word; import de.vipra.util.service.DatabaseService; -public class WordMap extends HashMap<String, Word> { +public class WordMap { - private static final long serialVersionUID = 8321873837437524923L; public static final Logger log = LoggerFactory.getLogger(WordMap.class); - private final DatabaseService<Word> dbWords; + private final DatabaseService<Word, String> dbWords; + private final Map<String, Word> wordMap; private boolean createNow = true; private long newWords = 0; - public WordMap(DatabaseService<Word> dbWords) { + public WordMap(DatabaseService<Word, String> dbWords) { this.dbWords = dbWords; + this.wordMap = new HashMap<>(); List<Word> words = dbWords.getAll(); for (Word word : words) - put(word.getWord().toLowerCase(), word); + wordMap.put(word.getWord().toLowerCase(), word); } - @Override public Word get(Object w) { String strWord = w.toString(); - Word word = super.get(strWord.toLowerCase()); + Word word = wordMap.get(strWord.toLowerCase()); if (word == null) { word = new Word(strWord); createWord(word); + wordMap.put(strWord, word); } return word; } - @Override - public Word put(String strWord, Word word) { - Word currentWord = get(strWord); - if (currentWord == null) { - if (word == null) - word = new Word(strWord); - createWord(word); - put(strWord, word); - currentWord = word; - } else { - currentWord.setWord(word.getWord()); - try { - dbWords.updateSingle(currentWord); - } catch (DatabaseException e) { - log.error("could not update word in database", e); - throw new RuntimeException(e); - } - } - return currentWord; - } - private Word createWord(Word word) { if (createNow) { try { @@ -71,14 +54,10 @@ public class WordMap extends HashMap<String, Word> { return word; } - public Word put(String strWord) { - return put(strWord, null); - } - public void create() throws DatabaseException { List<Word> newWords = new ArrayList<>(); - for (Entry<String, Word> e : this.entrySet()) - if (e.getValue().getId() == null) + for (Entry<String, Word> e : wordMap.entrySet()) + if (!e.getValue().isCreated()) newWords.add(e.getValue()); dbWords.createMultiple(newWords); this.newWords += newWords.size(); diff --git a/vipra-util/src/main/java/de/vipra/util/an/JsonField.java b/vipra-util/src/main/java/de/vipra/util/an/JsonField.java deleted file mode 100644 index 058f5301..00000000 --- a/vipra-util/src/main/java/de/vipra/util/an/JsonField.java +++ /dev/null @@ -1,14 +0,0 @@ -package de.vipra.util.an; - -import java.lang.annotation.ElementType; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; - -@Retention(RetentionPolicy.RUNTIME) -@Target(ElementType.FIELD) -public @interface JsonField { - - public String value() default ""; - -} diff --git a/vipra-util/src/main/java/de/vipra/util/an/JsonIgnore.java b/vipra-util/src/main/java/de/vipra/util/an/JsonIgnore.java deleted file mode 100644 index 70e5b17b..00000000 --- a/vipra-util/src/main/java/de/vipra/util/an/JsonIgnore.java +++ /dev/null @@ -1,14 +0,0 @@ -package de.vipra.util.an; - -import java.lang.annotation.ElementType; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; - -@Retention(RetentionPolicy.RUNTIME) -@Target(ElementType.FIELD) -public @interface JsonIgnore { - - public boolean value() default true; - -} \ No newline at end of file diff --git a/vipra-util/src/main/java/de/vipra/util/model/Article.java b/vipra-util/src/main/java/de/vipra/util/model/Article.java index 72da3502..4122d623 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/Article.java +++ b/vipra-util/src/main/java/de/vipra/util/model/Article.java @@ -12,18 +12,23 @@ import org.bson.types.ObjectId; import org.mongodb.morphia.annotations.Embedded; import org.mongodb.morphia.annotations.Entity; import org.mongodb.morphia.annotations.Id; +import org.mongodb.morphia.annotations.Index; +import org.mongodb.morphia.annotations.Indexes; import org.mongodb.morphia.annotations.PrePersist; import de.vipra.util.Constants; import de.vipra.util.FileUtils; import de.vipra.util.MongoUtils; import de.vipra.util.StringUtils; +import de.vipra.util.an.JsonType; import de.vipra.util.an.JsonWrap; import de.vipra.util.an.QueryIgnore; @SuppressWarnings("serial") +@JsonType("article") @Entity(value = "articles", noClassnameStored = true) -public class Article extends Model implements Serializable { +@Indexes({ @Index("title"), @Index("date") }) +public class Article extends FileModel implements Serializable { @Id private ObjectId id; @@ -52,7 +57,7 @@ public class Article extends Model implements Serializable { private ArticleStats stats; @JsonWrap("attributes") - private Date created = new Date(); + private Date created; @JsonWrap("attributes") private Date modified; @@ -155,6 +160,8 @@ public class Article extends Model implements Serializable { @PrePersist public void prePersist() { this.modified = new Date(); + if (this.created == null) + this.created = modified; } @Override diff --git a/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java b/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java index 06a3b1e9..db1b8ad3 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java +++ b/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java @@ -6,6 +6,8 @@ import org.bson.types.ObjectId; import org.mongodb.morphia.annotations.Entity; import org.mongodb.morphia.annotations.Id; +import de.vipra.util.WordMap; + @Entity public class ArticleStats implements Serializable { diff --git a/vipra-util/src/main/java/de/vipra/util/model/FileModel.java b/vipra-util/src/main/java/de/vipra/util/model/FileModel.java new file mode 100644 index 00000000..09742a02 --- /dev/null +++ b/vipra-util/src/main/java/de/vipra/util/model/FileModel.java @@ -0,0 +1,21 @@ +package de.vipra.util.model; + +import java.io.File; +import java.io.IOException; + +import org.apache.commons.io.FileUtils; + +import de.vipra.util.Constants; + +@SuppressWarnings("serial") +public abstract class FileModel implements Model { + + public void writeToFile(File file) throws IOException { + FileUtils.writeStringToFile(file, toFileString(), Constants.FB_ENCODING, false); + } + + public abstract void fromFile(File file) throws IOException; + + public abstract String toFileString(); + +} diff --git a/vipra-util/src/main/java/de/vipra/util/model/Model.java b/vipra-util/src/main/java/de/vipra/util/model/Model.java index 8fa5aab1..0d133d54 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/Model.java +++ b/vipra-util/src/main/java/de/vipra/util/model/Model.java @@ -1,39 +1,5 @@ package de.vipra.util.model; -import java.io.File; -import java.io.IOException; import java.io.Serializable; -import java.net.URI; -import java.net.URISyntaxException; -import org.apache.commons.io.FileUtils; -import org.bson.types.ObjectId; - -import de.vipra.util.Constants; - -@SuppressWarnings("serial") -public abstract class Model implements Serializable { - - public URI uri(URI base) { - try { - return new URI(base.toString() + "/" + getId().toString()); - } catch (URISyntaxException e) { - return null; - } - } - - public void writeToFile(File file) throws IOException { - FileUtils.writeStringToFile(file, toFileString(), Constants.FB_ENCODING, false); - } - - public abstract ObjectId getId(); - - public abstract void setId(ObjectId id); - - public abstract void setId(String id); - - public abstract void fromFile(File file) throws IOException; - - public abstract String toFileString(); - -} +public interface Model extends Serializable {} \ No newline at end of file diff --git a/vipra-util/src/main/java/de/vipra/util/model/Topic.java b/vipra-util/src/main/java/de/vipra/util/model/Topic.java index 1e1d7a47..0ac387d9 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/Topic.java +++ b/vipra-util/src/main/java/de/vipra/util/model/Topic.java @@ -1,19 +1,21 @@ package de.vipra.util.model; -import java.io.File; -import java.io.IOException; import java.io.Serializable; import org.bson.types.ObjectId; import org.mongodb.morphia.annotations.Entity; import org.mongodb.morphia.annotations.Id; +import org.mongodb.morphia.annotations.Index; +import org.mongodb.morphia.annotations.Indexes; import de.vipra.util.MongoUtils; -import de.vipra.util.ex.NotImplementedException; +import de.vipra.util.an.JsonType; @SuppressWarnings("serial") +@JsonType("topic") @Entity(value = "topics", noClassnameStored = true) -public class Topic extends Model implements Serializable { +@Indexes(@Index("name")) +public class Topic implements Model, Serializable { @Id private ObjectId id; @@ -39,14 +41,4 @@ public class Topic extends Model implements Serializable { this.name = name; } - @Override - public void fromFile(File file) throws IOException { - throw new NotImplementedException(); - } - - @Override - public String toFileString() { - throw new NotImplementedException(); - } - } diff --git a/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java b/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java index 5af0a958..412232b7 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java @@ -1,7 +1,5 @@ package de.vipra.util.model; -import java.io.File; -import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; import java.util.Date; @@ -19,12 +17,11 @@ import de.vipra.util.StringUtils; import de.vipra.util.an.JsonType; import de.vipra.util.an.JsonWrap; import de.vipra.util.an.QueryIgnore; -import de.vipra.util.ex.NotImplementedException; @SuppressWarnings("serial") @JsonType("topic") @Entity(value = "topics", noClassnameStored = true) -public class TopicFull extends Model implements Serializable { +public class TopicFull implements Model, Serializable { @Id private ObjectId id; @@ -38,10 +35,10 @@ public class TopicFull extends Model implements Serializable { @Embedded @JsonWrap("attributes") @QueryIgnore(multi = true) - private List<TopicWord> topicWords; + private List<TopicWord> words; @JsonWrap("attributes") - private Date created = new Date(); + private Date created; @JsonWrap("attributes") private Date modified; @@ -74,12 +71,12 @@ public class TopicFull extends Model implements Serializable { this.index = index; } - public List<TopicWord> getTopicWords() { - return topicWords; + public List<TopicWord> getWords() { + return words; } - public void setTopicWords(List<TopicWord> topicWords) { - this.topicWords = topicWords; + public void setWords(List<TopicWord> topicWords) { + this.words = topicWords; } public Date getCreated() { @@ -98,16 +95,6 @@ public class TopicFull extends Model implements Serializable { this.modified = modified; } - @Override - public void fromFile(File file) throws IOException { - throw new NotImplementedException(); - } - - @Override - public String toFileString() { - throw new NotImplementedException(); - } - @Override public String toString() { return TopicFull.class.getSimpleName() + "[id:" + getId() + ", name:" + getName() + ", created:" + created @@ -117,6 +104,8 @@ public class TopicFull extends Model implements Serializable { @PrePersist public void prePersist() { this.modified = new Date(); + if (this.created == null) + this.created = modified; } public static String getNameFromWords(List<TopicWord> words) { diff --git a/vipra-util/src/main/java/de/vipra/util/model/TopicRef.java b/vipra-util/src/main/java/de/vipra/util/model/TopicRef.java index 0380a0da..ef00fc43 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/TopicRef.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TopicRef.java @@ -6,14 +6,11 @@ import org.mongodb.morphia.annotations.Embedded; import org.mongodb.morphia.annotations.Reference; import org.mongodb.morphia.annotations.Transient; -import de.vipra.util.an.JsonIgnore; - @SuppressWarnings("serial") @Embedded public class TopicRef implements Comparable<TopicRef>, Serializable { @Transient - @JsonIgnore private String topicId; @Reference private Topic topic; diff --git a/vipra-util/src/main/java/de/vipra/util/model/TopicWord.java b/vipra-util/src/main/java/de/vipra/util/model/TopicWord.java index 5bf9a5cb..abed0a8a 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/TopicWord.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TopicWord.java @@ -3,14 +3,23 @@ package de.vipra.util.model; import java.io.Serializable; import org.mongodb.morphia.annotations.Embedded; +import org.mongodb.morphia.annotations.PostLoad; import org.mongodb.morphia.annotations.Reference; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; + @SuppressWarnings("serial") @Embedded public class TopicWord implements Comparable<TopicWord>, Serializable { @Reference + @JsonIgnore private Word word; + + @JsonProperty("word") + private String wordString; + private double likeliness; public TopicWord() {} @@ -28,6 +37,10 @@ public class TopicWord implements Comparable<TopicWord>, Serializable { this.word = word; } + public String getWordString() { + return wordString; + } + public double getLikeliness() { return likeliness; } @@ -51,4 +64,9 @@ public class TopicWord implements Comparable<TopicWord>, Serializable { return TopicWord.class.getSimpleName() + "[word:" + word + ", likeliness:" + likeliness + "]"; } + @PostLoad + private void postLoad() { + this.wordString = word.getWord(); + } + } diff --git a/vipra-util/src/main/java/de/vipra/util/model/Word.java b/vipra-util/src/main/java/de/vipra/util/model/Word.java index 2effbd0d..62ee93d0 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/Word.java +++ b/vipra-util/src/main/java/de/vipra/util/model/Word.java @@ -1,35 +1,57 @@ package de.vipra.util.model; -import java.io.File; -import java.io.IOException; import java.io.Serializable; -import org.bson.types.ObjectId; import org.mongodb.morphia.annotations.Entity; import org.mongodb.morphia.annotations.Id; +import org.mongodb.morphia.annotations.PostLoad; +import org.mongodb.morphia.annotations.PostPersist; +import org.mongodb.morphia.annotations.Transient; -import de.vipra.util.MongoUtils; -import de.vipra.util.ex.NotImplementedException; +import com.fasterxml.jackson.annotation.JsonIgnore; + +import de.vipra.util.an.JsonType; @SuppressWarnings("serial") +@JsonType("word") @Entity(value = "words", noClassnameStored = true) -public class Word extends Model implements Serializable { +public class Word implements Model, Serializable { + + /** + * This is the id. It is used by the frontend, which expects an 'id' field. + * This field is populated on load from the database and it is not stored. + */ + @Transient + private String id; + /** + * This is the actual word. It is used as the database id and is not + * returned to the frontend. + */ @Id - private ObjectId id; + @JsonIgnore private String word; + /** + * The created variable is a helper that marks non-persisted new words in + * the import process. Each word with created = false will be saved before + * topics and topics references are created. + */ + @Transient + @JsonIgnore + private boolean created = false; + public Word() {} public Word(String word) { this.word = word; } - public ObjectId getId() { + public String getId() { return id; } - public void setId(ObjectId id) { + public void setId(String id) { this.id = id; } @@ -39,21 +61,22 @@ public class Word extends Model implements Serializable { public void setWord(String word) { this.word = word; + this.id = word; } - @Override - public void setId(String id) { - this.id = MongoUtils.objectId(id); + public boolean isCreated() { + return created; } - @Override - public void fromFile(File file) throws IOException { - throw new NotImplementedException(); + public void setCreated(boolean created) { + this.created = created; } - @Override - public String toFileString() { - throw new NotImplementedException(); + @PostLoad + @PostPersist + private void post() { + this.id = word; + this.created = true; } } diff --git a/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java b/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java index ed3f222a..9a1d4e05 100644 --- a/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java +++ b/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java @@ -2,22 +2,23 @@ package de.vipra.util.service; import java.lang.reflect.Field; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; import java.util.List; +import java.util.Set; -import org.bson.types.ObjectId; import org.mongodb.morphia.Datastore; import org.mongodb.morphia.query.Query; import de.vipra.util.Config; import de.vipra.util.ListUtils; import de.vipra.util.Mongo; -import de.vipra.util.MongoUtils; import de.vipra.util.an.QueryIgnore; import de.vipra.util.ex.ConfigException; import de.vipra.util.ex.DatabaseException; import de.vipra.util.model.Model; -public class DatabaseService<T extends Model> implements Service<T, DatabaseException> { +public class DatabaseService<T extends Model, U> implements Service<T, U, DatabaseException> { private final Datastore datastore; private final Class<T> clazz; @@ -45,15 +46,17 @@ public class DatabaseService<T extends Model> implements Service<T, DatabaseExce } @Override - public T getSingle(String id) { - Query<T> q = datastore.createQuery(clazz).field("_id").equal(new ObjectId(id)); - if (ignoredFieldsSingle.length > 0) + public T getSingle(U id, String... fields) { + Query<T> q = datastore.createQuery(clazz).field("_id").equal(id); + if (fields != null && fields.length > 0) + q.retrievedFields(true, setMinus(fields, ignoredFieldsSingle)); + else if (ignoredFieldsSingle.length > 0) q.retrievedFields(false, ignoredFieldsSingle); return q.get(); } @Override - public List<T> getMultiple(Integer skip, Integer limit, String sortBy) { + public List<T> getMultiple(Integer skip, Integer limit, String sortBy, String... fields) { Query<T> q = datastore.createQuery(clazz); if (skip != null) q.offset(skip); @@ -61,15 +64,17 @@ public class DatabaseService<T extends Model> implements Service<T, DatabaseExce q.limit(limit); if (sortBy != null) q.order(sortBy); - if (ignoredFieldsMulti.length > 0) + if (fields != null && fields.length > 0) + q.retrievedFields(true, setMinus(fields, ignoredFieldsMulti)); + else if (ignoredFieldsMulti.length > 0) q.retrievedFields(false, ignoredFieldsMulti); List<T> list = q.asList(); return list; } @Override - public List<T> getAll() { - return getMultiple(null, null, null); + public List<T> getAll(String... fields) { + return getMultiple(null, null, null, fields); } @Override @@ -86,8 +91,8 @@ public class DatabaseService<T extends Model> implements Service<T, DatabaseExce } @Override - public long deleteSingle(String id) throws DatabaseException { - return datastore.delete(MongoUtils.objectId(id)).getN(); + public long deleteSingle(U id) throws DatabaseException { + return datastore.delete(id).getN(); } @Override @@ -105,10 +110,20 @@ public class DatabaseService<T extends Model> implements Service<T, DatabaseExce return datastore.getCount(clazz); } - public static <T extends Model> DatabaseService<T> getDatabaseService(Config config, Class<T> clazz) + public static <T extends Model, U> DatabaseService<T, U> getDatabaseService(Config config, Class<T> clazz) throws ConfigException { Mongo mongo = Mongo.getInstance(config); - return new DatabaseService<T>(mongo, clazz); + return new DatabaseService<T, U>(mongo, clazz); + } + + private String[] setMinus(String[] a, String[] b) { + if (a != null && b != null) { + Set<String> sa = new HashSet<>(Arrays.asList(a)); + Set<String> sb = new HashSet<>(Arrays.asList(b)); + sa.removeAll(sb); + return sa.toArray(new String[sa.size()]); + } + return a; } } diff --git a/vipra-util/src/main/java/de/vipra/util/service/Service.java b/vipra-util/src/main/java/de/vipra/util/service/Service.java index 659a5795..4fb210eb 100644 --- a/vipra-util/src/main/java/de/vipra/util/service/Service.java +++ b/vipra-util/src/main/java/de/vipra/util/service/Service.java @@ -4,21 +4,21 @@ import java.util.List; import de.vipra.util.model.Model; -public interface Service<T extends Model, E extends Exception> { +public interface Service<Type extends Model, IdType, E extends Exception> { - T getSingle(String id) throws E; + Type getSingle(IdType id, String... fields) throws E; - List<T> getMultiple(Integer skip, Integer limit, String sortBy) throws E; + List<Type> getMultiple(Integer skip, Integer limit, String sortBy, String... fields) throws E; - List<T> getAll() throws E; + List<Type> getAll(String... fields) throws E; - T createSingle(T t) throws E; + Type createSingle(Type t) throws E; - List<T> createMultiple(Iterable<T> t) throws E; + List<Type> createMultiple(Iterable<Type> t) throws E; - long deleteSingle(String id) throws E; + long deleteSingle(IdType id) throws E; - void updateSingle(T t) throws E; + void updateSingle(Type t) throws E; void drop() throws E; -- GitLab