diff --git a/jgibblda/.project b/jgibblda/.project index 9686b2ae5623076a3cc9959c3a28754015fba333..2f6739e94a4f8a44417d3b5e9e2bf9db4a44913f 100644 --- a/jgibblda/.project +++ b/jgibblda/.project @@ -25,6 +25,11 @@ <arguments> </arguments> </buildCommand> + <buildCommand> + <name>net.sourceforge.metrics.builder</name> + <arguments> + </arguments> + </buildCommand> </buildSpec> <natures> <nature>org.eclipse.jem.workbench.JavaEMFNature</nature> @@ -32,5 +37,6 @@ <nature>org.eclipse.m2e.core.maven2Nature</nature> <nature>org.eclipse.jdt.core.javanature</nature> <nature>org.eclipse.wst.common.project.facet.core.nature</nature> + <nature>net.sourceforge.metrics.nature</nature> </natures> </projectDescription> diff --git a/ma-impl.sublime-workspace b/ma-impl.sublime-workspace index 1178f37887c3d459c6e40efa5159186a480d8a98..c8f2f55db8de2f92a1e5ee8e0d9112eb625758f3 100644 --- a/ma-impl.sublime-workspace +++ b/ma-impl.sublime-workspace @@ -454,32 +454,46 @@ "expanded_folders": [ "/home/eike/repos/master/ma-impl", - "/home/eike/repos/master/ma-impl/vipra-cmd", "/home/eike/repos/master/ma-impl/vipra-ui", "/home/eike/repos/master/ma-impl/vipra-ui/app", - "/home/eike/repos/master/ma-impl/vipra-ui/app/adapters", "/home/eike/repos/master/ma-impl/vipra-ui/app/components", + "/home/eike/repos/master/ma-impl/vipra-ui/app/models", "/home/eike/repos/master/ma-impl/vipra-ui/app/routes", "/home/eike/repos/master/ma-impl/vipra-ui/app/routes/topics", + "/home/eike/repos/master/ma-impl/vipra-ui/app/routes/words", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/articles", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/components", - "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/topics", - "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/topics/show" + "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/topics/show", + "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/words", + "/home/eike/repos/master/ma-impl/vm/data" ], "file_history": [ + "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/words/show.hbs", + "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/articles/index.hbs", + "/home/eike/repos/master/ma-impl/vipra-ui/app/components/items-list.js", + "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/components/items-list.hbs", + "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/words/index.hbs", + "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/topics/show/index.hbs", + "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/words.hbs", + "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/index.hbs", + "/home/eike/repos/master/ma-impl/vipra-ui/app/router.js", + "/home/eike/repos/master/ma-impl/vipra-ui/app/routes/words", + "/home/eike/repos/master/ma-impl/vipra-ui/app/routes/words/index.js", + "/home/eike/repos/master/ma-impl/vipra-ui/app/models/word.js", + "/home/eike/repos/master/ma-impl/vipra-ui/app/models/topic.js", + "/home/eike/repos/master/ma-impl/vipra-ui/app/models/article.js", + "/home/eike/repos/master/ma-impl/vm/data/test-10.json", "/home/eike/repos/master/ma-impl/vipra-ui/app/components/topic-link.js", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/articles/show.hbs", "/home/eike/.local/share/vipra/jgibb/jgibb.twords", "/home/eike/.local/share/vipra/jgibb/jgibb.tassign", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/topics/index.hbs", - "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/topics/show/index.hbs", "/home/eike/Downloads/FRITZ.Box 7490 113.06.30_17.01.16_2147.export", "/home/eike/repos/master/ma-impl/vm/data/test-1.json", "/home/eike/repos/master/ma-impl/vm/data/test-2.json", "/home/eike/repos/master/ma-impl/vipra-ui/app/routes/topics/index.js", - "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/articles/index.hbs", "/home/eike/repos/master/ma-impl/vipra-ui/app/adapters/application.js", "/home/eike/.local/share/vipra/jgibb/jgibb", "/home/eike/repos/master/ma-doc/thesis/thesis.tex", @@ -488,11 +502,8 @@ "/home/eike/repos/master/ma-impl/vm/webapps/ROOT/index.html", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/articles.hbs", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/topics.hbs", - "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/index.hbs", - "/home/eike/repos/master/ma-impl/vipra-ui/app/models/topic.js", "/home/eike/repos/master/ma-impl/vipra-ui/app/routes/topics/show/edit.js", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/components/topics-list.hbs", - "/home/eike/repos/master/ma-impl/vipra-ui/app/router.js", "/home/eike/repos/master/ma-impl/vipra-ui/app/routes/topics/show.js", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/topics/show/edit.hbs", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/topics/show.hbs", @@ -510,7 +521,6 @@ "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/articles/list.hbs", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/components/articles-list.hbs", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates/topics/list.hbs", - "/home/eike/repos/master/ma-impl/vipra-ui/app/models/article.js", "/home/eike/repos/master/ma-impl/vipra-ui/app/components/filter-list.js", "/home/eike/repos/master/ma-impl/vipra-ui/app/helpers/topic-numi.js", "/home/eike/repos/master/ma-impl/vipra-ui/app/helpers/topicname.js", @@ -587,16 +597,7 @@ "/home/eike/Repositories/fu/ss15/ma/impl/vipra-ui/app/serializers/application.js", "/home/eike/Repositories/fu/ss15/ma/impl/vipra-ui/bower.json", "/home/eike/Repositories/fu/ss15/ma/impl/vipra-ui/bower_components/ember/.bower.json", - "/home/eike/Repositories/fu/ss15/ma/impl/vipra-ui/package.json", - "/home/eike/Repositories/fu/ss15/ma/impl/vipra-ui/app/app.js", - "/home/eike/Repositories/fu/ss15/ma/doc/thesis/thesis.tex", - "/home/eike/Repositories/fu/ss15/ma/impl/Vagrantfile", - "/home/eike/Repositories/fu/ss15/ma/impl/tmbs-processor-backend/src/main/scala/de/cochu/tmbs/processor/MongoDBTest.scala", - "/home/eike/Repositories/fu/ss15/ma/impl/.gitignore", - "/home/eike/Repositories/fu/ss15/ma/impl/vm/config/initd-tomcat", - "/home/eike/Repositories/fu/ss15/ma/impl/vm/config/environment", - "/home/eike/Repositories/fu/ss15/ma/impl/tmbs-frontend/app/templates/articles.hbs", - "/home/eike/Repositories/fu/ss15/ma/impl/tmbs-frontend/app/templates/application.hbs" + "/home/eike/Repositories/fu/ss15/ma/impl/vipra-ui/package.json" ], "find": { diff --git a/vipra-cmd/.project b/vipra-cmd/.project index e6b85466c1e3c9aa9198c926ed97d5b7c87344ed..b44a107f26b7c9daa76b66453cbb06333efaba11 100644 --- a/vipra-cmd/.project +++ b/vipra-cmd/.project @@ -25,6 +25,11 @@ <arguments> </arguments> </buildCommand> + <buildCommand> + <name>net.sourceforge.metrics.builder</name> + <arguments> + </arguments> + </buildCommand> </buildSpec> <natures> <nature>org.eclipse.jem.workbench.JavaEMFNature</nature> @@ -32,5 +37,6 @@ <nature>org.eclipse.jdt.core.javanature</nature> <nature>org.eclipse.m2e.core.maven2Nature</nature> <nature>org.eclipse.wst.common.project.facet.core.nature</nature> + <nature>net.sourceforge.metrics.nature</nature> </natures> </projectDescription> diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbLDAAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbLDAAnalyzer.java index df84fbe297193d17a2660f7c8fadbb6069aa540f..217f210eeac40ee2f5b216c2de523b5ff8850e98 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbLDAAnalyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbLDAAnalyzer.java @@ -14,14 +14,16 @@ import org.slf4j.LoggerFactory; import de.vipra.cmd.ex.LDAAnalyzerException; import de.vipra.util.Config; +import de.vipra.util.Constants; import de.vipra.util.ConvertStream; +import de.vipra.util.NumberUtils; import de.vipra.util.StringUtils; +import de.vipra.util.WordMap; import de.vipra.util.ex.ConfigException; import de.vipra.util.model.TopicFull; import de.vipra.util.model.TopicRef; import de.vipra.util.model.TopicWord; import de.vipra.util.model.Word; -import de.vipra.util.model.WordMap; import jgibblda.Estimator; import jgibblda.Inferencer; import jgibblda.LDACmdOption; @@ -30,6 +32,7 @@ import jgibblda.Model; public class JGibbLDAAnalyzer extends LDAAnalyzer { public static final Logger log = LoggerFactory.getLogger(JGibbLDAAnalyzer.class); + public static final String NAME = "jgibb"; private File dataDir; private File modelDir; @@ -51,15 +54,17 @@ public class JGibbLDAAnalyzer extends LDAAnalyzer { throw new LDAAnalyzerException(e); } - modelDir = new File(dataDir, "jgibb"); + modelDir = new File(dataDir, NAME); options.dir = modelDir.getAbsolutePath(); - options.estc = new File(modelDir, "jgibb.tassign").exists(); + options.estc = new File(modelDir, NAME + ".tassign").exists(); options.est = !options.estc; + options.K = Constants.K_TOPICS; + options.twords = Constants.K_TOPIC_WORDS; - modelFile = new File(modelDir, "jgibb"); + modelFile = new File(modelDir, NAME); options.dfile = modelFile.getName(); - options.modelName = "jgibb"; + options.modelName = NAME; this.wordMap = wordMap; } @@ -87,7 +92,7 @@ public class JGibbLDAAnalyzer extends LDAAnalyzer { @Override public ConvertStream<TopicFull> getTopicDefinitions() throws LDAAnalyzerException { - File twords = new File(modelDir, "jgibb.twords"); + File twords = new File(modelDir, NAME + ".twords"); try { return new ConvertStream<TopicFull>(twords) { @Override @@ -113,7 +118,8 @@ public class JGibbLDAAnalyzer extends LDAAnalyzer { String[] parts = nextLine.trim().split("\\s+"); try { Word word = wordMap.get(parts[0]); - double likeliness = Double.parseDouble(parts[1]); + double likeliness = NumberUtils.roundToPrecision(Double.parseDouble(parts[1]), + Constants.LIKELINESS_PRECISION); TopicWord topicWord = new TopicWord(word, likeliness); topicWords.add(topicWord); } catch (NumberFormatException e) { @@ -126,8 +132,8 @@ public class JGibbLDAAnalyzer extends LDAAnalyzer { break; } } - Collections.sort(topicWords); - topicDef.setTopicWords(topicWords); + Collections.sort(topicWords, Collections.reverseOrder()); + topicDef.setWords(topicWords); topicDef.setName(TopicFull.getNameFromWords(topicWords)); return topicDef; } @@ -139,7 +145,7 @@ public class JGibbLDAAnalyzer extends LDAAnalyzer { @Override public ConvertStream<List<TopicRef>> getTopics() throws LDAAnalyzerException { - File tassign = new File(modelDir, "jgibb.tassign"); + File tassign = new File(modelDir, NAME + ".tassign"); try { return new ConvertStream<List<TopicRef>>(tassign) { @Override diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java index 6a34f1ceec90eda810685d7e38b067906723bba4..d5ce849f86f2dc7d0ff7a377bdaf407d4a3da8da 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java @@ -7,9 +7,9 @@ import de.vipra.util.Config; import de.vipra.util.Config.Key; import de.vipra.util.Constants; import de.vipra.util.ConvertStream; +import de.vipra.util.WordMap; import de.vipra.util.model.TopicFull; import de.vipra.util.model.TopicRef; -import de.vipra.util.model.WordMap; public abstract class LDAAnalyzer { diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ClearCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ClearCommand.java index d271eabc64c75a79c23e36812ad7e02b68ee4072..b56dd5965ca68eb14ffa1f0191b5ebf2a9a7e0e3 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ClearCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ClearCommand.java @@ -4,6 +4,7 @@ import java.io.File; import java.io.IOException; import org.apache.commons.io.FileUtils; +import org.bson.types.ObjectId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -24,9 +25,9 @@ public class ClearCommand implements Command { private boolean defaults; private Config config; - private DatabaseService<ProcessedArticle> dbArticles; - private DatabaseService<TopicFull> dbTopics; - private DatabaseService<Word> dbWords; + private DatabaseService<ProcessedArticle, ObjectId> dbArticles; + private DatabaseService<TopicFull, ObjectId> dbTopics; + private DatabaseService<Word, String> dbWords; public ClearCommand(boolean defaults) { this.defaults = defaults; diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/DeleteCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/DeleteCommand.java index 4795c0cfda1aa997f5244141329ed7eaa3437467..ff033a4908cbfbccf467669ee979a523a4414fb5 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/DeleteCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/DeleteCommand.java @@ -5,6 +5,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import org.bson.types.ObjectId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -13,6 +14,7 @@ import de.vipra.cmd.ex.FilebaseException; import de.vipra.cmd.file.Filebase; import de.vipra.cmd.model.ProcessedArticle; import de.vipra.util.Config; +import de.vipra.util.MongoUtils; import de.vipra.util.ex.ConfigException; import de.vipra.util.ex.DatabaseException; import de.vipra.util.service.DatabaseService; @@ -24,7 +26,7 @@ public class DeleteCommand implements Command { private ArrayList<String> ids = new ArrayList<>(); private Config config; - private DatabaseService<ProcessedArticle> dbArticles; + private DatabaseService<ProcessedArticle, ObjectId> dbArticles; private Filebase filebase; /** @@ -53,7 +55,7 @@ public class DeleteCommand implements Command { try { // 1. delete mongodb entry - dbArticles.deleteSingle(id); + dbArticles.deleteSingle(MongoUtils.objectId(id)); } catch (DatabaseException e) { errors.add(e); } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java index 1a54d02dc32420a22cad46339522e56c90855786..7cefa773b8a106dc6085b73943927bb402daec2f 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java @@ -11,6 +11,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import org.bson.types.ObjectId; import org.json.simple.JSONArray; import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; @@ -20,7 +21,6 @@ import org.slf4j.LoggerFactory; import de.vipra.cmd.ExecutionException; import de.vipra.cmd.ex.ImportException; -import de.vipra.cmd.ex.LDAAnalyzerException; import de.vipra.cmd.file.Filebase; import de.vipra.cmd.file.FilebaseIndex; import de.vipra.cmd.lda.LDAAnalyzer; @@ -29,14 +29,15 @@ import de.vipra.cmd.text.ProcessedText; import de.vipra.cmd.text.Processor; import de.vipra.util.Config; import de.vipra.util.ConvertStream; +import de.vipra.util.MongoUtils; import de.vipra.util.StringUtils; import de.vipra.util.Timer; +import de.vipra.util.WordMap; import de.vipra.util.ex.DatabaseException; import de.vipra.util.model.ArticleStats; import de.vipra.util.model.TopicFull; import de.vipra.util.model.TopicRef; import de.vipra.util.model.Word; -import de.vipra.util.model.WordMap; import de.vipra.util.service.DatabaseService; public class ImportCommand implements Command { @@ -47,9 +48,9 @@ public class ImportCommand implements Command { private ArrayList<File> files = new ArrayList<>(); private JSONParser parser = new JSONParser(); private Config config; - private DatabaseService<ProcessedArticle> dbArticles; - private DatabaseService<TopicFull> dbTopics; - private DatabaseService<Word> dbWords; + private DatabaseService<ProcessedArticle, ObjectId> dbArticles; + private DatabaseService<TopicFull, ObjectId> dbTopics; + private DatabaseService<Word, String> dbWords; private Filebase filebase; private Processor preprocessor; private WordMap wordMap; @@ -161,68 +162,6 @@ public class ImportCommand implements Command { return imported; } - /** - * Saves topic definitions into a database collection. Topic definitions - * contain the words assigned to that topic and the likeliness, that a word - * belongs to that topic. - * - * @throws LDAAnalyzerException - * @throws DatabaseException - */ - private Map<String, String> saveTopicDefinitions() throws LDAAnalyzerException, DatabaseException { - ConvertStream<TopicFull> topics = analyzer.getTopicDefinitions(); - Map<String, String> topicIndexMap = new HashMap<>(); - - // recreate topics in database - // create one topic at a time for less memory usage - dbTopics.drop(); - for (TopicFull topic : topics) { - dbTopics.createSingle(topic); - topicIndexMap.put(Integer.toString(topic.getIndex()), topic.getId().toString()); - } - - return topicIndexMap; - } - - /** - * The analyzer saves the topics assigned to document words in the - * "*.tassign" file. This file is read line by line, each line is a single - * document. The line number corresponds to the line number in the index - * file, which holds the object id of that article. The topics are extracted - * and stored in the document. - * - * @throws LDAAnalyzerException - */ - private void saveTopicsPerDocument(Map<String, String> topicIndexMap) throws LDAAnalyzerException { - ConvertStream<List<TopicRef>> topics = analyzer.getTopics(); - FilebaseIndex index = filebase.getIndex(); - - Iterator<String> indexIter = index.iterator(); - Iterator<List<TopicRef>> topicIter = topics.iterator(); - - while (indexIter.hasNext() && topicIter.hasNext()) { - String id = indexIter.next(); - List<TopicRef> topicCount = topicIter.next(); - for (TopicRef tc : topicCount) { - String oid = topicIndexMap.get(tc.getTopicId()); - if (oid != null) - tc.setTopicId(topicIndexMap.get(tc.getTopicId())); - else - log.error("no object id for topic index " + tc.getTopicId()); - } - ProcessedArticle a = dbArticles.getSingle(id); - if (a != null) - a.setTopics(topicCount); - else - log.error("no article found in db for id " + id); - try { - dbArticles.updateSingle(a); - } catch (DatabaseException e) { - log.error("could not update article: " + a.getTitle() + " (" + a.getId() + ")"); - } - } - } - @Override public void run() throws ExecutionException { try { @@ -242,31 +181,80 @@ public class ImportCommand implements Command { Timer timer = new Timer(); timer.start(); - // import files into database and filebase + /* + * import files into database and filebase + */ out.info("file import"); long imported = importFiles(files); timer.lap("import"); - // write filebase + /* + * write filebase + */ out.info("writing file index"); filebase.close(); timer.lap("filebase write"); - // do topic modeling + /* + * do topic modeling + */ out.info("topic modeling"); analyzer.analyze(); timer.lap("topic modeling"); - // save topic model + /* + * save topic model + */ out.info("saving topic definitions"); - Map<String, String> topicIndexMap = saveTopicDefinitions(); + int batchSize = 100; + ConvertStream<TopicFull> topicDefs = analyzer.getTopicDefinitions(); + Map<String, String> topicIndexMap = new HashMap<>(); + dbTopics.drop(); + List<TopicFull> newTopicDefs = new ArrayList<>(batchSize); + Iterator<TopicFull> it = topicDefs.iterator(); + while (it.hasNext()) { + newTopicDefs.add(it.next()); + if (newTopicDefs.size() == batchSize || !it.hasNext()) { + dbTopics.createMultiple(newTopicDefs); + for (TopicFull newTopicDef : newTopicDefs) + topicIndexMap.put(Integer.toString(newTopicDef.getIndex()), newTopicDef.getId().toString()); + } + } timer.lap("saving topics"); - // save topic refs + /* + * save topic refs + */ out.info("saving document topics"); - saveTopicsPerDocument(topicIndexMap); + ConvertStream<List<TopicRef>> topics = analyzer.getTopics(); + FilebaseIndex index = filebase.getIndex(); + Iterator<String> indexIter = index.iterator(); + Iterator<List<TopicRef>> topicIter = topics.iterator(); + while (indexIter.hasNext() && topicIter.hasNext()) { + List<TopicRef> topicCount = topicIter.next(); + for (TopicRef tc : topicCount) { + String oid = topicIndexMap.get(tc.getTopicId()); + tc.setTopicId(oid); + if (oid == null) + log.error("no object id for topic index " + tc.getTopicId()); + } + String id = indexIter.next(); + ProcessedArticle a = dbArticles.getSingle(MongoUtils.objectId(id)); + if (a != null) + a.setTopics(topicCount); + else + log.error("no article found in db for id " + id); + try { + dbArticles.updateSingle(a); + } catch (DatabaseException e) { + log.error("could not update article: " + a.getTitle() + " (" + a.getId() + ")"); + } + } timer.lap("saving topic refs"); + /* + * run information + */ out.info("imported " + imported + " new " + StringUtils.quantity(imported, "article")); long newWords = wordMap.getNewWords(); out.info("imported " + newWords + " new " + StringUtils.quantity(newWords, "word")); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/StatsCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/StatsCommand.java index 4793f14f216a68e9b6816010c471b8d34a091d6e..d8ed55cf854c5b4115c56c9a4d2c54ba5fb2d839 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/StatsCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/StatsCommand.java @@ -3,6 +3,7 @@ package de.vipra.cmd.option; import java.io.File; import java.io.IOException; +import org.bson.types.ObjectId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -22,7 +23,7 @@ public class StatsCommand implements Command { private Config config; private Filebase filebase; - private DatabaseService<TopicFull> dbTopics; + private DatabaseService<TopicFull, ObjectId> dbTopics; private void stats() { File modelFile = filebase.getModelFile(); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/CoreNLPProcessor.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/CoreNLPProcessor.java index afc32c130db9ab84115c5be5218d529b8ba4bf25..87f70cb1a4d0297bfd36eb26b56d7fd14ba8cd3d 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/text/CoreNLPProcessor.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/CoreNLPProcessor.java @@ -30,7 +30,7 @@ public class CoreNLPProcessor extends Processor { @Override public ProcessedText preprocess(String input) throws PreprocessorException { - Annotation doc = new Annotation(input); + Annotation doc = new Annotation(input.toLowerCase()); nlp.annotate(doc); StringBuilder sb = new StringBuilder(); List<CoreMap> sentences = doc.get(SentencesAnnotation.class); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/StopwordsAnnotator.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/StopwordsAnnotator.java index 5415cde706b16540f3dd50c475dac74f998aa689..838bbf033f0dcd8a134947d094c7a51f7cb64d38 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/text/StopwordsAnnotator.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/StopwordsAnnotator.java @@ -27,7 +27,7 @@ public class StopwordsAnnotator implements Annotator, CoreAnnotation<Boolean> { public void annotate(Annotation annotation) { List<CoreLabel> tokens = annotation.get(TokensAnnotation.class); for (CoreLabel token : tokens) { - if (stopWords.contains(token.word().toLowerCase())) { + if (stopWords.contains(token.word())) { token.set(StopwordsAnnotator.class, true); } } diff --git a/vipra-rest/.project b/vipra-rest/.project index 6c0bf9d8ef012a14ad6139f5c6e3df68ed44e5c3..569e5873ae092077c30dea1efc63f96c79aa8564 100644 --- a/vipra-rest/.project +++ b/vipra-rest/.project @@ -31,6 +31,11 @@ <arguments> </arguments> </buildCommand> + <buildCommand> + <name>net.sourceforge.metrics.builder</name> + <arguments> + </arguments> + </buildCommand> </buildSpec> <natures> <nature>org.eclipse.jem.workbench.JavaEMFNature</nature> @@ -39,5 +44,6 @@ <nature>org.eclipse.m2e.core.maven2Nature</nature> <nature>org.eclipse.wst.common.project.facet.core.nature</nature> <nature>org.eclipse.wst.jsdt.core.jsNature</nature> + <nature>net.sourceforge.metrics.nature</nature> </natures> </projectDescription> diff --git a/vipra-rest/pom.xml b/vipra-rest/pom.xml index ff073cf971f5db8c048fa8280b9a875cc5d7bc43..9606dffeda6022f3b758a599b01e8c0c58f6c927 100644 --- a/vipra-rest/pom.xml +++ b/vipra-rest/pom.xml @@ -18,6 +18,7 @@ <jettyVersion>9.3.6.v20151106</jettyVersion> <servletVersion>3.1.0</servletVersion> <log4jVersion>2.4.1</log4jVersion> + <jacksonVersion>2.7.0</jacksonVersion> </properties> <build> @@ -34,7 +35,19 @@ <dependency> <groupId>org.glassfish.jersey.media</groupId> <artifactId>jersey-media-json-jackson</artifactId> - <version>2.22.1</version> + <version>${jerseyVersion}</version> + </dependency> + + <!-- Jackson --> + <dependency> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-databind</artifactId> + <version>${jacksonVersion}</version> + </dependency> + <dependency> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-annotations</artifactId> + <version>${jacksonVersion}</version> </dependency> <!-- Servlet API --> diff --git a/vipra-rest/src/main/java/de/vipra/rest/model/Wrapper.java b/vipra-rest/src/main/java/de/vipra/rest/model/Wrapper.java index 5c43b033c42c976546cf8bce3b43b56836f78021..d18f8d75db0c9b9240a1b466baa99b99dd3a010f 100644 --- a/vipra-rest/src/main/java/de/vipra/rest/model/Wrapper.java +++ b/vipra-rest/src/main/java/de/vipra/rest/model/Wrapper.java @@ -1,12 +1,23 @@ package de.vipra.rest.model; +import java.net.URI; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; + +import javax.ws.rs.core.Response; +import javax.ws.rs.core.Response.ResponseBuilder; +import javax.ws.rs.core.Response.Status; +import javax.ws.rs.core.UriBuilder; + +import de.vipra.rest.Messages; public class Wrapper<T> { private T data; private List<APIError> errors; + private Map<String, String> links; private final APIVersion jsonapi = new APIVersion(); public Wrapper() {} @@ -38,12 +49,25 @@ public class Wrapper<T> { } public void addError(APIError error) { - if (errors == null) { + if (errors == null) errors = new ArrayList<>(); - } errors.add(error); } + public Map<String, String> getLinks() { + return links; + } + + public void setLinks(Map<String, String> links) { + this.links = links; + } + + public void addLink(String key, String link) { + if (links == null) + links = new HashMap<>(); + links.put(key, link); + } + public APIVersion getJsonapi() { return jsonapi; } @@ -52,4 +76,83 @@ public class Wrapper<T> { return data != null ? Integer.toString(data.hashCode()) : null; } + public void addPaginationLinks(URI base, Integer skip, Integer limit, long count) { + if (skip == null || limit == null || limit == 0) + return; + + if (skip < 0) { + addError(new APIError(Response.Status.BAD_REQUEST, "Wrong skip number", + String.format(Messages.BAD_REQUEST, "skip number must be greater or equal to 0"))); + return; + } + + if (limit < 0) { + addError(new APIError(Response.Status.BAD_REQUEST, "Wrong limit size", + String.format(Messages.BAD_REQUEST, "when using skip, limit size must be greater or equal to 1"))); + return; + } + + addLink("first", UriBuilder.fromUri(base).queryParam("skip", 0).queryParam("limit", limit).build().toString()); + addLink("last", UriBuilder.fromUri(base).queryParam("skip", (count / limit) * limit).queryParam("limit", limit) + .build().toString()); + + if (skip > 0) { + int diff = skip % limit; + if (diff == 0) + diff = limit; + int prevSkip = Math.max(0, skip - diff); + addLink("prev", UriBuilder.fromUri(base).queryParam("skip", prevSkip).queryParam("limit", limit).build() + .toString()); + } + + if (skip + limit < count) { + int diff = limit - skip % limit; + if (diff == 0) + diff = limit; + int nextSkip = skip + diff; + addLink("next", UriBuilder.fromUri(base).queryParam("skip", nextSkip).queryParam("limit", limit).build() + .toString()); + } + } + + public boolean hasErrors() { + return errors != null && errors.size() > 0; + } + + public Response ok() { + return Response.ok().entity(this).tag(tag()).build(); + } + + public Response ok(T data) { + this.data = data; + return ok(); + } + + public Response badRequest() { + return Response.status(Status.BAD_REQUEST).entity(this).build(); + } + + public Response serverError() { + return Response.serverError().entity(this).build(); + } + + public Response created(URI loc) { + return Response.created(loc).entity(this).build(); + } + + public Response notFound() { + return Response.status(Status.NOT_FOUND).entity(this).build(); + } + + public Response noContent() { + return Response.noContent().build(); + } + + public Response status(Status status, boolean withEntity) { + ResponseBuilder r = Response.status(status); + if (withEntity) + r.entity(this); + return r.build(); + } + } diff --git a/vipra-rest/src/main/java/de/vipra/rest/provider/ObjectMapperProvider.java b/vipra-rest/src/main/java/de/vipra/rest/provider/ObjectMapperProvider.java index e7f5577aaca3739c57e2515e223ad87e444e8cde..2b1060cfdc5ac54630eb887bafae3a7a771334f7 100644 --- a/vipra-rest/src/main/java/de/vipra/rest/provider/ObjectMapperProvider.java +++ b/vipra-rest/src/main/java/de/vipra/rest/provider/ObjectMapperProvider.java @@ -21,6 +21,7 @@ import de.vipra.rest.serializer.ObjectIdSerializer; import de.vipra.util.Constants; import de.vipra.util.model.Article; import de.vipra.util.model.TopicFull; +import de.vipra.util.model.Word; @Provider public class ObjectMapperProvider implements ContextResolver<ObjectMapper> { @@ -46,6 +47,9 @@ public class ObjectMapperProvider implements ContextResolver<ObjectMapper> { module.addSerializer(TopicFull.class, new GenericSerializer<TopicFull>(TopicFull.class)); module.addDeserializer(TopicFull.class, new GenericDeserializer<TopicFull>(TopicFull.class)); + module.addSerializer(Word.class, new GenericSerializer<Word>(Word.class)); + module.addDeserializer(Word.class, new GenericDeserializer<Word>(Word.class)); + module.addSerializer(ObjectId.class, new ObjectIdSerializer()); module.addDeserializer(ObjectId.class, new ObjectIdDeserializer()); diff --git a/vipra-rest/src/main/java/de/vipra/rest/resource/ArticleResource.java b/vipra-rest/src/main/java/de/vipra/rest/resource/ArticleResource.java index bf182eaacc59fe0f56f75fa70703944608d1c954..79d0ccc09c2d8d4979f603d7d45f476fc9d99227 100644 --- a/vipra-rest/src/main/java/de/vipra/rest/resource/ArticleResource.java +++ b/vipra-rest/src/main/java/de/vipra/rest/resource/ArticleResource.java @@ -1,6 +1,10 @@ package de.vipra.rest.resource; import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; import java.util.List; import javax.servlet.ServletContext; @@ -18,16 +22,17 @@ import javax.ws.rs.core.Context; import javax.ws.rs.core.Response; import javax.ws.rs.core.UriInfo; +import org.bson.types.ObjectId; import org.ehcache.Cache; import org.ehcache.CacheManager; import org.ehcache.config.CacheConfigurationBuilder; import de.vipra.rest.APIMediaType; import de.vipra.rest.Messages; -import de.vipra.rest.PATCH; import de.vipra.rest.model.APIError; import de.vipra.rest.model.Wrapper; import de.vipra.util.Config; +import de.vipra.util.MongoUtils; import de.vipra.util.ex.ConfigException; import de.vipra.util.ex.DatabaseException; import de.vipra.util.model.Article; @@ -39,8 +44,8 @@ public class ArticleResource { @Context UriInfo uri; - final Cache<String, Article> articleCache; - final DatabaseService<Article> service; + final Cache<String, Article> cache; + final DatabaseService<Article, ObjectId> service; public ArticleResource(@Context ServletContext servletContext) throws ConfigException, IOException { Config config = Config.getConfig(); @@ -51,40 +56,56 @@ public class ArticleResource { if (articleCache == null) articleCache = manager.createCache("articlecache", CacheConfigurationBuilder.newCacheConfigurationBuilder().buildConfig(String.class, Article.class)); - this.articleCache = articleCache; + this.cache = articleCache; } @GET @Produces(APIMediaType.APPLICATION_JSONAPI) - public Response getArticles(@QueryParam("skip") @DefaultValue("0") int skip, - @QueryParam("limit") @DefaultValue("0") int limit, - @QueryParam("sort") @DefaultValue("date") String sortBy) { - List<Article> articles = service.getMultiple(skip, limit, sortBy); - Wrapper<List<Article>> res = new Wrapper<>(articles); - return Response.ok().entity(res).tag(res.tag()).build(); + public Response getArticles(@QueryParam("skip") Integer skip, @QueryParam("limit") Integer limit, + @QueryParam("sort") @DefaultValue("date") String sortBy, @QueryParam("fields") String fields) { + Wrapper<List<Article>> res = new Wrapper<>(); + + if (skip != null && limit != null) + res.addPaginationLinks(uri.getAbsolutePath(), skip, limit, service.count()); + + if (res.hasErrors()) + return res.badRequest(); + + try { + List<Article> articles = service.getMultiple(skip, limit, sortBy, getFields(fields)); + return res.ok(articles); + } catch (Exception e) { + res.addError(new APIError(Response.Status.BAD_REQUEST, "Error", e.getMessage())); + return res.badRequest(); + } } @GET @Produces(APIMediaType.APPLICATION_JSONAPI) @Consumes(APIMediaType.APPLICATION_JSONAPI) @Path("{id}") - public Response getArticle(@PathParam("id") String id) { + public Response getArticle(@PathParam("id") String id, @QueryParam("fields") String fields) { Wrapper<Article> res = new Wrapper<>(); if (id == null || id.trim().length() == 0) { res.addError(new APIError(Response.Status.BAD_REQUEST, "ID is empty", String.format(Messages.BAD_REQUEST, "id cannot be empty"))); - return Response.status(Response.Status.BAD_REQUEST).entity(res).build(); + return res.badRequest(); } - Article article = getSingle(id); + Article article; + try { + article = getSingle(id, getFields(fields)); + } catch (Exception e) { + res.addError(new APIError(Response.Status.BAD_REQUEST, "Error", e.getMessage())); + return res.badRequest(); + } if (article != null) { - res.setData(article); - return Response.ok().entity(res).tag(res.tag()).build(); + return res.ok(article); } else { - String msg = String.format(Messages.NOT_FOUND, "article", id); - res.addError(new APIError(Response.Status.NOT_FOUND, "Resource not found", msg)); - return Response.status(Response.Status.NOT_FOUND).entity(res).build(); + res.addError(new APIError(Response.Status.NOT_FOUND, "Resource not found", + String.format(Messages.NOT_FOUND, "article", id))); + return res.notFound(); } } @@ -96,11 +117,12 @@ public class ArticleResource { try { article = service.createSingle(article); res = new Wrapper<>(article); - return Response.created(article.uri(uri.getAbsolutePath())).entity(res).tag(res.tag()).build(); - } catch (DatabaseException e) { + URI newUri = new URL(uri.getAbsolutePath().toURL(), article.getId().toString()).toURI(); + return res.created(newUri); + } catch (DatabaseException | MalformedURLException | URISyntaxException e) { res = new Wrapper<>(new APIError(Response.Status.INTERNAL_SERVER_ERROR, "item could not be created", "item could not be created due to an internal server error")); - return Response.serverError().entity(res).build(); + return res.serverError(); } } @@ -110,23 +132,23 @@ public class ArticleResource { Wrapper<Article> res = new Wrapper<>(); long deleted; try { - deleted = service.deleteSingle(id); + deleted = service.deleteSingle(MongoUtils.objectId(id)); } catch (DatabaseException e) { res = new Wrapper<>(new APIError(Response.Status.INTERNAL_SERVER_ERROR, "item could not be deleted", "item could not be created due to an internal server error")); - return Response.serverError().entity(res).build(); + return res.serverError(); } - articleCache.remove(id); + cache.remove(id); int del = deleted > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) deleted; switch (del) { case 0: res.addError(new APIError(Response.Status.NOT_FOUND, "Article not found", String.format(Messages.NOT_FOUND, "article", id))); - return Response.status(Response.Status.NOT_FOUND).entity(res).build(); + return res.notFound(); case 1: - return Response.noContent().build(); + return res.noContent(); default: - return Response.serverError().build(); + return res.serverError(); } } @@ -139,35 +161,35 @@ public class ArticleResource { Wrapper<Article> res = new Wrapper<>(); try { service.updateSingle(article); - articleCache.put(id, article); - res.setData(article); - return Response.ok().entity(res).tag(res.tag()).build(); + cache.put(id, article); + return res.ok(article); } catch (DatabaseException e) { res = new Wrapper<>(new APIError(Response.Status.INTERNAL_SERVER_ERROR, "item could not be updated", "item could not be updated due to an internal server error")); - return Response.serverError().entity(res).build(); + return res.serverError(); } } - @PATCH - @Consumes(APIMediaType.APPLICATION_JSONAPI) - @Produces(APIMediaType.APPLICATION_JSONAPI) - @Path("{id}") - public Response updateArticle(@PathParam("id") String id, Wrapper<Article> wrapper) { - Article newArticle = wrapper.getData(); - Article article = getSingle(id); - // TODO implement - return null; + private Article getSingle(String id, String[] fields) { + if (fields.length == 0) { + Article article = cache.get(id); + if (article == null) { + article = service.getSingle(MongoUtils.objectId(id)); + if (article != null) + cache.put(id, article); + } + return article; + } else + return service.getSingle(MongoUtils.objectId(id), fields); } - private Article getSingle(String id) { - Article article = articleCache.get(id); - if (article == null) { - article = service.getSingle(id); - if (article != null) - articleCache.put(id, article); - } - return article; + private String[] getFields(String fields) { + if (fields == null) + return null; + fields = fields.trim(); + if (fields.length() == 0) + return null; + return fields.split(","); } } diff --git a/vipra-rest/src/main/java/de/vipra/rest/resource/TopicResource.java b/vipra-rest/src/main/java/de/vipra/rest/resource/TopicResource.java index 44cc2964da7a70befbeba57c8bc4b269fa9543ce..bf471644813542a3ae88a257b75c44aaae96ee5e 100644 --- a/vipra-rest/src/main/java/de/vipra/rest/resource/TopicResource.java +++ b/vipra-rest/src/main/java/de/vipra/rest/resource/TopicResource.java @@ -16,16 +16,17 @@ import javax.ws.rs.core.Context; import javax.ws.rs.core.Response; import javax.ws.rs.core.UriInfo; +import org.bson.types.ObjectId; import org.ehcache.Cache; import org.ehcache.CacheManager; import org.ehcache.config.CacheConfigurationBuilder; import de.vipra.rest.APIMediaType; import de.vipra.rest.Messages; -import de.vipra.rest.PATCH; import de.vipra.rest.model.APIError; import de.vipra.rest.model.Wrapper; import de.vipra.util.Config; +import de.vipra.util.MongoUtils; import de.vipra.util.ex.ConfigException; import de.vipra.util.ex.DatabaseException; import de.vipra.util.model.TopicFull; @@ -37,8 +38,8 @@ public class TopicResource { @Context UriInfo uri; - final Cache<String, TopicFull> topicCache; - final DatabaseService<TopicFull> service; + final Cache<String, TopicFull> cache; + final DatabaseService<TopicFull, ObjectId> service; public TopicResource(@Context ServletContext servletContext) throws ConfigException, IOException { Config config = Config.getConfig(); @@ -49,39 +50,56 @@ public class TopicResource { if (topicCache == null) topicCache = manager.createCache("topiccache", CacheConfigurationBuilder.newCacheConfigurationBuilder() .buildConfig(String.class, TopicFull.class)); - this.topicCache = topicCache; + this.cache = topicCache; } @GET @Produces(APIMediaType.APPLICATION_JSONAPI) - public Response getTopics(@QueryParam("skip") @DefaultValue("0") int skip, - @QueryParam("limit") @DefaultValue("0") int limit) { - List<TopicFull> topics = service.getMultiple(skip, limit, null); - Wrapper<List<TopicFull>> res = new Wrapper<>(topics); - return Response.ok().entity(res).tag(res.tag()).build(); + public Response getTopics(@QueryParam("skip") Integer skip, @QueryParam("limit") Integer limit, + @QueryParam("sort") @DefaultValue("date") String sortBy, @QueryParam("fields") String fields) { + Wrapper<List<TopicFull>> res = new Wrapper<>(); + + if (skip != null && limit != null) + res.addPaginationLinks(uri.getAbsolutePath(), skip, limit, service.count()); + + if (res.hasErrors()) + return Response.status(Response.Status.BAD_REQUEST).entity(res).build(); + + try { + List<TopicFull> topics = service.getMultiple(skip, limit, sortBy, getFields(fields)); + return res.ok(topics); + } catch (Exception e) { + res.addError(new APIError(Response.Status.BAD_REQUEST, "Error", e.getMessage())); + return Response.status(Response.Status.BAD_REQUEST).entity(res).build(); + } } @GET @Produces(APIMediaType.APPLICATION_JSONAPI) @Consumes(APIMediaType.APPLICATION_JSONAPI) @Path("{id}") - public Response getTopic(@PathParam("id") String id) { + public Response getTopic(@PathParam("id") String id, @QueryParam("fields") String fields) { Wrapper<TopicFull> res = new Wrapper<>(); if (id == null || id.trim().length() == 0) { res.addError(new APIError(Response.Status.BAD_REQUEST, "ID is empty", String.format(Messages.BAD_REQUEST, "id cannot be empty"))); - return Response.status(Response.Status.BAD_REQUEST).entity(res).build(); + return res.badRequest(); } - TopicFull topic = getSingle(id); + TopicFull topic; + try { + topic = getSingle(id, getFields(fields)); + } catch (Exception e) { + res.addError(new APIError(Response.Status.BAD_REQUEST, "Error", e.getMessage())); + return res.badRequest(); + } - if (topic != null) { - res.setData(topic); - return Response.ok().entity(res).tag(res.tag()).build(); - } else { + if (topic != null) + return res.ok(topic); + else { res.addError(new APIError(Response.Status.NOT_FOUND, "Resource not found", String.format(Messages.NOT_FOUND, "topic", id))); - return Response.status(Response.Status.NOT_FOUND).entity(res).build(); + return res.notFound(); } } @@ -94,35 +112,35 @@ public class TopicResource { Wrapper<TopicFull> res = new Wrapper<>(); try { service.updateSingle(topic); - topicCache.put(id, topic); - res.setData(topic); - return Response.ok().entity(res).tag(res.tag()).build(); + cache.put(id, topic); + return res.ok(topic); } catch (DatabaseException e) { res = new Wrapper<>(new APIError(Response.Status.INTERNAL_SERVER_ERROR, "item could not be updated", "item could not be updated due to an internal server error")); - return Response.serverError().entity(res).build(); + return res.serverError(); } } - @PATCH - @Consumes(APIMediaType.APPLICATION_JSONAPI) - @Produces(APIMediaType.APPLICATION_JSONAPI) - @Path("{id}") - public Response updateTopic(@PathParam("id") String id, Wrapper<TopicFull> wrapper) { - TopicFull newTopic = wrapper.getData(); - TopicFull topic = getSingle(id); - // TODO implement - return null; + private TopicFull getSingle(String id, String[] fields) { + if (fields.length == 0) { + TopicFull topic = cache.get(id); + if (topic == null) { + topic = service.getSingle(MongoUtils.objectId(id)); + if (topic != null) + cache.put(id, topic); + } + return topic; + } else + return service.getSingle(MongoUtils.objectId(id), fields); } - private TopicFull getSingle(String id) { - TopicFull topic = topicCache.get(id); - if (topic == null) { - topic = service.getSingle(id); - if (topic != null) - topicCache.put(id, topic); - } - return topic; + private String[] getFields(String fields) { + if (fields == null) + return null; + fields = fields.trim(); + if (fields.length() == 0) + return null; + return fields.split(","); } } diff --git a/vipra-rest/src/main/java/de/vipra/rest/resource/WordResource.java b/vipra-rest/src/main/java/de/vipra/rest/resource/WordResource.java new file mode 100644 index 0000000000000000000000000000000000000000..b765eafbb94b409a582d605bdc506069fb898a13 --- /dev/null +++ b/vipra-rest/src/main/java/de/vipra/rest/resource/WordResource.java @@ -0,0 +1,124 @@ +package de.vipra.rest.resource; + +import java.io.IOException; +import java.util.List; + +import javax.servlet.ServletContext; +import javax.ws.rs.Consumes; +import javax.ws.rs.DefaultValue; +import javax.ws.rs.GET; +import javax.ws.rs.Path; +import javax.ws.rs.PathParam; +import javax.ws.rs.Produces; +import javax.ws.rs.QueryParam; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.Response; +import javax.ws.rs.core.UriInfo; + +import org.ehcache.Cache; +import org.ehcache.CacheManager; +import org.ehcache.config.CacheConfigurationBuilder; + +import de.vipra.rest.APIMediaType; +import de.vipra.rest.Messages; +import de.vipra.rest.model.APIError; +import de.vipra.rest.model.Wrapper; +import de.vipra.util.Config; +import de.vipra.util.ex.ConfigException; +import de.vipra.util.model.Word; +import de.vipra.util.service.DatabaseService; + +@Path("words") +public class WordResource { + + @Context + UriInfo uri; + + final Cache<String, Word> cache; + final DatabaseService<Word, String> service; + + public WordResource(@Context ServletContext servletContext) throws ConfigException, IOException { + Config config = Config.getConfig(); + service = DatabaseService.getDatabaseService(config, Word.class); + + CacheManager manager = (CacheManager) servletContext.getAttribute("cachemanager"); + Cache<String, Word> wordCache = manager.getCache("wordcache", String.class, Word.class); + if (wordCache == null) + wordCache = manager.createCache("wordcache", + CacheConfigurationBuilder.newCacheConfigurationBuilder().buildConfig(String.class, Word.class)); + this.cache = wordCache; + } + + @GET + @Produces(APIMediaType.APPLICATION_JSONAPI) + public Response getWords(@QueryParam("skip") Integer skip, @QueryParam("limit") Integer limit, + @QueryParam("sort") @DefaultValue("word") String sortBy, @QueryParam("fields") String fields) { + Wrapper<List<Word>> res = new Wrapper<>(); + + if (skip != null && limit != null) + res.addPaginationLinks(uri.getAbsolutePath(), skip, limit, service.count()); + + if (res.hasErrors()) + return res.badRequest(); + + try { + List<Word> words = service.getMultiple(skip, limit, sortBy, getFields(fields)); + return res.ok(words); + } catch (Exception e) { + res.addError(new APIError(Response.Status.BAD_REQUEST, "Error", e.getMessage())); + return res.badRequest(); + } + } + + @GET + @Produces(APIMediaType.APPLICATION_JSONAPI) + @Consumes(APIMediaType.APPLICATION_JSONAPI) + @Path("{id}") + public Response getWord(@PathParam("id") String id, @QueryParam("fields") String fields) { + Wrapper<Word> res = new Wrapper<>(); + if (id == null || id.trim().length() == 0) { + res.addError(new APIError(Response.Status.BAD_REQUEST, "ID is empty", + String.format(Messages.BAD_REQUEST, "id cannot be empty"))); + return res.badRequest(); + } + + Word word; + try { + word = getSingle(id, getFields(fields)); + } catch (Exception e) { + res.addError(new APIError(Response.Status.BAD_REQUEST, "Error", e.getMessage())); + return res.badRequest(); + } + + if (word != null) + return res.ok(word); + else { + String msg = String.format(Messages.NOT_FOUND, "word", id); + res.addError(new APIError(Response.Status.NOT_FOUND, "Resource not found", msg)); + return res.notFound(); + } + } + + private Word getSingle(String id, String[] fields) { + if (fields.length > 0) { + Word word = cache.get(id); + if (word == null) { + word = service.getSingle(id); + if (word != null) + cache.put(id, word); + } + return word; + } else + return service.getSingle(id, fields); + } + + private String[] getFields(String fields) { + if (fields == null) + return null; + fields = fields.trim(); + if (fields.length() == 0) + return null; + return fields.split(","); + } + +} diff --git a/vipra-rest/src/main/java/de/vipra/rest/serializer/GenericDeserializer.java b/vipra-rest/src/main/java/de/vipra/rest/serializer/GenericDeserializer.java index ff9ea7d98237212ef1c0b82d7cffdef0a520781e..5945e5cf4ac76a455be14a2e59bac44d431eb422 100644 --- a/vipra-rest/src/main/java/de/vipra/rest/serializer/GenericDeserializer.java +++ b/vipra-rest/src/main/java/de/vipra/rest/serializer/GenericDeserializer.java @@ -17,7 +17,6 @@ import com.fasterxml.jackson.core.JsonToken; import com.fasterxml.jackson.databind.DeserializationContext; import com.fasterxml.jackson.databind.JsonDeserializer; -import de.vipra.util.an.JsonField; import de.vipra.util.an.JsonWrap; import de.vipra.util.model.Model; @@ -42,18 +41,14 @@ public class GenericDeserializer<T extends Model> extends JsonDeserializer<T> { String name = field.getName(); - JsonWrap jw = field.getDeclaredAnnotation(JsonWrap.class); - if (jw != null) - name = jw.value() + "." + name; - - JsonField jf = field.getDeclaredAnnotation(JsonField.class); - if (jf != null) - name = jf.value(); - JsonProperty jp = field.getDeclaredAnnotation(JsonProperty.class); if (jp != null) name = jp.value(); + JsonWrap jw = field.getDeclaredAnnotation(JsonWrap.class); + if (jw != null) + name = jw.value() + "." + name; + allFields.put(name, field); String[] parts = name.split("\\."); diff --git a/vipra-rest/src/main/java/de/vipra/rest/serializer/GenericSerializer.java b/vipra-rest/src/main/java/de/vipra/rest/serializer/GenericSerializer.java index b7c896c0e33cb600c4ac2895fefdc12b55ea7a29..7d76f86ea5ee4b35001e2166c9a4dc5ad6a039d5 100644 --- a/vipra-rest/src/main/java/de/vipra/rest/serializer/GenericSerializer.java +++ b/vipra-rest/src/main/java/de/vipra/rest/serializer/GenericSerializer.java @@ -11,13 +11,13 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; +import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonSerializer; import com.fasterxml.jackson.databind.SerializerProvider; -import de.vipra.util.an.JsonField; import de.vipra.util.an.JsonType; import de.vipra.util.an.JsonWrap; import de.vipra.util.model.Model; @@ -42,26 +42,20 @@ public class GenericSerializer<T extends Model> extends JsonSerializer<T> { if (Modifier.isPrivate(modifiers) && !Modifier.isStatic(modifiers)) { field.setAccessible(true); - com.fasterxml.jackson.annotation.JsonIgnore ji1 = field - .getDeclaredAnnotation(com.fasterxml.jackson.annotation.JsonIgnore.class); - de.vipra.util.an.JsonIgnore ji2 = field.getDeclaredAnnotation(de.vipra.util.an.JsonIgnore.class); - if ((ji1 != null && ji1.value()) || (ji2 != null && ji2.value())) + JsonIgnore ji = field.getDeclaredAnnotation(JsonIgnore.class); + if (ji != null && ji.value()) continue; String name = field.getName(); - JsonWrap jw = field.getDeclaredAnnotation(JsonWrap.class); - if (jw != null) - name = jw.value() + "." + name; - - JsonField jf = field.getDeclaredAnnotation(JsonField.class); - if (jf != null) - name = jf.value(); - JsonProperty jp = field.getDeclaredAnnotation(JsonProperty.class); if (jp != null) name = jp.value(); + JsonWrap jw = field.getDeclaredAnnotation(JsonWrap.class); + if (jw != null) + name = jw.value() + "." + name; + foundFields.put(name, field); } } diff --git a/vipra-ui/app/components/articles-list.js b/vipra-ui/app/components/items-list.js similarity index 96% rename from vipra-ui/app/components/articles-list.js rename to vipra-ui/app/components/items-list.js index 88005348a07bb3392c807c181fac5b46e8e5311d..859d9ed8dcc79978b27e95a8b7eb52ac6c95fef3 100644 --- a/vipra-ui/app/components/articles-list.js +++ b/vipra-ui/app/components/items-list.js @@ -2,6 +2,8 @@ import Ember from 'ember'; export default Ember.Component.extend({ + route: "", + filteredItems: Ember.computed('items', 'filter', function() { var keyword = this.get('filter'); var filtered = this.get('items'); diff --git a/vipra-ui/app/components/topics-list.js b/vipra-ui/app/components/topics-list.js deleted file mode 100644 index 88005348a07bb3392c807c181fac5b46e8e5311d..0000000000000000000000000000000000000000 --- a/vipra-ui/app/components/topics-list.js +++ /dev/null @@ -1,15 +0,0 @@ -import Ember from 'ember'; - -export default Ember.Component.extend({ - - filteredItems: Ember.computed('items', 'filter', function() { - var keyword = this.get('filter'); - var filtered = this.get('items'); - if (keyword) { - keyword = keyword.toLowerCase().trim(); - filtered = this.get('items').filter((item) => item.get('_name').toLowerCase().includes(keyword)); - } - return filtered; - }) - -}); \ No newline at end of file diff --git a/vipra-ui/app/models/article.js b/vipra-ui/app/models/article.js index 0c23451f312a782c6fd7ee54314145af85dc0b14..fc38e0953d2df40b7c1ca83dec430ed494561ed0 100644 --- a/vipra-ui/app/models/article.js +++ b/vipra-ui/app/models/article.js @@ -7,6 +7,8 @@ export default DS.Model.extend({ date: DS.attr('date'), stats: DS.attr(), topics: DS.attr(), + created: DS.attr('date'), + modified: DS.attr('date'), _name: function() { var title = this.get('title'); diff --git a/vipra-ui/app/models/topic.js b/vipra-ui/app/models/topic.js index 73627bf94ab0d0825e7b906e3dd8c1cd792a5ed5..f1aa4bc57d08ff198ea7fdd730e15892298f2d4f 100644 --- a/vipra-ui/app/models/topic.js +++ b/vipra-ui/app/models/topic.js @@ -4,6 +4,8 @@ export default DS.Model.extend({ name: DS.attr(), index: DS.attr(), words: DS.attr(), + created: DS.attr('date'), + modified: DS.attr('date'), _name: function() { var name = this.get('name'); diff --git a/vipra-ui/app/models/word.js b/vipra-ui/app/models/word.js new file mode 100644 index 0000000000000000000000000000000000000000..a18091a143581c6708b00d29aa9b69fac1f6e813 --- /dev/null +++ b/vipra-ui/app/models/word.js @@ -0,0 +1,9 @@ +import DS from 'ember-data'; + +export default DS.Model.extend({ + word: DS.attr(), + + _name: function() { + return this.get('id'); + }.property('id') +}); \ No newline at end of file diff --git a/vipra-ui/app/router.js b/vipra-ui/app/router.js index e856b1103fc1d241fcc738fdd3fbd9c30ecb939b..1d31b063ec3f6582c615afab414a55a5479c2f18 100644 --- a/vipra-ui/app/router.js +++ b/vipra-ui/app/router.js @@ -14,6 +14,9 @@ Router.map(function() { this.route('edit'); }); }); + this.route('words', function() { + this.route('show', { path: '/:word_id' }); + }); this.route('not-found', { path: '/*:' }); }); diff --git a/vipra-ui/app/routes/words/index.js b/vipra-ui/app/routes/words/index.js new file mode 100644 index 0000000000000000000000000000000000000000..e83ce4c68892db5333e925d5a8850336f47e8c55 --- /dev/null +++ b/vipra-ui/app/routes/words/index.js @@ -0,0 +1,9 @@ +import Ember from 'ember'; + +export default Ember.Route.extend({ + model() { + return Ember.RSVP.hash({ + words: this.store.findAll('word') + }); + } +}); \ No newline at end of file diff --git a/vipra-ui/app/routes/words/show.js b/vipra-ui/app/routes/words/show.js new file mode 100644 index 0000000000000000000000000000000000000000..e545d25b72fa7d093ab8a13e2ac7ffd5b9edcf63 --- /dev/null +++ b/vipra-ui/app/routes/words/show.js @@ -0,0 +1,9 @@ +import Ember from 'ember'; + +export default Ember.Route.extend({ + model(params) { + return Ember.RSVP.hash({ + word: this.store.find('word', params.word_id) + }); + } +}); \ No newline at end of file diff --git a/vipra-ui/app/templates/articles/index.hbs b/vipra-ui/app/templates/articles/index.hbs index 9b8d3390bf86c001a7e35f1f79ef5acd5d1d78b2..e975dcaae2c20db5fc786a227795c71cea31dc7b 100644 --- a/vipra-ui/app/templates/articles/index.hbs +++ b/vipra-ui/app/templates/articles/index.hbs @@ -4,4 +4,4 @@ {{debounced-input placeholder='Filter' size='50' value=filter debounce='150'}} -{{articles-list items=model.articles filter=filter}} \ No newline at end of file +{{items-list items=model.articles filter=filter route='articles.show'}} \ No newline at end of file diff --git a/vipra-ui/app/templates/components/articles-list.hbs b/vipra-ui/app/templates/components/articles-list.hbs deleted file mode 100644 index c4c3d1af7f4072cd95842c58da187c0f406a1f9e..0000000000000000000000000000000000000000 --- a/vipra-ui/app/templates/components/articles-list.hbs +++ /dev/null @@ -1,5 +0,0 @@ -<ol> - {{#each filteredItems as |article|}} - <li>{{#link-to 'articles.show' article.id}}{{text-marker text=article.title mark=filter}}{{/link-to}}</li> - {{/each}} -</ol> \ No newline at end of file diff --git a/vipra-ui/app/templates/components/items-list.hbs b/vipra-ui/app/templates/components/items-list.hbs new file mode 100644 index 0000000000000000000000000000000000000000..8402078b74f1895d8be0c7224c516ade137cf0d5 --- /dev/null +++ b/vipra-ui/app/templates/components/items-list.hbs @@ -0,0 +1,5 @@ +<ol> + {{#each filteredItems as |item|}} + <li>{{#link-to route item.id}}{{text-marker text=item._name mark=filter}}{{/link-to}}</li> + {{/each}} +</ol> \ No newline at end of file diff --git a/vipra-ui/app/templates/components/topics-list.hbs b/vipra-ui/app/templates/components/topics-list.hbs deleted file mode 100644 index 5ab07c256db216380b2dc55cb8825ba7a8052105..0000000000000000000000000000000000000000 --- a/vipra-ui/app/templates/components/topics-list.hbs +++ /dev/null @@ -1,5 +0,0 @@ -<ol> - {{#each filteredItems as |topic|}} - <li>{{#link-to 'topics.show' topic.id}}{{text-marker text=topic._name mark=filter}}{{/link-to}}</li> - {{/each}} -</ol> \ No newline at end of file diff --git a/vipra-ui/app/templates/index.hbs b/vipra-ui/app/templates/index.hbs index e0aaedcbe2d94fc3a581d4c30c193f4d3c7ca7ff..c7e27e938d0a980c7f81a15f59e1281cc51faf71 100644 --- a/vipra-ui/app/templates/index.hbs +++ b/vipra-ui/app/templates/index.hbs @@ -1,5 +1,5 @@ <h1>Vipra</h1> -{{#link-to 'articles'}}Articles{{/link-to}} - -{{#link-to 'topics'}}Topics{{/link-to}} \ No newline at end of file +{{#link-to 'articles'}}Articles{{/link-to}}<br> +{{#link-to 'topics'}}Topics{{/link-to}}<br> +{{#link-to 'words'}}Words{{/link-to}} \ No newline at end of file diff --git a/vipra-ui/app/templates/topics/index.hbs b/vipra-ui/app/templates/topics/index.hbs index a8a4eece3ceafbe0649445c4f7f031a0e6a26227..9b46b3c2f774c505cb51fdd36180f9b84be0160c 100644 --- a/vipra-ui/app/templates/topics/index.hbs +++ b/vipra-ui/app/templates/topics/index.hbs @@ -2,4 +2,4 @@ {{debounced-input placeholder='Filter' size='50' value=filter debounce='150'}} -{{topics-list items=model.topics filter=filter}} \ No newline at end of file +{{items-list items=model.topics filter=filter route='topics.show'}} \ No newline at end of file diff --git a/vipra-ui/app/templates/topics/show/index.hbs b/vipra-ui/app/templates/topics/show/index.hbs index e7f210342630bac1cc8bf90b340116afe2218b87..c2787301e1a5e68e5cb47d9612d7382106671a5a 100644 --- a/vipra-ui/app/templates/topics/show/index.hbs +++ b/vipra-ui/app/templates/topics/show/index.hbs @@ -1,3 +1,22 @@ {{#link-to 'topics.show.edit'}}Edit{{/link-to}} -<h2>{{model.topic._name}}</h2> \ No newline at end of file +<h2>{{model.topic._name}}</h2> + +<h3>Words</h3> + +<table> + <thead> + <tr> + <th>Word</th> + <th>Likeliness</th> + </tr> + </thead> + <tbody> + {{#each model.topic.words as |word|}} + <tr> + <td>{{word.word}}</td> + <td>{{word.likeliness}}</td> + </tr> + {{/each}} + </tbody> +</table> \ No newline at end of file diff --git a/vipra-ui/app/templates/words.hbs b/vipra-ui/app/templates/words.hbs new file mode 100644 index 0000000000000000000000000000000000000000..13d6081cc820dfd630a278038b27fd369cb369f5 --- /dev/null +++ b/vipra-ui/app/templates/words.hbs @@ -0,0 +1,6 @@ +<h1>Words</h1> +{{#link-to 'index'}}Top{{/link-to}} +{{#link-to 'words'}}All{{/link-to}} +<hr> + +{{outlet}} \ No newline at end of file diff --git a/vipra-ui/app/templates/words/index.hbs b/vipra-ui/app/templates/words/index.hbs new file mode 100644 index 0000000000000000000000000000000000000000..3f0dc91ae968739398804f6c5d07d78309bcca04 --- /dev/null +++ b/vipra-ui/app/templates/words/index.hbs @@ -0,0 +1,3 @@ +<h2>Found words</h2> + +{{items-list items=model.words filter=filter route='words.show'}} \ No newline at end of file diff --git a/vipra-ui/app/templates/words/show.hbs b/vipra-ui/app/templates/words/show.hbs new file mode 100644 index 0000000000000000000000000000000000000000..2778b0d59f24207fea2f91cb0fd4b04872266044 --- /dev/null +++ b/vipra-ui/app/templates/words/show.hbs @@ -0,0 +1 @@ +<h2>{{model.word.id}}</h2> \ No newline at end of file diff --git a/vipra-ui/tests/unit/routes/words-test.js b/vipra-ui/tests/unit/routes/words-test.js new file mode 100644 index 0000000000000000000000000000000000000000..ce62d399b4395bcff99bcbe5dc1129fb13c089bc --- /dev/null +++ b/vipra-ui/tests/unit/routes/words-test.js @@ -0,0 +1,11 @@ +import { moduleFor, test } from 'ember-qunit'; + +moduleFor('route:words', 'Unit | Route | words', { + // Specify the other units that are required for this test. + // needs: ['controller:foo'] +}); + +test('it exists', function(assert) { + let route = this.subject(); + assert.ok(route); +}); diff --git a/vipra-util/.project b/vipra-util/.project index 3eb2ee19593ed7f5d6c146794b6965a00956e603..8f910748986bb91f3b50301de8b035f643c557ad 100644 --- a/vipra-util/.project +++ b/vipra-util/.project @@ -25,6 +25,11 @@ <arguments> </arguments> </buildCommand> + <buildCommand> + <name>net.sourceforge.metrics.builder</name> + <arguments> + </arguments> + </buildCommand> </buildSpec> <natures> <nature>org.eclipse.jem.workbench.JavaEMFNature</nature> @@ -32,5 +37,6 @@ <nature>org.eclipse.m2e.core.maven2Nature</nature> <nature>org.eclipse.jdt.core.javanature</nature> <nature>org.eclipse.wst.common.project.facet.core.nature</nature> + <nature>net.sourceforge.metrics.nature</nature> </natures> </projectDescription> diff --git a/vipra-util/pom.xml b/vipra-util/pom.xml index daa885aaab75647786c334887bb428eb86a6db3c..7809631072e6a1215271c217742ab54b1faacbeb 100644 --- a/vipra-util/pom.xml +++ b/vipra-util/pom.xml @@ -11,6 +11,7 @@ <maven.compiler.target>1.8</maven.compiler.target> <maven.compiler.source>1.8</maven.compiler.source> <log4jVersion>2.4.1</log4jVersion> + <jacksonVersion>2.7.0</jacksonVersion> </properties> <dependencies> @@ -61,5 +62,17 @@ <artifactId>elasticsearch</artifactId> <version>2.1.0</version> </dependency> + + <!-- Jackson --> + <dependency> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-databind</artifactId> + <version>${jacksonVersion}</version> + </dependency> + <dependency> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-annotations</artifactId> + <version>${jacksonVersion}</version> + </dependency> </dependencies> </project> \ No newline at end of file diff --git a/vipra-util/src/main/java/de/vipra/util/Constants.java b/vipra-util/src/main/java/de/vipra/util/Constants.java index 22072d5de9984be43d0047df0209f1bd6230cce9..90aff81b9fc230a5d5529671422727fa66411621 100644 --- a/vipra-util/src/main/java/de/vipra/util/Constants.java +++ b/vipra-util/src/main/java/de/vipra/util/Constants.java @@ -30,25 +30,38 @@ public class Constants { public static final int DEFAULT_PORT = 27017; public static final String DEFAULT_DB = "test"; + /* + * TOPIC MODELING + */ + /** - * The global date time format. Will be used for conversion from and to - * database and frontend dates. + * The number of words to be used to generate a topic name. The top n words + * (sorted by likeliness) are used to generate a name for unnamed topics. */ - public static final String DATETIME_FORMAT = "yyyy-MM-dd'T'HH:mm:ss'Z'"; + public static final int AUTO_TOPIC_WORDS = 4; /** - * Disallowed chars for words in processed text segments. This regular - * expression is used to strip text of characters that should not be - * processed. + * Number of topics to discover with topic modeling, if the selected topic + * modeling library supports this parameter. */ - public static final String CHARS_DISALLOWED = "[^a-zA-Z0-9 ]"; + public static final int K_TOPICS = 20; /** - * The number of words to be used to generate a topic name. The top n words - * (sorted by likeliness) are used to generate a name for unnamed topics. + * Number of words in a discovered topic, if the selected topic modeling + * library supports this parameter. */ - public static final int AUTO_TOPIC_WORDS = 4; + public static final int K_TOPIC_WORDS = 50; + /** + * Precision of likeliness numbers. Likeliness is calculated for words to + * belong to topics. + */ + public static final int LIKELINESS_PRECISION = 6; + + /** + * Stopwords list. Extensive list of stopwords used to clean imported + * articles of the most common words before topic modeling is applied. + */ public static final List<String> STOPWORDS = Arrays.asList("'ll", "'ve", "a", "able", "about", "above", "abst", "accordance", "according", "accordingly", "across", "act", "actually", "added", "adj", "affected", "affecting", "affects", "after", "afterwards", "again", "against", "ah", "all", "almost", "alone", "along", @@ -123,6 +136,26 @@ public class Constants { "www", "x", "y", "year", "years", "yes", "yet", "you", "you'll", "you've", "youd", "young", "younger", "youngest", "your", "youre", "yours", "yourself", "yourselves", "z", "zero"); + /** + * Disallowed chars for words in processed text segments. This regular + * expression is used to strip text of characters that should not be + * processed. + */ + public static final String CHARS_DISALLOWED = "[^a-zA-Z0-9 ]"; + + /* + * OTHER + */ + + /** + * The global date time format. Will be used for conversion from and to + * database and frontend dates. + */ + public static final String DATETIME_FORMAT = "yyyy-MM-dd'T'HH:mm:ss'Z'"; + + /** + * The text processors available, including the default text processor + */ public static enum Processor { CUSTOM("custom"), CORENLP("corenlp"), @@ -151,6 +184,9 @@ public class Constants { } } + /** + * The topic modeling analyzers available, including the default analyzer. + */ public static enum Analyzer { JGIBB("jgibb"), DEFAULT(JGIBB); diff --git a/vipra-util/src/main/java/de/vipra/util/NumberUtils.java b/vipra-util/src/main/java/de/vipra/util/NumberUtils.java new file mode 100644 index 0000000000000000000000000000000000000000..4206e9fd528552d2af5c5458d8dc34318e7d8210 --- /dev/null +++ b/vipra-util/src/main/java/de/vipra/util/NumberUtils.java @@ -0,0 +1,10 @@ +package de.vipra.util; + +public class NumberUtils { + + public static double roundToPrecision(double d, int precision) { + double p = Math.pow(10, precision); + return Math.round(d * p) / p; + } + +} diff --git a/vipra-util/src/main/java/de/vipra/util/model/WordMap.java b/vipra-util/src/main/java/de/vipra/util/WordMap.java similarity index 56% rename from vipra-util/src/main/java/de/vipra/util/model/WordMap.java rename to vipra-util/src/main/java/de/vipra/util/WordMap.java index 76351d34aa4fa65a6f03142cb87221a86f6f9ba3..69be359dc41a47f3e33adac01ca618f2da636d1b 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/WordMap.java +++ b/vipra-util/src/main/java/de/vipra/util/WordMap.java @@ -1,63 +1,46 @@ -package de.vipra.util.model; +package de.vipra.util; import java.util.ArrayList; import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.Map.Entry; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import de.vipra.util.ex.DatabaseException; +import de.vipra.util.model.Word; import de.vipra.util.service.DatabaseService; -public class WordMap extends HashMap<String, Word> { +public class WordMap { - private static final long serialVersionUID = 8321873837437524923L; public static final Logger log = LoggerFactory.getLogger(WordMap.class); - private final DatabaseService<Word> dbWords; + private final DatabaseService<Word, String> dbWords; + private final Map<String, Word> wordMap; private boolean createNow = true; private long newWords = 0; - public WordMap(DatabaseService<Word> dbWords) { + public WordMap(DatabaseService<Word, String> dbWords) { this.dbWords = dbWords; + this.wordMap = new HashMap<>(); List<Word> words = dbWords.getAll(); for (Word word : words) - put(word.getWord().toLowerCase(), word); + wordMap.put(word.getWord().toLowerCase(), word); } - @Override public Word get(Object w) { String strWord = w.toString(); - Word word = super.get(strWord.toLowerCase()); + Word word = wordMap.get(strWord.toLowerCase()); if (word == null) { word = new Word(strWord); createWord(word); + wordMap.put(strWord, word); } return word; } - @Override - public Word put(String strWord, Word word) { - Word currentWord = get(strWord); - if (currentWord == null) { - if (word == null) - word = new Word(strWord); - createWord(word); - put(strWord, word); - currentWord = word; - } else { - currentWord.setWord(word.getWord()); - try { - dbWords.updateSingle(currentWord); - } catch (DatabaseException e) { - log.error("could not update word in database", e); - throw new RuntimeException(e); - } - } - return currentWord; - } - private Word createWord(Word word) { if (createNow) { try { @@ -71,14 +54,10 @@ public class WordMap extends HashMap<String, Word> { return word; } - public Word put(String strWord) { - return put(strWord, null); - } - public void create() throws DatabaseException { List<Word> newWords = new ArrayList<>(); - for (Entry<String, Word> e : this.entrySet()) - if (e.getValue().getId() == null) + for (Entry<String, Word> e : wordMap.entrySet()) + if (!e.getValue().isCreated()) newWords.add(e.getValue()); dbWords.createMultiple(newWords); this.newWords += newWords.size(); diff --git a/vipra-util/src/main/java/de/vipra/util/an/JsonField.java b/vipra-util/src/main/java/de/vipra/util/an/JsonField.java deleted file mode 100644 index 058f5301bfa97eca60a9a7f930051accc0c4154a..0000000000000000000000000000000000000000 --- a/vipra-util/src/main/java/de/vipra/util/an/JsonField.java +++ /dev/null @@ -1,14 +0,0 @@ -package de.vipra.util.an; - -import java.lang.annotation.ElementType; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; - -@Retention(RetentionPolicy.RUNTIME) -@Target(ElementType.FIELD) -public @interface JsonField { - - public String value() default ""; - -} diff --git a/vipra-util/src/main/java/de/vipra/util/an/JsonIgnore.java b/vipra-util/src/main/java/de/vipra/util/an/JsonIgnore.java deleted file mode 100644 index 70e5b17b880d5f318747978acc587cf8a61b64b5..0000000000000000000000000000000000000000 --- a/vipra-util/src/main/java/de/vipra/util/an/JsonIgnore.java +++ /dev/null @@ -1,14 +0,0 @@ -package de.vipra.util.an; - -import java.lang.annotation.ElementType; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; - -@Retention(RetentionPolicy.RUNTIME) -@Target(ElementType.FIELD) -public @interface JsonIgnore { - - public boolean value() default true; - -} \ No newline at end of file diff --git a/vipra-util/src/main/java/de/vipra/util/model/Article.java b/vipra-util/src/main/java/de/vipra/util/model/Article.java index 72da35029f52ed7f53ba6db80a6059714751dba8..4122d62395b268f4927c68f808ad81b1199c253d 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/Article.java +++ b/vipra-util/src/main/java/de/vipra/util/model/Article.java @@ -12,18 +12,23 @@ import org.bson.types.ObjectId; import org.mongodb.morphia.annotations.Embedded; import org.mongodb.morphia.annotations.Entity; import org.mongodb.morphia.annotations.Id; +import org.mongodb.morphia.annotations.Index; +import org.mongodb.morphia.annotations.Indexes; import org.mongodb.morphia.annotations.PrePersist; import de.vipra.util.Constants; import de.vipra.util.FileUtils; import de.vipra.util.MongoUtils; import de.vipra.util.StringUtils; +import de.vipra.util.an.JsonType; import de.vipra.util.an.JsonWrap; import de.vipra.util.an.QueryIgnore; @SuppressWarnings("serial") +@JsonType("article") @Entity(value = "articles", noClassnameStored = true) -public class Article extends Model implements Serializable { +@Indexes({ @Index("title"), @Index("date") }) +public class Article extends FileModel implements Serializable { @Id private ObjectId id; @@ -52,7 +57,7 @@ public class Article extends Model implements Serializable { private ArticleStats stats; @JsonWrap("attributes") - private Date created = new Date(); + private Date created; @JsonWrap("attributes") private Date modified; @@ -155,6 +160,8 @@ public class Article extends Model implements Serializable { @PrePersist public void prePersist() { this.modified = new Date(); + if (this.created == null) + this.created = modified; } @Override diff --git a/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java b/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java index 06a3b1e9f9069fba467b892a8d9809d70cf83401..db1b8ad393ffd0665f75357546a6800de608a6b3 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java +++ b/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java @@ -6,6 +6,8 @@ import org.bson.types.ObjectId; import org.mongodb.morphia.annotations.Entity; import org.mongodb.morphia.annotations.Id; +import de.vipra.util.WordMap; + @Entity public class ArticleStats implements Serializable { diff --git a/vipra-util/src/main/java/de/vipra/util/model/FileModel.java b/vipra-util/src/main/java/de/vipra/util/model/FileModel.java new file mode 100644 index 0000000000000000000000000000000000000000..09742a02e8a223e0c272fb2c7f52171e19793c6b --- /dev/null +++ b/vipra-util/src/main/java/de/vipra/util/model/FileModel.java @@ -0,0 +1,21 @@ +package de.vipra.util.model; + +import java.io.File; +import java.io.IOException; + +import org.apache.commons.io.FileUtils; + +import de.vipra.util.Constants; + +@SuppressWarnings("serial") +public abstract class FileModel implements Model { + + public void writeToFile(File file) throws IOException { + FileUtils.writeStringToFile(file, toFileString(), Constants.FB_ENCODING, false); + } + + public abstract void fromFile(File file) throws IOException; + + public abstract String toFileString(); + +} diff --git a/vipra-util/src/main/java/de/vipra/util/model/Model.java b/vipra-util/src/main/java/de/vipra/util/model/Model.java index 8fa5aab18b609ce00af81ade8e02059ae472610e..0d133d54efd7a951c064d5ac15000c1d57b3c81d 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/Model.java +++ b/vipra-util/src/main/java/de/vipra/util/model/Model.java @@ -1,39 +1,5 @@ package de.vipra.util.model; -import java.io.File; -import java.io.IOException; import java.io.Serializable; -import java.net.URI; -import java.net.URISyntaxException; -import org.apache.commons.io.FileUtils; -import org.bson.types.ObjectId; - -import de.vipra.util.Constants; - -@SuppressWarnings("serial") -public abstract class Model implements Serializable { - - public URI uri(URI base) { - try { - return new URI(base.toString() + "/" + getId().toString()); - } catch (URISyntaxException e) { - return null; - } - } - - public void writeToFile(File file) throws IOException { - FileUtils.writeStringToFile(file, toFileString(), Constants.FB_ENCODING, false); - } - - public abstract ObjectId getId(); - - public abstract void setId(ObjectId id); - - public abstract void setId(String id); - - public abstract void fromFile(File file) throws IOException; - - public abstract String toFileString(); - -} +public interface Model extends Serializable {} \ No newline at end of file diff --git a/vipra-util/src/main/java/de/vipra/util/model/Topic.java b/vipra-util/src/main/java/de/vipra/util/model/Topic.java index 1e1d7a47af4967da703cde59b19124835c851a6d..0ac387d906e9c2fb0a7e42b682645392d494f6f1 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/Topic.java +++ b/vipra-util/src/main/java/de/vipra/util/model/Topic.java @@ -1,19 +1,21 @@ package de.vipra.util.model; -import java.io.File; -import java.io.IOException; import java.io.Serializable; import org.bson.types.ObjectId; import org.mongodb.morphia.annotations.Entity; import org.mongodb.morphia.annotations.Id; +import org.mongodb.morphia.annotations.Index; +import org.mongodb.morphia.annotations.Indexes; import de.vipra.util.MongoUtils; -import de.vipra.util.ex.NotImplementedException; +import de.vipra.util.an.JsonType; @SuppressWarnings("serial") +@JsonType("topic") @Entity(value = "topics", noClassnameStored = true) -public class Topic extends Model implements Serializable { +@Indexes(@Index("name")) +public class Topic implements Model, Serializable { @Id private ObjectId id; @@ -39,14 +41,4 @@ public class Topic extends Model implements Serializable { this.name = name; } - @Override - public void fromFile(File file) throws IOException { - throw new NotImplementedException(); - } - - @Override - public String toFileString() { - throw new NotImplementedException(); - } - } diff --git a/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java b/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java index 5af0a958ee8598d8f87bed4b5cbb6a3213d064ac..412232b735518b5b1c35d40ebe6c2c3a567025c9 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java @@ -1,7 +1,5 @@ package de.vipra.util.model; -import java.io.File; -import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; import java.util.Date; @@ -19,12 +17,11 @@ import de.vipra.util.StringUtils; import de.vipra.util.an.JsonType; import de.vipra.util.an.JsonWrap; import de.vipra.util.an.QueryIgnore; -import de.vipra.util.ex.NotImplementedException; @SuppressWarnings("serial") @JsonType("topic") @Entity(value = "topics", noClassnameStored = true) -public class TopicFull extends Model implements Serializable { +public class TopicFull implements Model, Serializable { @Id private ObjectId id; @@ -38,10 +35,10 @@ public class TopicFull extends Model implements Serializable { @Embedded @JsonWrap("attributes") @QueryIgnore(multi = true) - private List<TopicWord> topicWords; + private List<TopicWord> words; @JsonWrap("attributes") - private Date created = new Date(); + private Date created; @JsonWrap("attributes") private Date modified; @@ -74,12 +71,12 @@ public class TopicFull extends Model implements Serializable { this.index = index; } - public List<TopicWord> getTopicWords() { - return topicWords; + public List<TopicWord> getWords() { + return words; } - public void setTopicWords(List<TopicWord> topicWords) { - this.topicWords = topicWords; + public void setWords(List<TopicWord> topicWords) { + this.words = topicWords; } public Date getCreated() { @@ -98,16 +95,6 @@ public class TopicFull extends Model implements Serializable { this.modified = modified; } - @Override - public void fromFile(File file) throws IOException { - throw new NotImplementedException(); - } - - @Override - public String toFileString() { - throw new NotImplementedException(); - } - @Override public String toString() { return TopicFull.class.getSimpleName() + "[id:" + getId() + ", name:" + getName() + ", created:" + created @@ -117,6 +104,8 @@ public class TopicFull extends Model implements Serializable { @PrePersist public void prePersist() { this.modified = new Date(); + if (this.created == null) + this.created = modified; } public static String getNameFromWords(List<TopicWord> words) { diff --git a/vipra-util/src/main/java/de/vipra/util/model/TopicRef.java b/vipra-util/src/main/java/de/vipra/util/model/TopicRef.java index 0380a0dac47eb9e0375ceb7ec47cf70a9454d9f4..ef00fc43600162798ce4f8b6a5086ce16f4f8e1b 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/TopicRef.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TopicRef.java @@ -6,14 +6,11 @@ import org.mongodb.morphia.annotations.Embedded; import org.mongodb.morphia.annotations.Reference; import org.mongodb.morphia.annotations.Transient; -import de.vipra.util.an.JsonIgnore; - @SuppressWarnings("serial") @Embedded public class TopicRef implements Comparable<TopicRef>, Serializable { @Transient - @JsonIgnore private String topicId; @Reference private Topic topic; diff --git a/vipra-util/src/main/java/de/vipra/util/model/TopicWord.java b/vipra-util/src/main/java/de/vipra/util/model/TopicWord.java index 5bf9a5cbcea3b5af21eee0060a7c34cdfcac08c8..abed0a8abfbf7641b852e6ae2a0f9728354b6e32 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/TopicWord.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TopicWord.java @@ -3,14 +3,23 @@ package de.vipra.util.model; import java.io.Serializable; import org.mongodb.morphia.annotations.Embedded; +import org.mongodb.morphia.annotations.PostLoad; import org.mongodb.morphia.annotations.Reference; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; + @SuppressWarnings("serial") @Embedded public class TopicWord implements Comparable<TopicWord>, Serializable { @Reference + @JsonIgnore private Word word; + + @JsonProperty("word") + private String wordString; + private double likeliness; public TopicWord() {} @@ -28,6 +37,10 @@ public class TopicWord implements Comparable<TopicWord>, Serializable { this.word = word; } + public String getWordString() { + return wordString; + } + public double getLikeliness() { return likeliness; } @@ -51,4 +64,9 @@ public class TopicWord implements Comparable<TopicWord>, Serializable { return TopicWord.class.getSimpleName() + "[word:" + word + ", likeliness:" + likeliness + "]"; } + @PostLoad + private void postLoad() { + this.wordString = word.getWord(); + } + } diff --git a/vipra-util/src/main/java/de/vipra/util/model/Word.java b/vipra-util/src/main/java/de/vipra/util/model/Word.java index 2effbd0d5bae56c6ff4ba584bb195b52beed9526..62ee93d059a5b91bd2d495aa1002dad6d0dcb7bd 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/Word.java +++ b/vipra-util/src/main/java/de/vipra/util/model/Word.java @@ -1,35 +1,57 @@ package de.vipra.util.model; -import java.io.File; -import java.io.IOException; import java.io.Serializable; -import org.bson.types.ObjectId; import org.mongodb.morphia.annotations.Entity; import org.mongodb.morphia.annotations.Id; +import org.mongodb.morphia.annotations.PostLoad; +import org.mongodb.morphia.annotations.PostPersist; +import org.mongodb.morphia.annotations.Transient; -import de.vipra.util.MongoUtils; -import de.vipra.util.ex.NotImplementedException; +import com.fasterxml.jackson.annotation.JsonIgnore; + +import de.vipra.util.an.JsonType; @SuppressWarnings("serial") +@JsonType("word") @Entity(value = "words", noClassnameStored = true) -public class Word extends Model implements Serializable { +public class Word implements Model, Serializable { + + /** + * This is the id. It is used by the frontend, which expects an 'id' field. + * This field is populated on load from the database and it is not stored. + */ + @Transient + private String id; + /** + * This is the actual word. It is used as the database id and is not + * returned to the frontend. + */ @Id - private ObjectId id; + @JsonIgnore private String word; + /** + * The created variable is a helper that marks non-persisted new words in + * the import process. Each word with created = false will be saved before + * topics and topics references are created. + */ + @Transient + @JsonIgnore + private boolean created = false; + public Word() {} public Word(String word) { this.word = word; } - public ObjectId getId() { + public String getId() { return id; } - public void setId(ObjectId id) { + public void setId(String id) { this.id = id; } @@ -39,21 +61,22 @@ public class Word extends Model implements Serializable { public void setWord(String word) { this.word = word; + this.id = word; } - @Override - public void setId(String id) { - this.id = MongoUtils.objectId(id); + public boolean isCreated() { + return created; } - @Override - public void fromFile(File file) throws IOException { - throw new NotImplementedException(); + public void setCreated(boolean created) { + this.created = created; } - @Override - public String toFileString() { - throw new NotImplementedException(); + @PostLoad + @PostPersist + private void post() { + this.id = word; + this.created = true; } } diff --git a/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java b/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java index ed3f222ac266c81268e5b544225c0bef91b7ac70..9a1d4e0560c9193dafca7770c1187705b0dd07c8 100644 --- a/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java +++ b/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java @@ -2,22 +2,23 @@ package de.vipra.util.service; import java.lang.reflect.Field; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; import java.util.List; +import java.util.Set; -import org.bson.types.ObjectId; import org.mongodb.morphia.Datastore; import org.mongodb.morphia.query.Query; import de.vipra.util.Config; import de.vipra.util.ListUtils; import de.vipra.util.Mongo; -import de.vipra.util.MongoUtils; import de.vipra.util.an.QueryIgnore; import de.vipra.util.ex.ConfigException; import de.vipra.util.ex.DatabaseException; import de.vipra.util.model.Model; -public class DatabaseService<T extends Model> implements Service<T, DatabaseException> { +public class DatabaseService<T extends Model, U> implements Service<T, U, DatabaseException> { private final Datastore datastore; private final Class<T> clazz; @@ -45,15 +46,17 @@ public class DatabaseService<T extends Model> implements Service<T, DatabaseExce } @Override - public T getSingle(String id) { - Query<T> q = datastore.createQuery(clazz).field("_id").equal(new ObjectId(id)); - if (ignoredFieldsSingle.length > 0) + public T getSingle(U id, String... fields) { + Query<T> q = datastore.createQuery(clazz).field("_id").equal(id); + if (fields != null && fields.length > 0) + q.retrievedFields(true, setMinus(fields, ignoredFieldsSingle)); + else if (ignoredFieldsSingle.length > 0) q.retrievedFields(false, ignoredFieldsSingle); return q.get(); } @Override - public List<T> getMultiple(Integer skip, Integer limit, String sortBy) { + public List<T> getMultiple(Integer skip, Integer limit, String sortBy, String... fields) { Query<T> q = datastore.createQuery(clazz); if (skip != null) q.offset(skip); @@ -61,15 +64,17 @@ public class DatabaseService<T extends Model> implements Service<T, DatabaseExce q.limit(limit); if (sortBy != null) q.order(sortBy); - if (ignoredFieldsMulti.length > 0) + if (fields != null && fields.length > 0) + q.retrievedFields(true, setMinus(fields, ignoredFieldsMulti)); + else if (ignoredFieldsMulti.length > 0) q.retrievedFields(false, ignoredFieldsMulti); List<T> list = q.asList(); return list; } @Override - public List<T> getAll() { - return getMultiple(null, null, null); + public List<T> getAll(String... fields) { + return getMultiple(null, null, null, fields); } @Override @@ -86,8 +91,8 @@ public class DatabaseService<T extends Model> implements Service<T, DatabaseExce } @Override - public long deleteSingle(String id) throws DatabaseException { - return datastore.delete(MongoUtils.objectId(id)).getN(); + public long deleteSingle(U id) throws DatabaseException { + return datastore.delete(id).getN(); } @Override @@ -105,10 +110,20 @@ public class DatabaseService<T extends Model> implements Service<T, DatabaseExce return datastore.getCount(clazz); } - public static <T extends Model> DatabaseService<T> getDatabaseService(Config config, Class<T> clazz) + public static <T extends Model, U> DatabaseService<T, U> getDatabaseService(Config config, Class<T> clazz) throws ConfigException { Mongo mongo = Mongo.getInstance(config); - return new DatabaseService<T>(mongo, clazz); + return new DatabaseService<T, U>(mongo, clazz); + } + + private String[] setMinus(String[] a, String[] b) { + if (a != null && b != null) { + Set<String> sa = new HashSet<>(Arrays.asList(a)); + Set<String> sb = new HashSet<>(Arrays.asList(b)); + sa.removeAll(sb); + return sa.toArray(new String[sa.size()]); + } + return a; } } diff --git a/vipra-util/src/main/java/de/vipra/util/service/Service.java b/vipra-util/src/main/java/de/vipra/util/service/Service.java index 659a57956c81c6c03b1f76269cfe35980beb1110..4fb210eb71017a874806edb094e74ebfe744b8ff 100644 --- a/vipra-util/src/main/java/de/vipra/util/service/Service.java +++ b/vipra-util/src/main/java/de/vipra/util/service/Service.java @@ -4,21 +4,21 @@ import java.util.List; import de.vipra.util.model.Model; -public interface Service<T extends Model, E extends Exception> { +public interface Service<Type extends Model, IdType, E extends Exception> { - T getSingle(String id) throws E; + Type getSingle(IdType id, String... fields) throws E; - List<T> getMultiple(Integer skip, Integer limit, String sortBy) throws E; + List<Type> getMultiple(Integer skip, Integer limit, String sortBy, String... fields) throws E; - List<T> getAll() throws E; + List<Type> getAll(String... fields) throws E; - T createSingle(T t) throws E; + Type createSingle(Type t) throws E; - List<T> createMultiple(Iterable<T> t) throws E; + List<Type> createMultiple(Iterable<Type> t) throws E; - long deleteSingle(String id) throws E; + long deleteSingle(IdType id) throws E; - void updateSingle(T t) throws E; + void updateSingle(Type t) throws E; void drop() throws E;