diff --git a/vipra-backend/src/main/java/de/vipra/rest/resource/ArticleResource.java b/vipra-backend/src/main/java/de/vipra/rest/resource/ArticleResource.java index ff1c102db0b20bf19818850efe779e8605992d75..74c809ec4d09adc853939ab6cfaa6b8b02a5da08 100644 --- a/vipra-backend/src/main/java/de/vipra/rest/resource/ArticleResource.java +++ b/vipra-backend/src/main/java/de/vipra/rest/resource/ArticleResource.java @@ -34,6 +34,7 @@ import de.vipra.util.StringUtils; import de.vipra.util.ex.ConfigException; import de.vipra.util.ex.DatabaseException; import de.vipra.util.model.ArticleFull; +import de.vipra.util.model.TopicModel; import de.vipra.util.service.MongoService; import de.vipra.util.service.Service.QueryBuilder; @@ -52,7 +53,7 @@ public class ArticleResource { @GET @Produces(MediaType.APPLICATION_JSON) - public Response getArticles(@QueryParam("model") final String model, @QueryParam("skip") final Integer skip, + public Response getArticles(@QueryParam("topicModel") final String topicModel, @QueryParam("skip") final Integer skip, @QueryParam("limit") final Integer limit, @QueryParam("sort") @DefaultValue("date") final String sortBy, @QueryParam("fields") final String fields, @QueryParam("word") final String word) { final ResponseWrapper<List<ArticleFull>> res = new ResponseWrapper<>(); @@ -65,8 +66,8 @@ public class ArticleResource { if (fields != null && !fields.isEmpty()) query.fields(true, StringUtils.getFields(fields)); - if (model != null && !model.isEmpty()) - query.criteria("model.id", model); + if (topicModel != null && !topicModel.isEmpty()) + query.criteria("topicModel", new TopicModel(topicModel)); if (word != null && !word.isEmpty()) query.criteria("words.word.id", word); diff --git a/vipra-backend/src/main/java/de/vipra/rest/resource/InfoResource.java b/vipra-backend/src/main/java/de/vipra/rest/resource/InfoResource.java index 466ec1003d16bd59a9849ef5397e29524d9d7470..7a5c47d79593929fe8d0e557f27c7af7aaf22265 100644 --- 
a/vipra-backend/src/main/java/de/vipra/rest/resource/InfoResource.java +++ b/vipra-backend/src/main/java/de/vipra/rest/resource/InfoResource.java @@ -68,17 +68,15 @@ public class InfoResource { info.put("const.esboosttitles", Constants.ES_BOOST_TITLES); info.put("const.topicautoname", Constants.TOPIC_AUTO_NAMING_WORDS); info.put("const.ktopics", Constants.K_TOPICS); - info.put("const.ktopicwords", Constants.K_TOPIC_WORDS); info.put("const.decaylambda", Constants.RISING_DECAY_LAMBDA); info.put("const.minrelprob", Constants.MIN_RELATIVE_PROB); - info.put("const.minshare", Constants.MINIMUM_SHARE); info.put("const.maxsimdocs", Constants.MAX_SIMILAR_DOCUMENTS); info.put("const.maxdiv", Constants.MAX_SIMILAR_DOCUMENTS_DIVERGENCE); info.put("const.dynminiter", Constants.DYNAMIC_MIN_ITER); info.put("const.dynmaxiter", Constants.DYNAMIC_MAX_ITER); info.put("const.statiter", Constants.STATIC_ITER); - info.put("const.docminfreq", Constants.DOCUMENT_MIN_WORD_FREQ); info.put("const.docminlength", Constants.DOCUMENT_MIN_LENGTH); + info.put("const.docminwordfreq", Constants.DOCUMENT_MIN_WORD_FREQ); info.put("const.charsdisallow", Constants.CHARS_DISALLOWED); info.put("const.regexemail", Constants.REGEX_EMAIL); info.put("const.regexurl", Constants.REGEX_URL); diff --git a/vipra-backend/src/main/java/de/vipra/rest/resource/SearchResource.java b/vipra-backend/src/main/java/de/vipra/rest/resource/SearchResource.java index 434b58ee7243627b06208f8cf854db5e9b3cb655..18a39fd02418f3f65454d7f481243d04e44c02d0 100644 --- a/vipra-backend/src/main/java/de/vipra/rest/resource/SearchResource.java +++ b/vipra-backend/src/main/java/de/vipra/rest/resource/SearchResource.java @@ -45,7 +45,7 @@ public class SearchResource { @GET @Produces(MediaType.APPLICATION_JSON) - public Response doSearch(@QueryParam("model") final String model, @QueryParam("skip") Integer skip, @QueryParam("limit") Integer limit, + public Response doSearch(@QueryParam("topicModel") final String topicModel, @QueryParam("skip") 
Integer skip, @QueryParam("limit") Integer limit, @QueryParam("fields") final String fields, @QueryParam("query") final String query) { final ResponseWrapper<List<ArticleFull>> res = new ResponseWrapper<>(); @@ -59,8 +59,8 @@ public class SearchResource { return res.noContent(); String indexName = "_all"; - if (model != null && !model.isEmpty()) - indexName = model + "-articles"; + if (topicModel != null && !topicModel.isEmpty()) + indexName = topicModel + "-articles"; SearchResponse response = null; try { diff --git a/vipra-backend/src/main/java/de/vipra/rest/resource/SequenceResource.java b/vipra-backend/src/main/java/de/vipra/rest/resource/SequenceResource.java index b22aed1860b69d60f86aa8701308e5bf6a76bcaa..e2c66f9af85d7b8451472762abceb539c9ca3aa1 100644 --- a/vipra-backend/src/main/java/de/vipra/rest/resource/SequenceResource.java +++ b/vipra-backend/src/main/java/de/vipra/rest/resource/SequenceResource.java @@ -24,6 +24,7 @@ import de.vipra.util.MongoUtils; import de.vipra.util.StringUtils; import de.vipra.util.ex.ConfigException; import de.vipra.util.model.SequenceFull; +import de.vipra.util.model.TopicModel; import de.vipra.util.service.MongoService; import de.vipra.util.service.Service.QueryBuilder; @@ -39,7 +40,7 @@ public class SequenceResource { @GET @Produces(MediaType.APPLICATION_JSON) - public Response getSequences(@QueryParam("model") final String model, @QueryParam("skip") final Integer skip, + public Response getSequences(@QueryParam("topicModel") final String topicModel, @QueryParam("skip") final Integer skip, @QueryParam("limit") final Integer limit, @QueryParam("sort") @DefaultValue("id") final String sortBy, @QueryParam("fields") final String fields) { final ResponseWrapper<List<SequenceFull>> res = new ResponseWrapper<>(); @@ -52,8 +53,8 @@ public class SequenceResource { if (fields != null && !fields.isEmpty()) query.fields(true, StringUtils.getFields(fields)); - if (model != null && !model.isEmpty()) - query.criteria("model.id", model); + 
if (topicModel != null && !topicModel.isEmpty()) + query.criteria("topicModel", new TopicModel(topicModel)); final List<SequenceFull> sequences = dbSequences.getMultiple(query); diff --git a/vipra-backend/src/main/java/de/vipra/rest/resource/TopicResource.java b/vipra-backend/src/main/java/de/vipra/rest/resource/TopicResource.java index c749773e1e8779f299d1de512d81aea54eaa3fc8..3f422d62145b3f43ae7132a7b67032ce5572789c 100644 --- a/vipra-backend/src/main/java/de/vipra/rest/resource/TopicResource.java +++ b/vipra-backend/src/main/java/de/vipra/rest/resource/TopicResource.java @@ -30,6 +30,7 @@ import de.vipra.util.ex.DatabaseException; import de.vipra.util.model.ArticleFull; import de.vipra.util.model.Topic; import de.vipra.util.model.TopicFull; +import de.vipra.util.model.TopicModel; import de.vipra.util.service.MongoService; import de.vipra.util.service.Service.QueryBuilder; @@ -47,7 +48,7 @@ public class TopicResource { @GET @Produces(MediaType.APPLICATION_JSON) - public Response getTopics(@QueryParam("model") final String model, @QueryParam("skip") final Integer skip, + public Response getTopics(@QueryParam("topicModel") final String topicModel, @QueryParam("skip") final Integer skip, @QueryParam("limit") final Integer limit, @QueryParam("sort") @DefaultValue("name") final String sortBy, @QueryParam("fields") final String fields) { final ResponseWrapper<List<TopicFull>> res = new ResponseWrapper<>(); @@ -60,8 +61,8 @@ public class TopicResource { if (fields != null && !fields.isEmpty()) query.fields(true, StringUtils.getFields(fields)); - if (model != null && !model.isEmpty()) - query.criteria("model.id", model); + if (topicModel != null && !topicModel.isEmpty()) + query.criteria("topicModel", new TopicModel(topicModel)); final List<TopicFull> topics = dbTopics.getMultiple(query); diff --git a/vipra-backend/src/main/java/de/vipra/rest/resource/WindowResource.java b/vipra-backend/src/main/java/de/vipra/rest/resource/WindowResource.java index 
8528a5d7d9fd31c30185d4e315f56050b063e90a..6b3bed96c32a2e41efc086673d1716593f98c1cb 100644 --- a/vipra-backend/src/main/java/de/vipra/rest/resource/WindowResource.java +++ b/vipra-backend/src/main/java/de/vipra/rest/resource/WindowResource.java @@ -18,6 +18,7 @@ import de.vipra.rest.model.ResponseWrapper; import de.vipra.util.Config; import de.vipra.util.StringUtils; import de.vipra.util.ex.ConfigException; +import de.vipra.util.model.TopicModel; import de.vipra.util.model.Window; import de.vipra.util.service.MongoService; import de.vipra.util.service.Service.QueryBuilder; @@ -34,7 +35,7 @@ public class WindowResource { @GET @Produces(MediaType.APPLICATION_JSON) - public Response getWindows(@QueryParam("model") final String model, @QueryParam("skip") final Integer skip, + public Response getWindows(@QueryParam("topicModel") final String topicModel, @QueryParam("skip") final Integer skip, @QueryParam("limit") final Integer limit, @QueryParam("sort") @DefaultValue("startDate") final String sortBy, @QueryParam("fields") final String fields) { final ResponseWrapper<List<Window>> res = new ResponseWrapper<>(); @@ -47,8 +48,8 @@ public class WindowResource { if (fields != null && !fields.isEmpty()) query.fields(true, StringUtils.getFields(fields)); - if (model != null && !model.isEmpty()) - query.criteria("model.id", model); + if (topicModel != null && !topicModel.isEmpty()) + query.criteria("topicModel", new TopicModel(topicModel)); final List<Window> windows = dbWindows.getMultiple(query); diff --git a/vipra-cmd/pom.xml b/vipra-cmd/pom.xml index 5c87afc82927589f029ac08340756eddef4b1e08..9b81219dcc7b123f3f47fd173cb4698ebaf6c10c 100644 --- a/vipra-cmd/pom.xml +++ b/vipra-cmd/pom.xml @@ -15,7 +15,7 @@ <maven.compiler.target>1.8</maven.compiler.target> <maven.compiler.source>1.8</maven.compiler.source> <maven.build.timestamp.format>yyMMdd_HHmm</maven.build.timestamp.format> - <buildDate>${maven.build.timestamp}</buildDate> + <buildDate>${maven.build.timestamp}</buildDate> 
</properties> <scm> @@ -52,23 +52,6 @@ <classifier>models</classifier> </dependency> - <!-- Logging --> - <dependency> - <groupId>org.apache.logging.log4j</groupId> - <artifactId>log4j-api</artifactId> - <version>2.5</version> - </dependency> - <dependency> - <groupId>org.apache.logging.log4j</groupId> - <artifactId>log4j-core</artifactId> - <version>2.5</version> - </dependency> - <dependency> - <groupId>org.apache.logging.log4j</groupId> - <artifactId>log4j-slf4j-impl</artifactId> - <version>2.5</version> - </dependency> - <!-- MongoDB Database Adapter --> <dependency> <groupId>org.mongodb.morphia</groupId> diff --git a/vipra-cmd/runcfg/CMD - Clear.launch b/vipra-cmd/runcfg/CMD - Clear.launch index 054ed66ad23da2f5d9078266ac4c921e2db471fb..9f94416a0dd6b053b2837da6b8612eee1332afc0 100644 --- a/vipra-cmd/runcfg/CMD - Clear.launch +++ b/vipra-cmd/runcfg/CMD - Clear.launch @@ -11,8 +11,8 @@ </listAttribute> <stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/> <stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="de.vipra.cmd.Main"/> -<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="--clear"/> +<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-cC yearly"/> <stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/> <stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/> -<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Dlog4j.configurationFile=log4j2dev.xml"/> +<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Dlog4j.configurationFile=log4j2dev.xml -ea"/> </launchConfiguration> diff --git a/vipra-cmd/runcfg/CMD - Config.launch b/vipra-cmd/runcfg/CMD - Config.launch deleted file mode 100644 index ba73d8cf049b84061ae5e882be1d504bdd14d9af..0000000000000000000000000000000000000000 --- a/vipra-cmd/runcfg/CMD 
- Config.launch +++ /dev/null @@ -1,18 +0,0 @@ -<?xml version="1.0" encoding="UTF-8" standalone="no"?> -<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication"> -<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS"> -<listEntry value="/vipra-cmd/src/main/java/de/vipra/cmd/Main.java"/> -</listAttribute> -<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES"> -<listEntry value="1"/> -</listAttribute> -<listAttribute key="org.eclipse.debug.ui.favoriteGroups"> -<listEntry value="org.eclipse.debug.ui.launchGroup.run"/> -</listAttribute> -<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/> -<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="de.vipra.cmd.Main"/> -<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-o"/> -<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/> -<stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/> -<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Dlog4j.configurationFile=log4j2dev.xml"/> -</launchConfiguration> diff --git a/vipra-cmd/runcfg/CMD - Help.launch b/vipra-cmd/runcfg/CMD - Help.launch index 2528a35c417af8984b402da9e0de8a062449cfa1..a18a81cf29654e9a32b4e790f1a3e782375af4a6 100644 --- a/vipra-cmd/runcfg/CMD - Help.launch +++ b/vipra-cmd/runcfg/CMD - Help.launch @@ -14,5 +14,5 @@ <stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-h"/> <stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/> <stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/> -<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Dlog4j.configurationFile=log4j2dev.xml"/> +<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-ea"/> 
</launchConfiguration> diff --git a/vipra-cmd/runcfg/CMD - Import 1.launch b/vipra-cmd/runcfg/CMD - Import 1.launch index 62653261e0ef063ca9113936fee95a92f65d28be..a907ac70fcff4a2caf844e741cecb239fbd3820c 100644 --- a/vipra-cmd/runcfg/CMD - Import 1.launch +++ b/vipra-cmd/runcfg/CMD - Import 1.launch @@ -11,8 +11,8 @@ </listAttribute> <stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/> <stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="de.vipra.cmd.Main"/> -<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-i /home/eike/repos/master/ma-impl/vm/data/test-1.json"/> +<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-AI /home/eike/repos/master/ma-impl/vm/data/test-1.json"/> <stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/> <stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/> -<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Dlog4j.configurationFile=log4j2dev.xml"/> +<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-ea"/> </launchConfiguration> diff --git a/vipra-cmd/runcfg/CMD - Import 10.launch b/vipra-cmd/runcfg/CMD - Import 10.launch index 1d6c112bd7d743c6ccf7448fa8f42d853a5c431d..f3178f97573e3962eb12af7244d146d348761955 100644 --- a/vipra-cmd/runcfg/CMD - Import 10.launch +++ b/vipra-cmd/runcfg/CMD - Import 10.launch @@ -11,8 +11,8 @@ </listAttribute> <stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/> <stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="de.vipra.cmd.Main"/> -<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-i /home/eike/repos/master/ma-impl/vm/data/test-10.json"/> +<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-AI 
/home/eike/repos/master/ma-impl/vm/data/test-10.json"/> <stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/> <stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/> -<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Dlog4j.configurationFile=log4j2dev.xml"/> +<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-ea"/> </launchConfiguration> diff --git a/vipra-cmd/runcfg/CMD - Import 2.launch b/vipra-cmd/runcfg/CMD - Import 2.launch index 89c246dd7ba64c418b3875e63aada477ff6caa5b..16d1dbf5bc8dbc84d1e367f366f9692da03bdeb8 100644 --- a/vipra-cmd/runcfg/CMD - Import 2.launch +++ b/vipra-cmd/runcfg/CMD - Import 2.launch @@ -11,8 +11,8 @@ </listAttribute> <stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/> <stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="de.vipra.cmd.Main"/> -<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-i /home/eike/repos/master/ma-impl/vm/data/test-2.json"/> +<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-AI /home/eike/repos/master/ma-impl/vm/data/test-2.json"/> <stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/> <stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/> -<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Dlog4j.configurationFile=log4j2dev.xml"/> +<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-ea"/> </launchConfiguration> diff --git a/vipra-cmd/runcfg/CMD - Import 25.launch b/vipra-cmd/runcfg/CMD - Import 25.launch index 60e6ec53b3752a8306084103e9bc4ccf5a0b7d4c..b548bdd708de8e39a7467f27cc07f4f8ab0f43f8 100644 --- a/vipra-cmd/runcfg/CMD - Import 25.launch +++ b/vipra-cmd/runcfg/CMD - Import 25.launch @@ -11,8 +11,8 @@ 
</listAttribute> <stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/> <stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="de.vipra.cmd.Main"/> -<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-i /home/eike/repos/master/ma-impl/vm/data/test-25.json"/> +<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-AI /home/eike/repos/master/ma-impl/vm/data/test-25.json"/> <stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/> <stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/> -<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Dlog4j.configurationFile=log4j2dev.xml"/> +<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-ea"/> </launchConfiguration> diff --git a/vipra-cmd/runcfg/CMD - Import All.launch b/vipra-cmd/runcfg/CMD - Import All.launch index 750a14c54eeb0b90e20556e6e97e15072123b875..0e8a0c856d168151ab55e7dc839186ae33a50330 100644 --- a/vipra-cmd/runcfg/CMD - Import All.launch +++ b/vipra-cmd/runcfg/CMD - Import All.launch @@ -11,8 +11,8 @@ </listAttribute> <stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/> <stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="de.vipra.cmd.Main"/> -<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-i /home/eike/repos/master/ma-impl/vm/data/data.json"/> +<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-AI /home/eike/repos/master/ma-impl/vm/data/data.json"/> <stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/> <stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/> -<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" 
value="-Dlog4j.configurationFile=log4j2dev.xml"/> +<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-ea"/> </launchConfiguration> diff --git a/vipra-cmd/runcfg/CMD - Indexing.launch b/vipra-cmd/runcfg/CMD - Indexing.launch index 801f3d3546cf52797d7c339394cd34819902ec59..0e3fecb125ac97797b96daa17da6c22724631ed2 100644 --- a/vipra-cmd/runcfg/CMD - Indexing.launch +++ b/vipra-cmd/runcfg/CMD - Indexing.launch @@ -11,8 +11,8 @@ </listAttribute> <stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/> <stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="de.vipra.cmd.Main"/> -<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-e"/> +<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-Ai"/> <stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/> <stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/> -<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Dlog4j.configurationFile=log4j2dev.xml"/> +<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-ea"/> </launchConfiguration> diff --git a/vipra-cmd/runcfg/CMD - Modeling.launch b/vipra-cmd/runcfg/CMD - Modeling.launch index 9560a948281cff94a0c57ab56a3b27264e02007c..b4a690b51c621da0f8688a35d60797482c2706e2 100644 --- a/vipra-cmd/runcfg/CMD - Modeling.launch +++ b/vipra-cmd/runcfg/CMD - Modeling.launch @@ -11,8 +11,8 @@ </listAttribute> <stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/> <stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="de.vipra.cmd.Main"/> -<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-g"/> +<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-AM"/> <stringAttribute 
key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/> <stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/> -<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Dlog4j.configurationFile=log4j2dev.xml"/> +<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-ea"/> </launchConfiguration> diff --git a/vipra-cmd/runcfg/CMD - Reread.launch b/vipra-cmd/runcfg/CMD - Reread.launch index 3d3851ee426eb65a2e42586756ff487f1fab42d1..c7c1d7de9494f2b762f4890a4baa0c75ce38cd52 100644 --- a/vipra-cmd/runcfg/CMD - Reread.launch +++ b/vipra-cmd/runcfg/CMD - Reread.launch @@ -11,8 +11,8 @@ </listAttribute> <stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/> <stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="de.vipra.cmd.Main"/> -<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-r"/> +<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-Ar"/> <stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/> <stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/> -<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Dlog4j.configurationFile=log4j2dev.xml"/> +<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-ea"/> </launchConfiguration> diff --git a/vipra-cmd/runcfg/CMD - Test.launch b/vipra-cmd/runcfg/CMD - Test.launch index 2e69b94c298f5848c58e40e2bac55c7763b023f3..b36db2e1f99f6ae77ceb1a8501b802a2a37c5e24 100644 --- a/vipra-cmd/runcfg/CMD - Test.launch +++ b/vipra-cmd/runcfg/CMD - Test.launch @@ -14,5 +14,5 @@ <stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-t"/> <stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/> <stringAttribute 
key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/> -<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Dlog4j.configurationFile=log4j2dev.xml"/> +<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-ea"/> </launchConfiguration> diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/CommandLineOptions.java b/vipra-cmd/src/main/java/de/vipra/cmd/CommandLineOptions.java index 0e83857d6bc8a6ab82a224efefbfcb27e5140951..ac6610e5c718baf7f93e9fd2aa352c472db5577f 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/CommandLineOptions.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/CommandLineOptions.java @@ -11,17 +11,18 @@ public class CommandLineOptions { public static final Option CLEAR = Option.builder("c").longOpt("clear").desc("clear the database and models").build(); public static final Option DEBUG = Option.builder("d").longOpt("debug").desc("show debug information").build(); - public static final Option EDIT = Option.builder("e").longOpt("edit").desc("edit a specific model").hasArg().argName("model").build(); public static final Option HELP = Option.builder("h").longOpt("help").desc("show this help").build(); - public static final Option INDEX = Option.builder("i").longOpt("index").desc("create the search index").build(); + public static final Option INDEX = Option.builder("i").longOpt("index").desc("create the search index on selected models").build(); public static final Option LIST = Option.builder("l").longOpt("list").desc("list available models").build(); public static final Option REREAD = Option.builder("r").longOpt("reread").desc("reread generated models").build(); public static final Option SILENT = Option.builder("s").longOpt("silent").desc("suppress all output").build(); public static final Option TEST = Option.builder("t").longOpt("test").desc("test database connections").build(); + public static final Option ALL = Option.builder("A").longOpt("all").desc("select all 
models, short for -S all").build(); public static final Option CREATE = Option.builder("C").longOpt("create").desc("create new models").hasArgs().argName("models...").build(); - public static final Option DELETE = Option.builder("d").longOpt("delete").desc("delete existing models").hasArgs().argName("models...").build(); - public static final Option IMPORT = Option.builder("I").longOpt("import").desc("import data from json").hasArgs().argName("models...").build(); - public static final Option MODEL = Option.builder("M").longOpt("model").desc("generate models from database").build(); + public static final Option DELETE = Option.builder("D").longOpt("delete").desc("delete existing models").hasArgs().argName("models...").build(); + public static final Option IMPORT = Option.builder("I").longOpt("import").desc("import data from json into selected models").hasArgs() + .argName("models...").build(); + public static final Option MODEL = Option.builder("M").longOpt("model").desc("generate topics on selected models").build(); public static final Option SELECT = Option.builder("S").longOpt("select").desc("select models").hasArgs().argName("models...").build(); private final Options options; @@ -29,7 +30,7 @@ public class CommandLineOptions { private final String cmdName = "vipra"; public CommandLineOptions() { - final Option[] optionsArray = { CLEAR, DEBUG, EDIT, HELP, INDEX, LIST, REREAD, SILENT, TEST, CREATE, DELETE, IMPORT, MODEL, SELECT }; + final Option[] optionsArray = { CLEAR, DEBUG, HELP, INDEX, LIST, REREAD, SILENT, TEST, ALL, CREATE, DELETE, IMPORT, MODEL, SELECT }; options = new Options(); for (final Option option : optionsArray) options.addOption(option); @@ -60,14 +61,6 @@ public class CommandLineOptions { return hasOption(DEBUG) && !hasOption(SILENT); } - public boolean isEdit() { - return hasOption(EDIT); - } - - public String modelToEdit() { - return getOptionValue(EDIT); - } - public boolean isHelp() { return hasOption(HELP); } @@ -109,6 +102,10 @@ public 
class CommandLineOptions { return hasOption(CREATE); } + public boolean isAll() { + return hasOption(ALL); + } + public String[] modelsToCreate() { return getOptionValues(CREATE); } @@ -142,13 +139,16 @@ public class CommandLineOptions { } public String[] selectedModels() { - return getOptionValues(SELECT); + if (isAll()) + return new String[] { "all" }; + else + return getOptionValues(SELECT); } private void checkDependencies() throws ParseException { - if (isImport() || isModel() || isIndex() || isReread() || isDelete()) { + if (isImport() || isModel() || isIndex() || isReread()) { // these options require at least one selected model - if (!isSelect()) + if (!isSelect() && !isAll()) throw new ParseException("select at least one model"); } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/Main.java b/vipra-cmd/src/main/java/de/vipra/cmd/Main.java index 3d9bf43085a2592e24a7b776d4e6173c7a5fc272..a6c4906d73212daaf790fef8ae56929d8a449976 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/Main.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/Main.java @@ -4,14 +4,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.ListIterator; -import java.util.Map.Entry; - -import org.apache.logging.log4j.Level; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.logging.log4j.core.LoggerContext; -import org.apache.logging.log4j.core.config.Configuration; -import org.apache.logging.log4j.core.config.LoggerConfig; + import org.mongodb.morphia.logging.MorphiaLoggerFactory; import org.mongodb.morphia.logging.slf4j.SLF4JLoggerImplFactory; @@ -19,18 +12,16 @@ import de.vipra.cmd.option.ClearCommand; import de.vipra.cmd.option.Command; import de.vipra.cmd.option.CreateModelCommand; import de.vipra.cmd.option.DeleteModelCommand; -import de.vipra.cmd.option.EditModelCommand; import de.vipra.cmd.option.ImportCommand; import de.vipra.cmd.option.IndexingCommand; import 
de.vipra.cmd.option.ListModelsCommand; import de.vipra.cmd.option.ModelingCommand; import de.vipra.cmd.option.TestCommand; +import de.vipra.util.ConsoleUtils; import de.vipra.util.ex.ConfigException; public class Main { - public static final Logger log = LogManager.getLogger(Main.class); - static { // set morphia log level MorphiaLoggerFactory.registerLogger(SLF4JLoggerImplFactory.class); @@ -43,7 +34,7 @@ public class Main { try { opts.parse(args); } catch (final Exception e) { - log.error(e.getMessage()); + ConsoleUtils.error(e.getMessage()); opts.printHelp(); return; } @@ -54,24 +45,19 @@ public class Main { } // logger configuration - final LoggerContext loggerContext = (LoggerContext) LogManager.getContext(false); - final Configuration loggerConfigs = loggerContext.getConfiguration(); - - if (opts.isDebug()) - loggerConfigs.getLoggerConfig(LogManager.ROOT_LOGGER_NAME).setLevel(Level.DEBUG); - if (opts.isSilent()) { - for (final Entry<String, LoggerConfig> loggerConfig : loggerConfigs.getLoggers().entrySet()) - loggerConfig.getValue().setLevel(Level.OFF); - } + ConsoleUtils.setSilent(opts.isSilent()); - loggerContext.updateLoggers(); + // commands final List<Command> commands = new ArrayList<>(); if (opts.isTest()) commands.add(new TestCommand()); + if (opts.isClear()) + commands.add(new ClearCommand()); + if (opts.isCreate()) commands.add(new CreateModelCommand(opts.modelsToCreate())); @@ -81,12 +67,6 @@ public class Main { if (opts.isList()) commands.add(new ListModelsCommand()); - if (opts.isEdit()) - commands.add(new EditModelCommand(opts.modelToEdit())); - - if (opts.isClear()) - commands.add(new ClearCommand()); - if (opts.isImport()) commands.add(new ImportCommand(opts.selectedModels(), opts.filesToImport())); @@ -96,7 +76,8 @@ public class Main { if (opts.isIndex()) commands.add(new IndexingCommand(opts.selectedModels())); - // run commands + // run + if (commands.size() > 0) { for (final ListIterator<Command> it = commands.listIterator(); 
it.hasNext();) { final Command c = it.next(); @@ -105,10 +86,9 @@ public class Main { } catch (final Exception e) { final Throwable cause = e.getCause(); if (cause != null) - log.error(cause.getMessage()); + ConsoleUtils.error(cause.getMessage()); else - log.error(e.getMessage()); - log.debug(e.getMessage(), e); + ConsoleUtils.error(e.getMessage()); } } } else { diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java index 7eb8131920492241a938ca03945a0178f7767a5c..e5693aa8683d2ef0529671237bccc317690dfcee 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java @@ -13,9 +13,9 @@ import java.util.HashMap; import java.util.Map; import de.vipra.util.Constants; -import de.vipra.util.ModelConfig; import de.vipra.util.ex.ConfigException; import de.vipra.util.model.ArticleFull; +import de.vipra.util.model.TopicModelConfig; public class Filebase { @@ -28,7 +28,7 @@ public class Filebase { private final FilebaseWordIndex wordIndex; private final FilebaseWindowIndex windowIndex; - public Filebase(final ModelConfig modelConfig, final File dataDir) throws ParseException, IOException { + public Filebase(final TopicModelConfig modelConfig, final File dataDir) throws ParseException, IOException { modelDir = new File(dataDir, modelConfig.getName()); file = new File(modelDir, FILE_NAME); newArticles = new HashMap<>(); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseWindowIndex.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseWindowIndex.java index 436d4978947c26575dd57c61d6682cd3a556af1d..4559a99f46b0dfce071e193df2ea5fc5d8f61ae4 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseWindowIndex.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseWindowIndex.java @@ -45,7 +45,6 @@ public class FilebaseWindowIndex { windowMap = new CountMap<>(); if (winFile.exists()) { final BufferedReader in = new 
BufferedReader(new InputStreamReader(new FileInputStream(winFile))); - in.readLine(); String line = null; while ((line = in.readLine()) != null) { final String[] parts = line.split(","); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java index 2448ee904794bfdf6d7f1308310ef6cef0d3806f..458af103a1f6110847cd9274cdc7deb34c646d3c 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java @@ -13,8 +13,6 @@ import java.util.HashSet; import java.util.List; import java.util.Set; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import org.bson.types.ObjectId; import de.vipra.cmd.file.FilebaseIDDateIndex; @@ -23,7 +21,7 @@ import de.vipra.cmd.file.FilebaseWindowIndex; import de.vipra.cmd.file.FilebaseWordIndex; import de.vipra.util.ArrayUtils; import de.vipra.util.Config; -import de.vipra.util.ModelConfig; +import de.vipra.util.ConsoleUtils; import de.vipra.util.MongoUtils; import de.vipra.util.StringUtils; import de.vipra.util.ex.ConfigException; @@ -36,6 +34,7 @@ import de.vipra.util.model.SimilarArticle; import de.vipra.util.model.Topic; import de.vipra.util.model.TopicFull; import de.vipra.util.model.TopicModel; +import de.vipra.util.model.TopicModelConfig; import de.vipra.util.model.TopicModelFull; import de.vipra.util.model.TopicShare; import de.vipra.util.model.TopicWord; @@ -45,8 +44,6 @@ import de.vipra.util.service.Service.QueryBuilder; public class Analyzer { - public static final Logger log = LogManager.getLogger(Analyzer.class); - private final Config config; private final File dataDir; private final File dtmBinary; @@ -73,10 +70,10 @@ public class Analyzer { throw new AnalyzerException("dtm binary not found at path: " + config.getDtmPath() + ", check config key 'tm.dtmpath'"); } - public void analyze(final ModelConfig modelConfig, final boolean reread) + public void analyze(final 
TopicModelConfig modelConfig, final boolean reread) throws AnalyzerException, DatabaseException, ParseException, IOException, InterruptedException { - final File modelDir = new File(dataDir, modelConfig.getName()); + final File modelDir = modelConfig.getModelDir(dataDir); final File outDir = new File(modelDir, "out"); final File outDirSeq = new File(outDir, "lda-seq"); @@ -106,8 +103,6 @@ public class Analyzer { final String command = dtmBinary.getAbsolutePath() + " " + StringUtils.join(parameters, " "); - final FilebaseWindowIndex windowIndex = new FilebaseWindowIndex(modelDir, modelConfig.getWindowResolution()); - BufferedReader in; if (!reread) { @@ -122,7 +117,7 @@ public class Analyzer { int iteration = 0; while ((line = in.readLine()) != null) { if (line.contains("EM iter")) { - log.info("iteration " + iteration++); + ConsoleUtils.info("iteration " + iteration++); } } @@ -130,20 +125,18 @@ public class Analyzer { p.waitFor(); } - final FilebaseWordIndex wordIndex = new FilebaseWordIndex(modelConfig.getModelDir(dataDir)); - final FilebaseIDDateIndex idDateIndex = new FilebaseIDDateIndex(modelConfig.getModelDir(dataDir)); - - final QueryBuilder builder = QueryBuilder.builder().criteria("model.id", modelConfig.getName()); - dbArticles.deleteMultiple(builder); - dbTopics.deleteMultiple(builder); - dbSequences.deleteMultiple(builder); - dbWindows.deleteMultiple(builder); - dbTopicModels.deleteSingle(modelConfig.getName()); + final FilebaseWordIndex wordIndex = new FilebaseWordIndex(modelDir); + final FilebaseIDDateIndex idDateIndex = new FilebaseIDDateIndex(modelDir); + final FilebaseWindowIndex windowIndex = new FilebaseWindowIndex(modelDir, modelConfig.getWindowResolution()); final int topicCount = modelConfig.getkTopics(); - final int wordCount = wordIndex.size(); + assert topicCount > 0; + final int sequencesCount = windowIndex.size(); + assert sequencesCount > 0; + final int articlesCount = idDateIndex.size(); + final int wordCount = wordIndex.size(); // read 
topic distributions @@ -171,14 +164,14 @@ public class Analyzer { // read topic definition files and create topics - final TopicModelFull newTopicModel = new TopicModelFull(modelConfig.getName()); + final TopicModelFull topicModel = new TopicModelFull(modelConfig.getName(), modelConfig); final List<Window> newWindows = new ArrayList<>(sequencesCount); final List<SequenceFull> newSequences = new ArrayList<>(topicCount * sequencesCount); final List<TopicFull> newTopics = new ArrayList<>(topicCount); - log.info("vocabulary size: " + wordCount); - log.info("sequences: " + sequencesCount); - log.info("topics: " + topicCount); + ConsoleUtils.info("vocabulary size: " + wordCount); + ConsoleUtils.info("sequences: " + sequencesCount); + ConsoleUtils.info("topics: " + topicCount); final boolean seqRelativeCutoff = modelConfig.getMinRelativeProbability() > 0; @@ -189,7 +182,7 @@ public class Analyzer { newWindow.setStartDate(windowIndex.startDate(idxSeq)); newWindow.setEndDate(windowIndex.endDate(idxSeq)); newWindow.setWindowResolution(modelConfig.getWindowResolution()); - newWindow.setModel(new TopicModel(newTopicModel.getId())); + newWindow.setTopicModel(new TopicModel(topicModel.getId())); newWindows.add(newWindow); } @@ -205,7 +198,7 @@ public class Analyzer { final TopicFull newTopic = new TopicFull(); final List<Sequence> newTopicSequences = new ArrayList<>(sequencesCount); newTopic.setSequences(newTopicSequences); - newTopic.setModel(new TopicModel(newTopicModel.getId())); + newTopic.setTopicModel(new TopicModel(topicModel.getId())); newTopics.add(newTopic); in = new BufferedReader(new InputStreamReader(new FileInputStream(seqFile))); @@ -271,7 +264,7 @@ public class Analyzer { newSequenceFull.setRelevance(relevance); newSequenceFull.setRelevanceChange(relevance - prevRelevance); newSequenceFull.setTopic(new Topic(newTopic.getId())); - newSequenceFull.setModel(new TopicModel(newTopicModel.getId())); + newSequenceFull.setTopicModel(new TopicModel(topicModel.getId())); 
newSequences.add(newSequenceFull); newTopicSequences.add(new Sequence(newSequenceFull.getId())); @@ -294,7 +287,7 @@ public class Analyzer { // sort topic words and generate topic name final List<TopicWord> topTopicWordsList = new ArrayList<>(topTopicWords); Collections.sort(topTopicWordsList); - newTopic.setName(TopicFull.getNameFromWords(topTopicWordsList)); + newTopic.setName(TopicFull.getNameFromWords(modelConfig.getTopicAutoNamingWords(), topTopicWordsList)); // calculate average final double average = relevanceSum / sequencesCount; @@ -325,14 +318,17 @@ public class Analyzer { newTopic.setRisingDecayRelevance(risingDecayRelevance); } - // create topics and words - try { - dbWindows.createMultiple(newWindows); - dbSequences.createMultiple(newSequences); - dbTopics.createMultiple(newTopics); - } catch (final DatabaseException e) { - throw new AnalyzerException(e); - } + // recreate windows, sequences and topics + + final QueryBuilder builder = QueryBuilder.builder().criteria("topicModel", new TopicModel(modelConfig.getName())); + + dbWindows.deleteMultiple(builder); + dbSequences.deleteMultiple(builder); + dbTopics.deleteMultiple(builder); + + dbWindows.createMultiple(newWindows); + dbSequences.createMultiple(newSequences); + dbTopics.createMultiple(newTopics); // create topic references and store document similarities @@ -392,19 +388,19 @@ public class Analyzer { // update article with topic references (partial update) final ArticleFull article = new ArticleFull(); article.setId(entry.getId()); - article.setModel(new TopicModel(newTopicModel.getId())); + article.setTopicModel(new TopicModel(topicModel.getId())); article.setTopics(newTopicRefs); article.setSimilarArticles(similarArticles); try { - dbArticles.updateSingle(article, "model", "topics", "similarArticles"); + dbArticles.updateSingle(article, "topicModel", "topics", "similarArticles"); } catch (final DatabaseException e) { - log.error(e); + ConsoleUtils.error(e); } } } - 
dbTopicModels.createSingle(newTopicModel); + dbTopicModels.replaceSingle(topicModel); } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ClearCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ClearCommand.java index b731886a5e3cbb979612b74a66bb771f2219fe21..76d2c1bdd172d212a0dcaf3eab07a8ee1a9180c6 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ClearCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ClearCommand.java @@ -4,60 +4,34 @@ import java.io.File; import java.io.IOException; import org.apache.commons.io.FileUtils; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.bson.types.ObjectId; import org.elasticsearch.client.Client; import de.vipra.util.Config; +import de.vipra.util.ConsoleUtils; import de.vipra.util.ESClient; -import de.vipra.util.model.Article; -import de.vipra.util.model.Sequence; -import de.vipra.util.model.Topic; -import de.vipra.util.model.TopicModel; -import de.vipra.util.model.Window; import de.vipra.util.service.MongoService; public class ClearCommand implements Command { - public static final Logger log = LogManager.getLogger(ClearCommand.class); - private Config config; - private MongoService<Article, ObjectId> dbArticles; - private MongoService<Topic, ObjectId> dbTopics; - private MongoService<Sequence, ObjectId> dbSequences; - private MongoService<Window, Integer> dbWindows; - private MongoService<TopicModel, String> dbTopicModels; private Client elasticClient; private void clear() throws Exception { config = Config.getConfig(); - dbArticles = MongoService.getDatabaseService(config, Article.class); - dbTopics = MongoService.getDatabaseService(config, Topic.class); - dbSequences = MongoService.getDatabaseService(config, Sequence.class); - dbWindows = MongoService.getDatabaseService(config, Window.class); - dbTopicModels = MongoService.getDatabaseService(config, TopicModel.class); elasticClient = ESClient.getClient(config); - - log.info("clearing 
database"); - dbArticles.drop(); - dbTopics.drop(); - dbSequences.drop(); - dbWindows.drop(); - dbTopicModels.drop(); - - log.info("clearing index"); + MongoService.dropDatabase(config); elasticClient.admin().indices().prepareDelete("_all").get(); try { - log.info("clearing filebase"); final File dataDir = config.getDataDirectory(); if (dataDir.exists() && dataDir.isDirectory()) { FileUtils.deleteDirectory(dataDir); } } catch (final IOException e) { - log.warn("could not delete data directory: " + config.getDataDirectory().getAbsolutePath()); + ConsoleUtils.warn("could not delete data directory: " + config.getDataDirectory().getAbsolutePath()); } + + ConsoleUtils.info("cleared"); } @Override diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/CreateModelCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/CreateModelCommand.java index fc93cfc452a9f86cfed253d2abf58a000ee2fd41..8d11e4f07b1444d89fbc5566b53dcf4943ce65fe 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/CreateModelCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/CreateModelCommand.java @@ -2,19 +2,14 @@ package de.vipra.cmd.option; import java.io.File; -import org.apache.commons.io.IOUtils; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - import de.vipra.util.Config; -import de.vipra.util.Constants; -import de.vipra.util.FileUtils; -import de.vipra.util.ModelConfig; +import de.vipra.util.ConsoleUtils; +import de.vipra.util.model.TopicModelConfig; +import de.vipra.util.model.TopicModelFull; +import de.vipra.util.service.MongoService; public class CreateModelCommand implements Command { - public static final Logger log = LogManager.getLogger(CreateModelCommand.class); - private final String[] names; public CreateModelCommand(final String[] names) { @@ -27,12 +22,14 @@ public class CreateModelCommand implements Command { return; final Config config = Config.getConfig(); + final MongoService<TopicModelFull, String> dbTopicModels = 
MongoService.getDatabaseService(config, TopicModelFull.class); - final String modelConfigString; - if (config.getModelConfigTemplate() == null) { - modelConfigString = IOUtils.toString(FileUtils.getResource(Constants.MODEL_FILE)); + final TopicModelConfig modelConfig; + + if (config.getModelConfigTemplate() != null) { + modelConfig = new TopicModelConfig(config.getModelConfigTemplate()); } else { - modelConfigString = Config.mapper.writeValueAsString(config.getModelConfigTemplate()); + modelConfig = new TopicModelConfig(); } for (final String name : names) { @@ -43,12 +40,12 @@ public class CreateModelCommand implements Command { throw new Exception("model with that name already exists: " + name); if (!modelDir.mkdirs()) throw new Exception("could not create model directory: " + modelDir.getAbsolutePath()); - final File modelConfigFile = new File(modelDir, Constants.MODEL_FILE); - final ModelConfig modelConfig = Config.mapper.readValue(modelConfigString, ModelConfig.class); + modelConfig.setName(name); - org.apache.commons.io.FileUtils.write(modelConfigFile, Config.mapper.writeValueAsString(modelConfig)); - config.getModelConfigs().put(name, modelConfig); - log.info("model created: " + name); + final TopicModelFull topicModel = new TopicModelFull(name, modelConfig); + dbTopicModels.createSingle(topicModel); + config.getTopicModelConfigs().put(name, modelConfig); + ConsoleUtils.info("model created: " + name); } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/DeleteModelCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/DeleteModelCommand.java index 7d10cf6abafb1c8719ac287962faf12177cdbeec..8c771a0a0835c1e2fd10e10c631ea259aedf5f19 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/DeleteModelCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/DeleteModelCommand.java @@ -1,16 +1,15 @@ package de.vipra.cmd.option; import java.io.File; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; +import 
java.util.Arrays; import de.vipra.util.Config; +import de.vipra.util.ConsoleUtils; +import de.vipra.util.model.TopicModel; +import de.vipra.util.service.MongoService; public class DeleteModelCommand implements Command { - public static final Logger log = LogManager.getLogger(DeleteModelCommand.class); - private final String[] names; public DeleteModelCommand(final String[] names) { @@ -20,13 +19,16 @@ public class DeleteModelCommand implements Command { @Override public void run() throws Exception { final Config config = Config.getConfig(); + final MongoService<TopicModel, String> dbTopicModels = MongoService.getDatabaseService(config, TopicModel.class); + for (final String name : names) { final File modelDir = new File(config.getDataDirectory(), name); if (modelDir.exists()) { org.apache.commons.io.FileUtils.deleteDirectory(modelDir); - log.info("model deleted: " + name); + ConsoleUtils.info("model deleted: " + name); } } + dbTopicModels.deleteMultiple(Arrays.asList(names)); } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/EditModelCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/EditModelCommand.java deleted file mode 100644 index 510757b16821cca4b61758db6f0e5676eba3bf68..0000000000000000000000000000000000000000 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/EditModelCommand.java +++ /dev/null @@ -1,28 +0,0 @@ -package de.vipra.cmd.option; - -import java.awt.Desktop; -import java.io.File; - -import de.vipra.util.Config; -import de.vipra.util.ModelConfig; -import de.vipra.util.ex.ConfigException; - -public class EditModelCommand implements Command { - - private final File configFile; - - public EditModelCommand(final String model) throws ConfigException { - final Config config = Config.getConfig(); - final ModelConfig modelConfig = config.getModelConfigs().get(model); - configFile = modelConfig.getConfigFile(config.getDataDirectory()); - } - - @Override - public void run() throws Exception { - if (!configFile.exists()) - throw new 
Exception("missing model configuration file: " + configFile.getAbsolutePath()); - - Desktop.getDesktop().open(configFile); - } - -} diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java index 94211ee14dea95cac03dfa49e7deb11effe76328..17a18c6cccc48ef0dff6613698e000cc56022d8d 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java @@ -6,13 +6,9 @@ import java.io.FileReader; import java.io.FilenameFilter; import java.io.IOException; import java.util.ArrayList; +import java.util.EnumSet; import java.util.List; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import org.bson.types.ObjectId; import org.json.simple.JSONArray; import org.json.simple.JSONObject; @@ -24,15 +20,23 @@ import de.vipra.cmd.file.FilebaseException; import de.vipra.cmd.text.ProcessedText; import de.vipra.cmd.text.Processor; import de.vipra.cmd.text.ProcessorException; +import de.vipra.cmd.text.SpotlightAnalyzer; +import de.vipra.cmd.text.SpotlightResource; +import de.vipra.cmd.text.SpotlightResponse; import de.vipra.util.Config; +import de.vipra.util.ConsoleUtils; import de.vipra.util.Constants; -import de.vipra.util.ModelConfig; +import de.vipra.util.Constants.ProcessorMode; import de.vipra.util.StringUtils; import de.vipra.util.Timer; import de.vipra.util.ex.ConfigException; import de.vipra.util.ex.DatabaseException; import de.vipra.util.model.ArticleFull; import de.vipra.util.model.ArticleStats; +import de.vipra.util.model.TextEntity; +import de.vipra.util.model.TopicModel; +import de.vipra.util.model.TopicModelConfig; +import de.vipra.util.model.TopicModelFull; import de.vipra.util.service.MongoService; public class ImportCommand implements Command { @@ -61,82 
+65,18 @@ public class ImportCommand implements Command { } } - public static class ImportTask implements Runnable { - - private final JSONObject object; - private final Processor processor; - private final ArticleBuffer buffer; - private final Filebase filebase; - - public ImportTask(final JSONObject object, final Processor processor, final ArticleBuffer buffer, final Filebase filebase) { - this.object = object; - this.processor = processor; - this.buffer = buffer; - this.filebase = filebase; - } - - @Override - public void run() { - final ArticleFull article = articleFromJSON(object); - - try { - // preprocess text - final ProcessedText processedText = processor.process(article.getText()); - article.setProcessedText(processedText.getWords()); - article.setWords(processedText.getArticleWords()); - - // generate article stats - final ArticleStats stats = new ArticleStats(); - stats.setWordCount(processedText.getWordCount()); - stats.setProcessedWordCount(processedText.getReducedWordCount()); - stats.setReductionRatio(processedText.getReductionRatio()); - article.setStats(stats); - - // add article to mongodb - buffer.add(article); - - // add article to filebase if long enough - if (processedText.getReducedWordCount() >= Constants.DOCUMENT_MIN_LENGTH) - filebase.add(article); - - log.info("imported \"" + object.get("title")); - } catch (final ProcessorException e) { - log.error("could not preprocess text of article '" + article.getTitle() + "'"); - } catch (final DatabaseException e) { - log.error("could not save processed article in the database '" + article.getTitle() + "'"); - } catch (final FilebaseException e) { - log.error("could not save processed article in the filebase '" + article.getTitle() + "'"); - } - } - - private ArticleFull articleFromJSON(final JSONObject obj) { - final ArticleFull article = new ArticleFull(); - article.setId(new ObjectId()); - if (obj.containsKey("title")) - article.setTitle(obj.get("title").toString()); - if 
(obj.containsKey("text")) - article.setText(obj.get("text").toString()); - if (obj.containsKey("url")) - article.setUrl(obj.get("url").toString()); - if (obj.containsKey("date")) - article.setDate(obj.get("date").toString()); - return article; - } - - } - - public static final Logger log = LogManager.getLogger(ImportCommand.class); - private final String[] models; - private final int threadCount; private final List<File> files = new ArrayList<>(); private final JSONParser parser = new JSONParser(); private Config config; private MongoService<ArticleFull, ObjectId> dbArticles; + private MongoService<TopicModelFull, String> dbTopicModels; + private TopicModelConfig modelConfig; + private SpotlightAnalyzer spotlightAnalyzer; private Filebase filebase; private Processor processor; private ArticleBuffer buffer; - private ExecutorService executor; + private TopicModelFull topicModel; /** * Import command to import articles into the database, do topic modeling @@ -149,7 +89,6 @@ public class ImportCommand implements Command { */ public ImportCommand(final String[] models, final String[] paths) { this.models = models; - threadCount = Runtime.getRuntime().availableProcessors() * 10; addPaths(paths); } @@ -159,13 +98,13 @@ public class ImportCommand implements Command { } } - public void addPaths(final File[] paths) { + private void addPaths(final File[] paths) { for (final File path : paths) { addPath(path); } } - public void addPath(final File file) { + private void addPath(final File file) { if (file.isFile()) { files.add(file); } else if (file.isDirectory()) { @@ -180,6 +119,20 @@ public class ImportCommand implements Command { } } + private ArticleFull articleFromJSON(final JSONObject obj) { + final ArticleFull article = new ArticleFull(); + article.setId(new ObjectId()); + if (obj.containsKey("title")) + article.setTitle(obj.get("title").toString()); + if (obj.containsKey("text")) + article.setText(obj.get("text").toString()); + if (obj.containsKey("url")) + 
article.setUrl(obj.get("url").toString()); + if (obj.containsKey("date")) + article.setDate(obj.get("date").toString()); + return article; + } + /** * import a single article into the database and filebase * @@ -188,8 +141,61 @@ public class ImportCommand implements Command { * @throws Exception */ private void importArticle(final JSONObject object) { - final ImportTask task = new ImportTask(object, processor, buffer, filebase); - executor.execute(task); + final ArticleFull article = articleFromJSON(object); + + if (EnumSet.of(ProcessorMode.ENTITIES, ProcessorMode.TEXT_WITH_ENTITIES).contains(modelConfig.getProcessorMode())) { + try { + final SpotlightResponse spotlightResponse = spotlightAnalyzer.analyze(article.getText()); + + final List<TextEntity> textEntities = new ArrayList<>(spotlightResponse.getResources().size()); + final StringBuilder sb = new StringBuilder(); + + for (final SpotlightResource sr : spotlightResponse.getResources()) { + textEntities.add(new TextEntity(sr.getSurfaceForm(), sr.getUri())); + + for (final String type : sr.getTypes()) { + final String[] parts = type.split(":"); + sb.append(" ").append(parts[parts.length - 1]); + } + } + + // TODO do sth with this + } catch (final IOException e) { + ConsoleUtils.error("could not analyze text with spotlight: " + e.getMessage()); + } + } + + try { + // preprocess text + final ProcessedText processedText = processor.process(modelConfig, article.getText()); + + if (processedText.getReducedWordCount() < modelConfig.getDocumentMinimumLength()) { + ConsoleUtils.info(" skipped \"" + object.get("title")); + } else { + article.setProcessedText(processedText.getWords()); + article.setWords(processedText.getArticleWords()); + article.setTopicModel(new TopicModel(topicModel.getId())); + + // generate article stats + final ArticleStats stats = new ArticleStats(); + stats.setWordCount(processedText.getWordCount()); + stats.setProcessedWordCount(processedText.getReducedWordCount()); + 
stats.setReductionRatio(processedText.getReductionRatio()); + article.setStats(stats); + + // add article to data- and filebase + buffer.add(article); + filebase.add(article); + + ConsoleUtils.info("imported \"" + object.get("title")); + } + } catch (final ProcessorException e) { + ConsoleUtils.error("could not preprocess text of article '" + article.getTitle() + "'"); + } catch (final DatabaseException e) { + ConsoleUtils.error("could not save processed article in the database '" + article.getTitle() + "'"); + } catch (final FilebaseException e) { + ConsoleUtils.error("could not save processed article in the filebase '" + article.getTitle() + "'"); + } } /** @@ -214,7 +220,7 @@ public class ImportCommand implements Command { importArticle((JSONObject) data); imported++; } else { - log.error("unknown data format"); + ConsoleUtils.error("unknown data format"); } return imported; @@ -228,13 +234,17 @@ public class ImportCommand implements Command { return imported; } - private void importForModel(final ModelConfig modelConfig) + private void importForModel(final TopicModelConfig modelConfig) throws java.text.ParseException, IOException, ConfigException, ParseException, InterruptedException, DatabaseException { + this.modelConfig = modelConfig; + if (this.modelConfig.getProcessorMode() == ProcessorMode.ENTITIES || this.modelConfig.getProcessorMode() == ProcessorMode.TEXT_WITH_ENTITIES) + spotlightAnalyzer = new SpotlightAnalyzer(modelConfig); + buffer = new ArticleBuffer(dbArticles); filebase = new Filebase(modelConfig, config.getDataDirectory()); + topicModel = new TopicModelFull(modelConfig.getName(), modelConfig); - log.info("using data directory: " + config.getDataDirectory().getAbsolutePath()); - log.info("using " + threadCount + " " + StringUtils.quantity(threadCount, "thread")); + dbTopicModels.replaceSingle(topicModel); final Timer timer = new Timer(); timer.restart(); @@ -242,36 +252,28 @@ public class ImportCommand implements Command { /* * import files into 
database and filebase */ - log.info("file import"); - final int imported = importFiles(files); - executor.shutdown(); - executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS); + importFiles(files); buffer.save(); - timer.lap("import"); /* * write filebase */ - log.info("writing file index"); filebase.sync(); - timer.lap("filebase write"); /* * run information */ - log.info("imported " + imported + " new " + StringUtils.quantity(imported, "article")); - log.info(timer.toString()); - log.info("done in " + StringUtils.timeString(timer.total())); + ConsoleUtils.info("done in " + StringUtils.timeString(timer.total())); } @Override public void run() throws java.text.ParseException, IOException, ParseException, InterruptedException, DatabaseException, Exception { config = Config.getConfig(); dbArticles = MongoService.getDatabaseService(config, ArticleFull.class); + dbTopicModels = MongoService.getDatabaseService(config, TopicModelFull.class); processor = new Processor(); - executor = Executors.newFixedThreadPool(threadCount); - for (final String model : models) { - importForModel(config.getModelConfig(model)); + for (final TopicModelConfig modelConfig : config.getTopicModelConfigs(models)) { + importForModel(modelConfig); } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/IndexingCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/IndexingCommand.java index e72bed6539bffe99bef3ce77191a2395b5cfc542..3f58c23f1e988cfb608e6d4f84fb9ce5445f0af1 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/IndexingCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/IndexingCommand.java @@ -4,27 +4,25 @@ import java.io.IOException; import java.text.ParseException; import java.util.Map; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import org.bson.types.ObjectId; import org.elasticsearch.client.Client; +import org.elasticsearch.index.IndexNotFoundException; import de.vipra.cmd.file.FilebaseIDDateIndex; import 
de.vipra.cmd.file.FilebaseIDDateIndexEntry; import de.vipra.util.Config; +import de.vipra.util.ConsoleUtils; import de.vipra.util.ESClient; import de.vipra.util.ESSerializer; -import de.vipra.util.ModelConfig; import de.vipra.util.MongoUtils; import de.vipra.util.ex.ConfigException; import de.vipra.util.ex.DatabaseException; import de.vipra.util.model.ArticleFull; +import de.vipra.util.model.TopicModelConfig; import de.vipra.util.service.MongoService; public class IndexingCommand implements Command { - public static final Logger log = LogManager.getLogger(IndexingCommand.class); - private final String[] models; private Config config; private MongoService<ArticleFull, ObjectId> dbArticles; @@ -35,26 +33,28 @@ public class IndexingCommand implements Command { this.models = models; } - private void indexForModel(final ModelConfig modelConfig) throws ParseException, IOException, ConfigException, DatabaseException { + private void indexForModel(final TopicModelConfig modelConfig) throws ParseException, IOException, ConfigException, DatabaseException { final FilebaseIDDateIndex index = new FilebaseIDDateIndex(modelConfig.getModelDir(config.getDataDirectory())); final String indexName = modelConfig.getName() + "-articles"; - // clear index - // elasticClient.admin().indices().prepareDelete("_all").get(); - elasticClient.admin().indices().prepareDelete(indexName).get(); + try { + // clear index + elasticClient.admin().indices().prepareDelete(indexName).get(); + } catch (final IndexNotFoundException e) {} for (final FilebaseIDDateIndexEntry entry : index) { // get article from database final ArticleFull article = dbArticles.getSingle(MongoUtils.objectId(entry.getId()), true); if (article == null) { - log.error("no article found in db for id " + entry.getId()); + ConsoleUtils.error("no article found in db for id: " + entry.getId()); continue; } // index article final Map<String, Object> source = elasticSerializer.serialize(article); elasticClient.prepareIndex(indexName, 
"article", article.getId().toString()).setSource(source).get(); + ConsoleUtils.info("indexed \"" + article.getTitle() + "\""); } elasticClient.close(); @@ -66,8 +66,8 @@ public class IndexingCommand implements Command { dbArticles = MongoService.getDatabaseService(config, ArticleFull.class); elasticClient = ESClient.getClient(config); elasticSerializer = new ESSerializer<>(ArticleFull.class); - for (final String model : models) { - indexForModel(config.getModelConfig(model)); + for (final TopicModelConfig modelConfig : config.getTopicModelConfigs(models)) { + indexForModel(modelConfig); } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ListModelsCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ListModelsCommand.java index d203417ef49cd27a044e86bbcc3fcbd349221153..54e4df82ee3865afcd9b8bd12376e248c19c4fb4 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ListModelsCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ListModelsCommand.java @@ -2,22 +2,26 @@ package de.vipra.cmd.option; import java.util.Map.Entry; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; +import org.fusesource.jansi.Ansi; import de.vipra.util.Config; -import de.vipra.util.ModelConfig; +import de.vipra.util.ConsoleUtils; +import de.vipra.util.model.TopicModelConfig; +import edu.stanford.nlp.util.StringUtils; public class ListModelsCommand implements Command { - public static final Logger log = LogManager.getLogger(ListModelsCommand.class); - @Override public void run() throws Exception { - log.info("existing models:"); + ConsoleUtils.info("existing models:"); final Config config = Config.getConfig(); - for (final Entry<String, ModelConfig> entry : config.getModelConfigs().entrySet()) - log.info(" " + entry.getValue().getName()); + int longestModelName = 0; + for (final Entry<String, TopicModelConfig> entry : config.getTopicModelConfigs().entrySet()) + longestModelName = Math.max(longestModelName, 
entry.getValue().getName().length()); + for (final Entry<String, TopicModelConfig> entry : config.getTopicModelConfigs().entrySet()) + ConsoleUtils + .info(" " + Ansi.ansi().a(Ansi.Attribute.INTENSITY_BOLD).a(StringUtils.pad(entry.getValue().getName(), longestModelName)).reset() + + " " + entry.getValue().toString()); } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ModelingCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ModelingCommand.java index 490ef4cb2d3cacbf1970d0fcf0a3e0b191213e7a..3a61ed455178cf58b4bfb2cceeb14fbeb29c6278 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ModelingCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ModelingCommand.java @@ -3,22 +3,18 @@ package de.vipra.cmd.option; import java.io.IOException; import java.text.ParseException; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - import de.vipra.cmd.lda.Analyzer; import de.vipra.cmd.lda.AnalyzerException; import de.vipra.util.Config; -import de.vipra.util.ModelConfig; +import de.vipra.util.ConsoleUtils; import de.vipra.util.StringUtils; import de.vipra.util.Timer; import de.vipra.util.ex.ConfigException; import de.vipra.util.ex.DatabaseException; +import de.vipra.util.model.TopicModelConfig; public class ModelingCommand implements Command { - public static final Logger log = LogManager.getLogger(ModelingCommand.class); - private final String[] models; private final boolean reread; @@ -27,7 +23,7 @@ public class ModelingCommand implements Command { this.reread = reread; } - private void modelForModel(final ModelConfig modelConfig) + private void modelForModel(final TopicModelConfig modelConfig) throws AnalyzerException, ConfigException, DatabaseException, ParseException, IOException, InterruptedException { final Analyzer analyzer = new Analyzer(); @@ -37,22 +33,21 @@ public class ModelingCommand implements Command { /* * do topic modeling */ - log.info("topic modeling"); + ConsoleUtils.info("topic 
modeling"); analyzer.analyze(modelConfig, reread); timer.lap("topic modeling"); /* * run information */ - log.info(timer.toString()); - log.info("done in " + StringUtils.timeString(timer.total())); + ConsoleUtils.info("done in " + StringUtils.timeString(timer.total())); } @Override public void run() throws Exception { final Config config = Config.getConfig(); - for (final String model : models) { - modelForModel(config.getModelConfig(model)); + for (final TopicModelConfig modelConfig : config.getTopicModelConfigs(models)) { + modelForModel(modelConfig); } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/TestCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/TestCommand.java index b9b1418c02a854aff45fca4f9119debd8cf4d651..38a8b374d0c24fc291cfd8e52eaa4ac24ff0e020 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/TestCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/TestCommand.java @@ -1,39 +1,36 @@ package de.vipra.cmd.option; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import org.bson.types.ObjectId; import org.elasticsearch.client.transport.NoNodeAvailableException; import org.elasticsearch.client.transport.TransportClient; import de.vipra.util.Config; +import de.vipra.util.ConsoleUtils; import de.vipra.util.ESClient; import de.vipra.util.model.Article; import de.vipra.util.service.MongoService; public class TestCommand implements Command { - public static final Logger log = LogManager.getLogger(TestCommand.class); - @Override public void run() throws Exception { // test if configuration readable - log.info("reading configuration..."); + ConsoleUtils.info("reading configuration..."); final Config config = Config.getConfig(); // test if database is accessible - log.info("testing mongodb connection..."); + ConsoleUtils.info("testing mongodb connection..."); final MongoService<Article, ObjectId> dbArticles = MongoService.getDatabaseService(config, Article.class); dbArticles.count(null); 
// test if elasticsearch is accessible - log.info("testing elasticsearch connection..."); + ConsoleUtils.info("testing elasticsearch connection..."); final TransportClient esclient = ESClient.getClient(config); if (esclient.connectedNodes().isEmpty()) { throw new NoNodeAvailableException("no elasticsearch nodes available"); } - log.info("all tests passed"); + ConsoleUtils.info("all tests passed"); } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/plugin/ClassNameRegexFilter.java b/vipra-cmd/src/main/java/de/vipra/cmd/plugin/ClassNameRegexFilter.java deleted file mode 100644 index 76c416a85502e26b38e74bda81f3f6d06c3e0ce0..0000000000000000000000000000000000000000 --- a/vipra-cmd/src/main/java/de/vipra/cmd/plugin/ClassNameRegexFilter.java +++ /dev/null @@ -1,77 +0,0 @@ -package de.vipra.cmd.plugin; - -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.apache.logging.log4j.core.LogEvent; -import org.apache.logging.log4j.core.config.plugins.Plugin; -import org.apache.logging.log4j.core.config.plugins.PluginAttribute; -import org.apache.logging.log4j.core.config.plugins.PluginFactory; -import org.apache.logging.log4j.core.filter.AbstractFilter; - -/** - * http://rohithag.blogspot.de/2014/04/log4j2-separate-log-files-by.html - */ -@Plugin(name = "ClassNameRegexFilter", category = "Core", elementType = "filter", printObject = true) -public final class ClassNameRegexFilter extends AbstractFilter { - - private static final long serialVersionUID = -6931373371808638290L; - - private final Pattern pattern; - - private ClassNameRegexFilter(final Pattern pattern, final Result onMatch, final Result onMismatch) { - super(onMatch, onMismatch); - this.pattern = pattern; - } - - @Override - public Result filter(final LogEvent event) { - return filter(event.getLoggerName()); - } - - private Result filter(final String className) { - if (className == null) { - return onMismatch; - } - final Matcher m = pattern.matcher(className); - return m.matches() ? 
onMatch : onMismatch; - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder(); - sb.append("pattern=").append(pattern.toString()); - return sb.toString(); - } - - /** - * Create a Filter that matches a regular expression. - * - * @param regex - * The regular expression to match. - * @param match - * The action to perform when a match occurs. - * @param mismatch - * The action to perform when a mismatch occurs. - * @return The Log4jRegexFilter. - */ - @PluginFactory - public static ClassNameRegexFilter createFilter(@PluginAttribute("regex") final String regex, @PluginAttribute("onMatch") final String match, - @PluginAttribute("onMismatch") final String mismatch) { - if (regex == null) { - LOGGER.error("A regular expression must be provided for RegexFilter"); - return null; - } - Pattern pattern; - try { - pattern = Pattern.compile(regex); - } catch (final Exception ex) { - LOGGER.error("RegexFilter caught exception compiling pattern: " + regex + " cause: " + ex.getMessage()); - return null; - } - final Result onMatch = Result.toResult(match); - final Result onMismatch = Result.toResult(mismatch); - - return new ClassNameRegexFilter(pattern, onMatch, onMismatch); - } -} \ No newline at end of file diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/Processor.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/Processor.java index 01e1c45a11ba29f14db50e8373af1f6a7f034e3a..41723aa8e8ea1bd3b9731435a4460ee54cad0e1f 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/text/Processor.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/Processor.java @@ -3,10 +3,8 @@ package de.vipra.cmd.text; import java.util.List; import java.util.Properties; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - import de.vipra.util.Constants; +import de.vipra.util.model.TopicModelConfig; import edu.stanford.nlp.ling.CoreAnnotations.LemmaAnnotation; import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation; 
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; @@ -18,8 +16,6 @@ import edu.stanford.nlp.util.StringUtils; public class Processor { - public static final Logger log = LogManager.getLogger(Processor.class); - private final StanfordCoreNLP nlp; public Processor() { @@ -38,7 +34,7 @@ public class Processor { nlp = new StanfordCoreNLP(props); } - public ProcessedText process(final String input) throws ProcessorException { + public ProcessedText process(final TopicModelConfig modelConfig, final String input) throws ProcessorException { final Annotation doc = new Annotation(input.toLowerCase()); nlp.annotate(doc); final StringBuilder sb = new StringBuilder(); @@ -55,7 +51,7 @@ public class Processor { if (b == null || !b) { // filter out infrequent words final Long count = word.get(FrequencyAnnotator.class); - if (count != null && count >= Constants.DOCUMENT_MIN_WORD_FREQ) { + if (count != null && count >= modelConfig.getDocumentMinimumWordFrequency()) { final String lemma = word.get(LemmaAnnotation.class); // collect unique words sb.append(lemma).append(" "); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/SpotlightAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/SpotlightAnalyzer.java new file mode 100644 index 0000000000000000000000000000000000000000..7c041f68372babcb6fdab13db0a2d337b93b3860 --- /dev/null +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/SpotlightAnalyzer.java @@ -0,0 +1,53 @@ +package de.vipra.cmd.text; + +import java.io.BufferedReader; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.HttpURLConnection; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLEncoder; + +import de.vipra.util.Config; +import de.vipra.util.URLUtils; +import de.vipra.util.ex.ConfigException; +import de.vipra.util.model.TopicModelConfig; + +public class SpotlightAnalyzer { + + private final URL spotlightUrl; + private final TopicModelConfig 
modelConfig; + + public SpotlightAnalyzer(final TopicModelConfig modelConfig) throws MalformedURLException, ConfigException { + final Config config = Config.getConfig(); + spotlightUrl = new URL(URLUtils.concat(config.getSpotlightUrl(), "/rest/annotate")); + this.modelConfig = modelConfig; + } + + public SpotlightResponse analyze(String text) throws IOException { + text = "confidence=" + modelConfig.getSpotlightConfidence() + "&support=" + modelConfig.getSpotlightSupport() + "&text=" + + URLEncoder.encode(text, "UTF-8"); + + final HttpURLConnection connection = (HttpURLConnection) spotlightUrl.openConnection(); + connection.setRequestMethod("POST"); + connection.setRequestProperty("Content-Length", Integer.toString(text.getBytes().length)); + connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); + connection.setRequestProperty("Accept", "application/json"); + connection.setUseCaches(false); + connection.setDoOutput(true); + + final DataOutputStream out = new DataOutputStream(connection.getOutputStream()); + out.writeBytes(text); + out.close(); + + final BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream())); + final StringBuilder result = new StringBuilder(); + String line = null; + while ((line = in.readLine()) != null) + result.append(line); + + return Config.mapper.readValue(result.toString(), SpotlightResponse.class); + } + +} diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/SpotlightResource.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/SpotlightResource.java new file mode 100644 index 0000000000000000000000000000000000000000..baf54d3d7a5ce332ae5b3b97c765405139df8c6c --- /dev/null +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/SpotlightResource.java @@ -0,0 +1,95 @@ +package de.vipra.cmd.text; + +import java.util.Arrays; +import java.util.List; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import 
com.fasterxml.jackson.annotation.JsonSetter; + +@JsonIgnoreProperties(ignoreUnknown = true) +public class SpotlightResource { + + @JsonProperty("@URI") + private String uri; + + @JsonProperty("@support") + private int support; + + @JsonProperty("@types") + private List<String> types; + + @JsonProperty("@surfaceForm") + private String surfaceForm; + + @JsonProperty("@offset") + private int offset; + + @JsonProperty("@similarityScore") + private double similarityScore; + + @JsonProperty("@percentageOfSecondRank") + private double percentageOfSecondRank; + + public String getUri() { + return uri; + } + + public void setUri(final String uri) { + this.uri = uri; + } + + public int getSupport() { + return support; + } + + public void setSupport(final int support) { + this.support = support; + } + + public List<String> getTypes() { + return types; + } + + public void setTypes(final List<String> types) { + this.types = types; + } + + @JsonSetter("@types") + public void setTypes(final String types) { + this.types = Arrays.asList(types.split(",")); + } + + public String getSurfaceForm() { + return surfaceForm; + } + + public void setSurfaceForm(final String surfaceForm) { + this.surfaceForm = surfaceForm; + } + + public int getOffset() { + return offset; + } + + public void setOffset(final int offset) { + this.offset = offset; + } + + public double getSimilarityScore() { + return similarityScore; + } + + public void setSimilarityScore(final double similarityScore) { + this.similarityScore = similarityScore; + } + + public double getPercentageOfSecondRank() { + return percentageOfSecondRank; + } + + public void setPercentageOfSecondRank(final double percentageOfSecondRank) { + this.percentageOfSecondRank = percentageOfSecondRank; + } + +} diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/SpotlightResponse.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/SpotlightResponse.java new file mode 100644 index 
0000000000000000000000000000000000000000..0cb0ce0c6c8321336c8c7c8ce24ec0ccc909d780 --- /dev/null +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/SpotlightResponse.java @@ -0,0 +1,22 @@ +package de.vipra.cmd.text; + +import java.util.List; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; + +@JsonIgnoreProperties(ignoreUnknown = true) +public class SpotlightResponse { + + @JsonProperty("Resources") + private List<SpotlightResource> resources; + + public List<SpotlightResource> getResources() { + return resources; + } + + public void setResources(final List<SpotlightResource> resources) { + this.resources = resources; + } + +} diff --git a/vipra-cmd/src/main/resources/config.json b/vipra-cmd/src/main/resources/config.json index 137c93ce7dd59b74c3370a1a5f7643f0fca25a5f..ba2f39ecd81d944babd2e72b54baa7d30f11ab65 100644 --- a/vipra-cmd/src/main/resources/config.json +++ b/vipra-cmd/src/main/resources/config.json @@ -13,6 +13,15 @@ "dynamicMinIterations": 100, "dynamicMaxIterations": 1000, "staticIterations": 100, + "topicAutoNamingWords": 4, + "maxSimilarDocuments": 10, + "documentMinimumLength": 10, + "documentMinimumWordFrequency": 5, + "spotlightSupport": 0, + "spotlightConfidence": 0.5, + "minRelativeProbability": 0.01, + "risingDecayLambda": 0.0, + "maxSimilarDocumentsDivergence": 0.25, "windowResolution": "YEAR", "processorMode": "TEXT" } diff --git a/vipra-cmd/src/main/resources/log4j2.xml b/vipra-cmd/src/main/resources/log4j2.xml deleted file mode 100644 index 88658479afa01dfacd915b5208a5f00d3831fe43..0000000000000000000000000000000000000000 --- a/vipra-cmd/src/main/resources/log4j2.xml +++ /dev/null @@ -1,14 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<Configuration packages="de.vipra.cmd.plugin"> - <Appenders> - <Console name="Console" target="SYSTEM_OUT"> - <PatternLayout pattern="%highlight{%-5level - %msg%n}{FATAL=red,ERROR=red,WARN=red,INFO=normal,DEBUG=normal,TRACE=normal}" /> - 
<ClassNameRegexFilter regex="de.vipra.*" onMatch="ACCEPT" onMismatch="DENY"/> - </Console> - </Appenders> - <Loggers> - <Root level="INFO"> - <AppenderRef ref="Console" /> - </Root> - </Loggers> -</Configuration> \ No newline at end of file diff --git a/vipra-cmd/src/main/resources/log4j2dev.xml b/vipra-cmd/src/main/resources/log4j2dev.xml deleted file mode 100644 index 8c371647c245ecb4f098a716fa981149282111b1..0000000000000000000000000000000000000000 --- a/vipra-cmd/src/main/resources/log4j2dev.xml +++ /dev/null @@ -1,15 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<Configuration> - <Appenders> - <Console name="Console" target="SYSTEM_OUT"> - <PatternLayout pattern="%d{HH:mm:ss.SSS} %-5level %logger{36} - %msg%n" /> - </Console> - </Appenders> - <Loggers> - <Root level="ALL"> - <AppenderRef ref="Console" /> - </Root> - <Logger name="org.mongodb" level="ERROR" /> - <Logger name="org.elasticsearch.transport.netty" level="ERROR" /> - </Loggers> -</Configuration> \ No newline at end of file diff --git a/vipra-cmd/src/main/resources/model.json b/vipra-cmd/src/main/resources/model.json deleted file mode 100644 index 0eed5c3637e873744b64831a81f89ea2b6b1b532..0000000000000000000000000000000000000000 --- a/vipra-cmd/src/main/resources/model.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "name": "", - "kTopics": 20, - "dynamicMinIterations": 100, - "dynamicMaxIterations": 1000, - "staticIterations": 100, - "topicAutoNamingWords": 4, - "maxSimilarDocuments": 20, - "minRelativeProbability": 0.01, - "risingDecayLambda": 0.0, - "maxSimilarDocumentsDivergence": 0.25, - "windowResolution": "YEAR", - "processorMode": "TEXT" -} \ No newline at end of file diff --git a/vipra-ui/app/html/about.html b/vipra-ui/app/html/about.html index 35f097b55e3e51013c58dc6dc41399f5ccb7e04a..a7343ea7476ff0db09d1c04401f2f969d820c168 100644 --- a/vipra-ui/app/html/about.html +++ b/vipra-ui/app/html/about.html @@ -140,15 +140,6 @@ The number of topics to be generated in the topic modeling process. 
</td> </tr> - <tr> - <th>K topic words</th> - <td ng-bind-template="{{::info.const.ktopicwords}}"></td> - </tr> - <tr class="well"> - <td colspan="2"> - The maximum number of words that are associated to a single topic. - </td> - </tr> <tr> <th>Rising decay weight</th> <td ng-bind-template="{{::info.const.decaylambda}}"></td> @@ -168,15 +159,6 @@ <it>maximum_probability * minimum_relative_probability</it>. </td> </tr> - <tr> - <th>Minimum share</th> - <td ng-bind-template="{{::info.const.minshare}}"></td> - </tr> - <tr class="well"> - <td colspan="2"> - The minimum share of a topic to be accepted for an article. Topic shares are renormalized after rejecting topics below this threshold. - </td> - </tr> <tr> <th>Maximum similar documents</th> <td ng-bind-template="{{::info.const.maxsimdocs}}"></td> @@ -213,21 +195,21 @@ </td> </tr> <tr> - <th>Word minimum frequency</th> - <td ng-bind-template="{{::info.const.docminfreq}}"></td> + <th>Document minimum word count</th> + <td ng-bind-template="{{::info.const.docminlength}}"></td> </tr> <tr class="well"> <td colspan="2"> - The minimum word frequency for unique words in an article to be used in the topic modeling process. Unique words with a lower frequency are ignored. + The minimum article word count. Articles with fewer words are not included in the topic modeling process. </td> </tr> <tr> - <th>Document minimum word count</th> - <td ng-bind-template="{{::info.const.docminlength}}"></td> + <th>Document minimum word frequency</th> + <td ng-bind-template="{{::info.const.docminwordfreq}}"></td> </tr> <tr class="well"> <td colspan="2"> - The minimum article word count. Articles with less words are not included in the topic modeling process. + The minimum article word frequency. Words that occur less than this frequency are stripped from the article. 
</td> </tr> <tr> diff --git a/vipra-ui/app/html/articles/index.html b/vipra-ui/app/html/articles/index.html index 681ebf3ec9d434de77843ff3c68c1d4bf19d0e2e..e284dcbe4c74ec2bd7f1279289092c602777eea7 100644 --- a/vipra-ui/app/html/articles/index.html +++ b/vipra-ui/app/html/articles/index.html @@ -25,8 +25,6 @@ <tr ng-repeat="article in articles"> <td> <a ui-sref="articles.show({id: article.id})" ng-bind="::article.title"></a> - - <small class="text-muted" ng-bind-template="[{{::Vipra.formatDate(article.date)}}]"></small> </td> </tr> </tbody> diff --git a/vipra-ui/app/html/index.html b/vipra-ui/app/html/index.html index ec0dbe4df35470fbf89d6185ffe7cb0d414e73fe..d27f4c3d76b9edc4ffecaca272b574f4c56198af 100644 --- a/vipra-ui/app/html/index.html +++ b/vipra-ui/app/html/index.html @@ -54,4 +54,4 @@ </div> </div> </div> -<div ng-cloak ui-view></div> +<div ng-cloak ui-view></div> \ No newline at end of file diff --git a/vipra-ui/app/index.html b/vipra-ui/app/index.html index 64f09a52969ed9c493b81ed6cd03ab4d42183787..49d4af3a0e4da3f0feb0647ebdc98e337dc89189 100644 --- a/vipra-ui/app/index.html +++ b/vipra-ui/app/index.html @@ -70,6 +70,11 @@ </li> </ul> <ul class="nav navbar-nav navbar-right"> + <li> + <a data-toggle="modal" data-target="#topicModelModal"> + Models + </a> + </li> <li ui-sref-active="active"> <a ui-sref="about"> About @@ -81,7 +86,48 @@ </div> <!-- /.container-fluid --> </nav> - <div class="main" ui-view ng-cloak></div> + <div class="main" ui-view ng-cloak ng-show="topicModel"></div> + + <div id="topicModelModal" class="modal fade" tabindex="-1" role="dialog" data-backdrop="static" data-keyboard="false" bs-modal> + <div class="modal-dialog modal-lg"> + <div class="modal-content"> + <div class="modal-header"> + <button type="button" class="close" data-dismiss="modal" aria-label="Close" ng-show="topicModel"><span aria-hidden="true">×</span></button> + <h4 class="modal-title">Topic Models</h4> + </div> + <div class="modal-body"> + <ul class="list-group" 
ng-show="topicModels.length"> + <button type="button" class="list-group-item" ng-repeat="model in topicModels" ng-click="changeTopicModel(model)" ng-class="{active:topicModel.id===model.id}"> + <span class="badge" ng-bind="model.modelConfig.kTopics"></span> + <span ng-bind="model.id"></span> + </button> + </ul> + <p class="text-center" ng-show="loading.any"> + Loading... + </p> + <p ng-hide="topicModels.length || loading.any"> + No topic models in the database. Create a topic model and import data into it to begin. + </p> + <h4>Quick start</h4> + <ol> + <li> + Create a model: + <pre>vipra -C some_model</pre> + </li> + <li> + Import data into it: + <pre>vipra -S some_model -I data.json</pre> + </li> + <li> + Generate topic data: + <pre>vipra -S some_model -Mi</pre> + </li> + </ol> + </div> + </div> + </div> + </div> + </body> </html> diff --git a/vipra-ui/app/js/controllers.js b/vipra-ui/app/js/controllers.js index 58d8fd9ee96ee7dbaedc613a261f12c69a6acbe5..5fb343e0e046eb6dd4de525f10677eb780f0dea4 100644 --- a/vipra-ui/app/js/controllers.js +++ b/vipra-ui/app/js/controllers.js @@ -2,7 +2,7 @@ * Vipra Application * Controllers ******************************************************************************/ -/* globals angular, Vipra, moment, vis, console, prompt, randomColor, Highcharts */ +/* globals angular, Vipra, moment, vis, console, prompt, randomColor, Highcharts, $ */ (function() { "use strict"; @@ -12,9 +12,26 @@ 'vipra.factories' ]); - app.controller('RootController', ['$scope', '$state', function($scope, $state) { - $scope.$state = $state; - }]); + app.controller('RootController', ['$scope', '$state', 'TopicModelFactory', + function($scope, $state, TopicModelFactory) { + + $scope.$state = $state; + + TopicModelFactory.query({ + fields: 'modelConfig' + }, function(data) { + $scope.topicModels = data; + }, function(err) { + $scope.errors = err; + }); + + $scope.changeTopicModel = function(topicModel) { + $scope.topicModel = topicModel; + 
$('#topicModelModal').modal('hide'); + }; + + } + ]); /** * Index controller @@ -24,29 +41,37 @@ $scope.search = $location.search().query; - ArticleFactory.query({ - limit: 3, - sort: '-created' - }, function(data) { - $scope.latestArticles = data; - }, function(err) { - $scope.errors = err; - }); + $scope.$watch('topicModel', function(topicModel) { + if(!topicModel) return; + + ArticleFactory.query({ + topicModel: topicModel.id, + limit: 3, + sort: '-created' + }, function(data) { + $scope.latestArticles = data; + }, function(err) { + $scope.errors = err; + }); - TopicFactory.query({ - limit: 3, - sort: '-created' - }, function(data) { - $scope.latestTopics = data; - }, function(err) { - $scope.errors = err; + TopicFactory.query({ + topicModel: topicModel.id, + limit: 3, + sort: '-created' + }, function(data) { + $scope.latestTopics = data; + }, function(err) { + $scope.errors = err; + }); }); - $scope.$watch('search', function() { - if ($scope.search) { + $scope.$watchGroup(['search', 'topicModel'], function() { + if ($scope.search && $scope.topicModel) { $location.search('query', $scope.search); $scope.searching = true; + SearchFactory.query({ + topicModel: $scope.topicModel.id, limit: 10, query: $scope.search }, function(data) { @@ -501,8 +526,11 @@ $scope.page = Math.max($location.search().page || 1, 1); $scope.limit = 100; - $scope.$watchGroup(['page', 'opts.sortkey', 'opts.sortdir'], function() { + $scope.$watchGroup(['page', 'opts.sortkey', 'opts.sortdir', 'topicModel'], function() { + if(!$scope.topicModel) return; + TopicFactory.query({ + topicModel: $scope.topicModel.id, skip: ($scope.page - 1) * $scope.limit, limit: $scope.limit, sort: ($scope.opts.sortdir ? 
'' : '-') + $scope.opts.sortkey @@ -530,15 +558,20 @@ sortwords: '-likeliness' }; - TopicFactory.get({ - id: $stateParams.id - }, function(data) { - $scope.topic = data; - $scope.topicCreated = Vipra.formatDateTime($scope.topic.created); - $scope.topicModified = Vipra.formatDateTime($scope.topic.modified); - $scope.redrawGraph(); - }, function(err) { - $scope.errors = err; + $scope.$watch('topicModel', function() { + if(!$scope.topicModel) return; + + TopicFactory.get({ + id: $stateParams.id, + topicModel: $scope.topicModel.id + }, function(data) { + $scope.topic = data; + $scope.topicCreated = Vipra.formatDateTime($scope.topic.created); + $scope.topicModified = Vipra.formatDateTime($scope.topic.modified); + $scope.redrawGraph(); + }, function(err) { + $scope.errors = err; + }); }); $scope.redrawGraph = function() { @@ -568,7 +601,9 @@ $scope.endRename = function(save) { delete $scope.renameErrors; if (save) { - TopicFactory.update({ id: $scope.topic.id }, $scope.topic, function(data) { + TopicFactory.update({ + id: $scope.topic.id + }, $scope.topic, function(data) { $scope.topic = data; $scope.isRename = false; }, function(err) { @@ -590,17 +625,18 @@ $scope.$watch('opts.seqstyle', $scope.redrawGraph); $scope.$watch('opts.chartstyle', $scope.redrawGraph); - $scope.$watch('sequenceId', function(sequence) { - if (sequence) { - SequenceFactory.get({ - id: sequence, - topWords: 20 - }, function(data) { - $scope.sequence = data; - }, function(err) { - $scope.errors = err; - }); - } + $scope.$watchGroup(['sequenceId', 'topicModel'], function() { + if(!$scope.sequenceId || !$scope.topicModel) return; + + SequenceFactory.get({ + id: $scope.sequenceId, + topicModel: $scope.topicModel.id, + topWords: 20 + }, function(data) { + $scope.sequence = data; + }, function(err) { + $scope.errors = err; + }); }); } ]); @@ -619,9 +655,12 @@ $scope.page = Math.max($location.search().page || 1, 1); $scope.limit = 100; - $scope.$watchGroup(['page', 'opts.sortkey', 'opts.sortdir'], 
function() { + $scope.$watchGroup(['page', 'opts.sortkey', 'opts.sortdir', 'topicModel'], function() { + if(!$scope.topicModel) return; + TopicFactory.articles({ id: $stateParams.id, + topicModel: $scope.topicModel.id, skip: ($scope.page - 1) * $scope.limit, limit: $scope.limit, sort: ($scope.opts.sortdir ? '' : '-') + $scope.opts.sortkey diff --git a/vipra-ui/app/js/directives.js b/vipra-ui/app/js/directives.js index 404d439da573128488eaddb27a4a026b2b613db7..82900e4e00ac815addbd75d718f2ed7f70541cb8 100644 --- a/vipra-ui/app/js/directives.js +++ b/vipra-ui/app/js/directives.js @@ -146,6 +146,14 @@ } ]); + app.directive('bsModal', [function() { + return { + link: function($scope, $elem) { + $elem.modal(); + } + }; + }]); + app.directive('sequenceDropdown', [function() { return { scope: { diff --git a/vipra-ui/app/js/factories.js b/vipra-ui/app/js/factories.js index b65bb073cffe1274df9eeab01974b3cb3eac119c..f5ff9043fdcb454be0d5120b7613136bc98e1b6d 100644 --- a/vipra-ui/app/js/factories.js +++ b/vipra-ui/app/js/factories.js @@ -34,6 +34,10 @@ return $resource(Vipra.config.restUrl + '/info'); }]); + app.factory('TopicModelFactory', ['$resource', function($resource) { + return $resource(Vipra.config.restUrl + '/topicmodels'); + }]); + // https://gist.github.com/Fluidbyte/4718380 app.factory('Store', ['$state', function($state) { return function(key, value) { diff --git a/vipra-ui/app/less/app.less b/vipra-ui/app/less/app.less index 8f1d3a1c11d2339f5dc6ab68201a1f07389e3498..d3e6ef59a2c32c076afa40709a24ab6029583c29 100644 --- a/vipra-ui/app/less/app.less +++ b/vipra-ui/app/less/app.less @@ -371,6 +371,11 @@ topic-menu { display: inline-block; } +[bs-list] > li { + .pointer; + +} + @-moz-keyframes spin { 100% { -moz-transform: rotateY(360deg); diff --git a/vipra-util/pom.xml b/vipra-util/pom.xml index 1519155c68e20a808dc96b534fccb4d80535bde8..4fd1e61073c4524385a864d85907668045e26228 100644 --- a/vipra-util/pom.xml +++ b/vipra-util/pom.xml @@ -28,6 +28,13 @@ 
<version>2.4</version> </dependency> + <!-- Jansi --> + <dependency> + <groupId>org.fusesource.jansi</groupId> + <artifactId>jansi</artifactId> + <version>1.11</version> + </dependency> + <!-- SLF4j logging --> <dependency> <groupId>org.slf4j</groupId> diff --git a/vipra-util/src/main/java/de/vipra/util/Config.java b/vipra-util/src/main/java/de/vipra/util/Config.java index 8026369566975b6f9934c724e8cb2ef57a0b3f29..2aace5d8fcff6594082634eb9eeb8bc606f59ef1 100644 --- a/vipra-util/src/main/java/de/vipra/util/Config.java +++ b/vipra-util/src/main/java/de/vipra/util/Config.java @@ -3,7 +3,10 @@ package de.vipra.util; import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; +import java.util.List; import java.util.Map; import org.apache.commons.io.IOUtils; @@ -17,6 +20,8 @@ import com.fasterxml.jackson.databind.SerializationFeature; import de.vipra.util.ex.ConfigException; import de.vipra.util.model.Model; +import de.vipra.util.model.TopicModelConfig; +import de.vipra.util.model.TopicModelFull; import de.vipra.util.service.MongoService; public class Config { @@ -37,12 +42,12 @@ public class Config { private String databaseName = Constants.DATABASE_NAME; private String elasticSearchHost = Constants.ES_HOST; private int elasticSearchPort = Constants.ES_PORT; - private ModelConfig modelConfigTemplate = new ModelConfig(); + private TopicModelConfig modelConfigTemplate = new TopicModelConfig(); private String spotlightUrl; private String dtmPath; @JsonIgnore - private Map<String, ModelConfig> modelConfigs; + private Map<String, TopicModelConfig> topicModelConfigs; public String getDatabaseHost() { return databaseHost; @@ -100,30 +105,40 @@ public class Config { this.dtmPath = dtmPath; } - public Map<String, ModelConfig> getModelConfigs() { - return modelConfigs; + public Map<String, TopicModelConfig> getTopicModelConfigs() { + return topicModelConfigs; } - public 
ModelConfig getModelConfig(final String name) throws Exception { - final ModelConfig modelConfig = modelConfigs.get(name); + public Collection<TopicModelConfig> getTopicModelConfigs(final String[] names) throws Exception { + final List<TopicModelConfig> topicModelConfigs = new ArrayList<>(names.length); + for (final String name : names) { + if (name.equalsIgnoreCase("all")) + return this.topicModelConfigs.values(); + topicModelConfigs.add(getTopicModelConfig(name)); + } + return topicModelConfigs; + } + + public TopicModelConfig getTopicModelConfig(final String name) throws Exception { + final TopicModelConfig modelConfig = topicModelConfigs.get(name); if (modelConfig == null) throw new Exception("unknown model: " + name); return modelConfig; } - public void setModelConfigs(final Map<String, ModelConfig> modelConfigs) { - this.modelConfigs = modelConfigs; + public void setTopicModelConfigs(final Map<String, TopicModelConfig> topicModelConfigs) { + this.topicModelConfigs = topicModelConfigs; } public void setDataDirectory(final String dataDirectory) { this.dataDirectory = dataDirectory; } - public ModelConfig getModelConfigTemplate() { + public TopicModelConfig getModelConfigTemplate() { return modelConfigTemplate; } - public void setModelConfigTemplate(final ModelConfig modelConfigTemplate) { + public void setModelConfigTemplate(final TopicModelConfig modelConfigTemplate) { this.modelConfigTemplate = modelConfigTemplate; } @@ -235,22 +250,13 @@ public class Config { if (instance == null) throw new ConfigException("could not read configuration"); - // read model configurations - final File dataDir = instance.getDataDirectory(); - final Map<String, ModelConfig> modelConfigs = new HashMap<>(); - for (final File file : dataDir.listFiles()) { - if (file.isDirectory()) { - final File modelConfigFile = new File(file, Constants.MODEL_FILE); - if (!modelConfigFile.exists()) - throw new ConfigException("missing model configuration file: " + 
modelConfigFile.getAbsolutePath()); - final ModelConfig configDtm = mapper.readValue(modelConfigFile, ModelConfig.class); - if (configDtm.getName() == null || configDtm.getName().isEmpty()) - throw new ConfigException("models must have a name: " + modelConfigFile.getAbsolutePath()); - modelConfigs.put(configDtm.getName(), configDtm); - } - } - instance.modelConfigs = modelConfigs; - + // read topic model configs + final MongoService<TopicModelFull, String> dbTopicModels = MongoService.getDatabaseService(instance, TopicModelFull.class); + final List<TopicModelFull> topicModels = dbTopicModels.getAll(); + final Map<String, TopicModelConfig> topicModelConfigs = new HashMap<>(topicModels.size()); + for (final TopicModelFull topicModel : topicModels) + topicModelConfigs.put(topicModel.getId(), topicModel.getModelConfig()); + instance.setTopicModelConfigs(topicModelConfigs); } catch (final IOException e) { throw new ConfigException(e); } diff --git a/vipra-util/src/main/java/de/vipra/util/ConsoleUtils.java b/vipra-util/src/main/java/de/vipra/util/ConsoleUtils.java new file mode 100644 index 0000000000000000000000000000000000000000..7e2ee2f0f93636a6d1091322c457b08070f21397 --- /dev/null +++ b/vipra-util/src/main/java/de/vipra/util/ConsoleUtils.java @@ -0,0 +1,38 @@ +package de.vipra.util; + +import org.fusesource.jansi.Ansi; +import org.fusesource.jansi.Ansi.Color; + +public class ConsoleUtils { + + private static boolean silent = false; + private static int pad = 5; + + public static void setSilent(final boolean s) { + silent = s; + } + + public static void info(final String msg) { + if (!silent) + System.out.println(label("INFO") + " - " + msg); + } + + public static void warn(final String msg) { + if (!silent) + System.out.println(label("WARN") + " - " + msg); + } + + public static void error(final String msg) { + if (!silent) + System.err.println(label("ERROR") + " - " + Ansi.ansi().fg(Color.RED).a(msg).reset()); + } + + public static void error(final Throwable t) { + 
error(t.getMessage()); + } + + private static String label(final String label) { + return StringUtils.pad(label, pad); + } + +} diff --git a/vipra-util/src/main/java/de/vipra/util/Constants.java b/vipra-util/src/main/java/de/vipra/util/Constants.java index c9b1d39433de5e5108b7ea4bed1e1f6d8bd34ffb..ea6d2591b5f2361fc8d6e9dc5898933c1d8317b0 100644 --- a/vipra-util/src/main/java/de/vipra/util/Constants.java +++ b/vipra-util/src/main/java/de/vipra/util/Constants.java @@ -30,7 +30,6 @@ public class Constants { */ public static final String CONFIG_FILE = "config.json"; - public static final String MODEL_FILE = "model.json"; /* * DATABASE @@ -73,16 +72,10 @@ public class Constants { */ public static final int K_TOPICS = 20; - /** - * Number of words in a discovered topic, if the selected topic modeling - * library supports this parameter. Default 50. - */ - public static final int K_TOPIC_WORDS = 50; - /** * This value is a weight to the rising decay caulculation of topic * relevances. The higher this value, the more focus is put on later - * sequences containing more recent documents. Default 0. + * sequences containing more recent documents. Default 0.0. */ public static final double RISING_DECAY_LAMBDA = 0.0; @@ -93,19 +86,13 @@ public class Constants { public static final double MIN_RELATIVE_PROB = 0.01; /** - * The minimum share of a topic to be accepted for an article. Topic shares - * are renormalized after rejecting topics below this threshold. - */ - public static final double MINIMUM_SHARE = 0.01; - - /** - * Maximum number of similar documents for each document. + * Maximum number of similar documents for each document. Default 10. */ - public static final int MAX_SIMILAR_DOCUMENTS = 20; + public static final int MAX_SIMILAR_DOCUMENTS = 10; /** * Maximum divergence between a document and similar documents. Lower values - * mean more similar documents (less divergence). Default 1.0. + * mean more similar documents (less divergence). Default 0.25. 
*/ public static final double MAX_SIMILAR_DOCUMENTS_DIVERGENCE = 0.25; @@ -126,16 +113,26 @@ public class Constants { public static final int STATIC_ITER = 200; /** - * Minimum word frequency for words to be used for topic modeling. All words - * below this frequency in a document are filtered out before generating the - * topic model. Default 10. + * Minimum number of words per document. Default 10. + */ + public static final int DOCUMENT_MIN_LENGTH = 10; + + /** + * Minimum word frequency for a word to be accepted. Default 5. */ - public static final int DOCUMENT_MIN_WORD_FREQ = 10; + public static final int DOCUMENT_MIN_WORD_FREQ = 5; /** - * Minumum number of words per document. Default 10. + * Minimum number of DBpedia inlinks for an entity annotation to be + * accepted. Default 0. */ - public static final int DOCUMENT_MIN_LENGTH = 10; + public static final int SPOTLIGHT_SUPPORT = 0; + + /** + * Disambiguation confidence. Eliminates top n percent of low-confidence + * annotations. Ranges from 0 to 1. Default 0.5. + */ + public static final double SPOTLIGHT_CONFIDENCE = 0.5; /** * The dynamic topic modeling window resolution to be used. 
This value is diff --git a/vipra-util/src/main/java/de/vipra/util/StringUtils.java b/vipra-util/src/main/java/de/vipra/util/StringUtils.java index 295b1dde366a183836946e8efd0cc832da96040f..c054ac0a5c5f685e690ceee925f22aaba777c7c1 100644 --- a/vipra-util/src/main/java/de/vipra/util/StringUtils.java +++ b/vipra-util/src/main/java/de/vipra/util/StringUtils.java @@ -187,4 +187,29 @@ public class StringUtils { return sb.toString(); } + public static String pad(final String str, final int length, final String pad, final boolean left) { + if (str.length() >= length) + return str; + final StringBuilder sb = new StringBuilder(); + if (!left) + sb.append(str); + for (int i = 0; i < length - str.length(); i++) + sb.append(pad); + if (left) + sb.append(str); + return sb.toString(); + } + + public static String pad(final String str, final int length, final String pad) { + return pad(str, length, pad, false); + } + + public static String pad(final String str, final int length) { + return pad(str, length, " ", false); + } + + public static String pad(final String str, final int length, final boolean left) { + return pad(str, length, " ", left); + } + } diff --git a/vipra-util/src/main/java/de/vipra/util/URLUtils.java b/vipra-util/src/main/java/de/vipra/util/URLUtils.java new file mode 100644 index 0000000000000000000000000000000000000000..d02544ed4a1596854ba1f31eecb1fb7774bb2d10 --- /dev/null +++ b/vipra-util/src/main/java/de/vipra/util/URLUtils.java @@ -0,0 +1,23 @@ +package de.vipra.util; + +import java.net.MalformedURLException; + +public class URLUtils { + + public static String concat(String url, final String path) throws MalformedURLException { + if (url.endsWith("/")) { + if (path.startsWith("/")) + url += path.substring(1); + else + url += path; + } else { + if (path.startsWith("/")) + url += path; + else + url += "/" + path; + } + + return url; + } + +} diff --git a/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java 
b/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java index d66d4b8bf915af3a09f87b4d55003796853cb1d3..179af17216756cc1ee5768784a54da5669cdea4f 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java +++ b/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java @@ -15,6 +15,7 @@ import org.mongodb.morphia.annotations.Id; import org.mongodb.morphia.annotations.Index; import org.mongodb.morphia.annotations.Indexes; import org.mongodb.morphia.annotations.PrePersist; +import org.mongodb.morphia.annotations.Reference; import org.mongodb.morphia.annotations.Transient; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -50,9 +51,9 @@ public class ArticleFull implements Model<ObjectId>, Serializable { @ElasticIndex("date") private Date date; - @Embedded + @Reference @QueryIgnore(multi = true) - private TopicModel model; + private TopicModel topicModel; @Embedded @QueryIgnore(multi = true) @@ -150,17 +151,17 @@ public class ArticleFull implements Model<ObjectId>, Serializable { } } - public TopicModel getModel() { - return model; + public TopicModel getTopicModel() { + return topicModel; } - @ElasticIndex("model") - public String serializeModel() { - return model.getId(); + @ElasticIndex("topicmodel") + public String serializeTopicModel() { + return topicModel.getId(); } - public void setModel(final TopicModel model) { - this.model = model; + public void setTopicModel(final TopicModel topicModel) { + this.topicModel = topicModel; } public List<TopicShare> getTopics() { diff --git a/vipra-util/src/main/java/de/vipra/util/model/Sequence.java b/vipra-util/src/main/java/de/vipra/util/model/Sequence.java index ee2e0c77b397d1822d25dd3cd3ffd15d54bd68aa..35752e9ea4d18601f0f7f4df092277998f8ed2a3 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/Sequence.java +++ b/vipra-util/src/main/java/de/vipra/util/model/Sequence.java @@ -6,6 +6,7 @@ import org.bson.types.ObjectId; import org.mongodb.morphia.annotations.Embedded; import 
org.mongodb.morphia.annotations.Entity; import org.mongodb.morphia.annotations.Id; +import org.mongodb.morphia.annotations.Reference; import de.vipra.util.an.QueryIgnore; @@ -16,9 +17,9 @@ public class Sequence implements Model<ObjectId>, Comparable<Sequence>, Serializ @Id private ObjectId id = new ObjectId(); - @Embedded + @Reference @QueryIgnore(multi = true) - private TopicModel model; + private TopicModel topicModel; @Embedded private Window window; @@ -43,12 +44,12 @@ public class Sequence implements Model<ObjectId>, Comparable<Sequence>, Serializ this.id = id; } - public TopicModel getModel() { - return model; + public TopicModel getTopicModel() { + return topicModel; } - public void setModel(final TopicModel model) { - this.model = model; + public void setTopicModel(final TopicModel topicModel) { + this.topicModel = topicModel; } public Window getWindow() { diff --git a/vipra-util/src/main/java/de/vipra/util/model/SequenceFull.java b/vipra-util/src/main/java/de/vipra/util/model/SequenceFull.java index 91201a0f2eedf0aa9e545dd3b844e464507e5fd9..8efdbb74e5a892ad83bcac83a5a3b0be13209faa 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/SequenceFull.java +++ b/vipra-util/src/main/java/de/vipra/util/model/SequenceFull.java @@ -18,9 +18,9 @@ public class SequenceFull implements Model<ObjectId>, Comparable<SequenceFull>, @Id private ObjectId id = new ObjectId(); - @Embedded + @Reference @QueryIgnore(multi = true) - private TopicModel model; + private TopicModel topicModel; @Embedded private Window window; @@ -47,12 +47,12 @@ public class SequenceFull implements Model<ObjectId>, Comparable<SequenceFull>, this.id = id; } - public TopicModel getModel() { - return model; + public TopicModel getTopicModel() { + return topicModel; } - public void setModel(final TopicModel model) { - this.model = model; + public void setTopicModel(final TopicModel topicModel) { + this.topicModel = topicModel; } public Window getWindow() { diff --git 
a/vipra-util/src/main/java/de/vipra/util/model/TextEntity.java b/vipra-util/src/main/java/de/vipra/util/model/TextEntity.java new file mode 100644 index 0000000000000000000000000000000000000000..d0bba63fee5917c2a5db59047c972a126594108d --- /dev/null +++ b/vipra-util/src/main/java/de/vipra/util/model/TextEntity.java @@ -0,0 +1,38 @@ +package de.vipra.util.model; + +import java.io.Serializable; + +import org.mongodb.morphia.annotations.Embedded; + +@SuppressWarnings("serial") +@Embedded +public class TextEntity implements Serializable { + + private String entity; + + private String url; + + public TextEntity() {} + + public TextEntity(final String entity, final String url) { + this.entity = entity; + this.url = url; + } + + public String getEntity() { + return entity; + } + + public void setEntity(final String entity) { + this.entity = entity; + } + + public String getUrl() { + return url; + } + + public void setUrl(final String url) { + this.url = url; + } + +} diff --git a/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java b/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java index 260b998a40e4f918e665042157893bce68748a03..f875db28a1e88ae195f5acbbdb130c97a3349eb0 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java @@ -6,13 +6,11 @@ import java.util.Date; import java.util.List; import org.bson.types.ObjectId; -import org.mongodb.morphia.annotations.Embedded; import org.mongodb.morphia.annotations.Entity; import org.mongodb.morphia.annotations.Id; import org.mongodb.morphia.annotations.PrePersist; import org.mongodb.morphia.annotations.Reference; -import de.vipra.util.Constants; import de.vipra.util.MongoUtils; import de.vipra.util.StringUtils; import de.vipra.util.an.QueryIgnore; @@ -24,9 +22,9 @@ public class TopicFull implements Model<ObjectId>, Serializable { @Id private ObjectId id = new ObjectId(); - @Embedded + @Reference @QueryIgnore(multi = true) - private 
TopicModel model; + private TopicModel topicModel; private String name; @@ -67,12 +65,12 @@ public class TopicFull implements Model<ObjectId>, Serializable { this.id = MongoUtils.objectId(id); } - public TopicModel getModel() { - return model; + public TopicModel getTopicModel() { + return topicModel; } - public void setModel(final TopicModel model) { - this.model = model; + public void setTopicModel(final TopicModel topicModel) { + this.topicModel = topicModel; } public String getName() { @@ -154,10 +152,10 @@ public class TopicFull implements Model<ObjectId>, Serializable { created = modified; } - public static String getNameFromWords(final List<TopicWord> words) { + public static String getNameFromWords(final int wordsNum, final List<TopicWord> words) { String name = null; if (words != null && words.size() > 0) { - final int size = Math.min(Constants.TOPIC_AUTO_NAMING_WORDS, words.size()); + final int size = Math.min(wordsNum, words.size()); final List<String> topWords = new ArrayList<>(size); for (int i = 0; i < size; i++) { topWords.add(words.get(i).getWord()); @@ -188,7 +186,7 @@ public class TopicFull implements Model<ObjectId>, Serializable { @Override public String toString() { - return "TopicFull [id=" + id + ", model=" + model + ", name=" + name + ", sequences=" + sequences + ", avgRelevance=" + avgRelevance + return "TopicFull [id=" + id + ", model=" + topicModel + ", name=" + name + ", sequences=" + sequences + ", avgRelevance=" + avgRelevance + ", varRelevance=" + varRelevance + ", risingRelevance=" + risingRelevance + ", fallingRelevance=" + fallingRelevance + ", risingDecayRelevance=" + risingDecayRelevance + ", created=" + created + ", modified=" + modified + "]"; } diff --git a/vipra-util/src/main/java/de/vipra/util/model/TopicModel.java b/vipra-util/src/main/java/de/vipra/util/model/TopicModel.java index 3cb061df10c53a2a088cce367bf9ab6bbfdb3eab..e205a16b7dcf0c73d06b431284be5389f2f55274 100644 --- 
a/vipra-util/src/main/java/de/vipra/util/model/TopicModel.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TopicModel.java @@ -6,8 +6,8 @@ import org.mongodb.morphia.annotations.Entity; import org.mongodb.morphia.annotations.Id; @SuppressWarnings("serial") -@Entity(noClassnameStored = true) -public class TopicModel implements Model<String>, Serializable { +@Entity(value = "topicmodels", noClassnameStored = true) +public class TopicModel implements Model<String>, Comparable<TopicModel>, Serializable { @Id private String id; @@ -26,6 +26,11 @@ public class TopicModel implements Model<String>, Serializable { @Override public void setId(final String id) { this.id = id; + } + + @Override + public int compareTo(final TopicModel o) { + return id.compareTo(o.getId()); }; } diff --git a/vipra-util/src/main/java/de/vipra/util/ModelConfig.java b/vipra-util/src/main/java/de/vipra/util/model/TopicModelConfig.java similarity index 54% rename from vipra-util/src/main/java/de/vipra/util/ModelConfig.java rename to vipra-util/src/main/java/de/vipra/util/model/TopicModelConfig.java index e89dd8b5dc6a2114f98da0d2ba3b4e4f247bdf48..b916ea46d704f8606c01484076c7a8fa5ce1a152 100644 --- a/vipra-util/src/main/java/de/vipra/util/ModelConfig.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TopicModelConfig.java @@ -1,11 +1,17 @@ -package de.vipra.util; +package de.vipra.util.model; import java.io.File; +import java.io.Serializable; +import org.mongodb.morphia.annotations.Embedded; + +import de.vipra.util.Constants; import de.vipra.util.Constants.ProcessorMode; import de.vipra.util.Constants.WindowResolution; -public class ModelConfig { +@SuppressWarnings("serial") +@Embedded +public class TopicModelConfig implements Serializable { private String name; private int kTopics = Constants.K_TOPICS; @@ -14,12 +20,36 @@ public class ModelConfig { private int staticIterations = Constants.STATIC_ITER; private int topicAutoNamingWords = Constants.TOPIC_AUTO_NAMING_WORDS; private int 
maxSimilarDocuments = Constants.MAX_SIMILAR_DOCUMENTS; + private int documentMinimumLength = Constants.DOCUMENT_MIN_LENGTH; + private int documentMinimumWordFrequency = Constants.DOCUMENT_MIN_WORD_FREQ; + private int spotlightSupport = Constants.SPOTLIGHT_SUPPORT; + private double spotlightConfidence = Constants.SPOTLIGHT_CONFIDENCE; private double minRelativeProbability = Constants.MIN_RELATIVE_PROB; private double risingDecayLambda = Constants.RISING_DECAY_LAMBDA; private double maxSimilarDocumentsDivergence = Constants.MAX_SIMILAR_DOCUMENTS_DIVERGENCE; private WindowResolution windowResolution = Constants.WINDOW_RESOLUTION; private ProcessorMode processorMode = Constants.PROCESSOR_MODE; + public TopicModelConfig() {} + + public TopicModelConfig(final TopicModelConfig topicModelConfig) { + kTopics = topicModelConfig.getkTopics(); + dynamicMinIterations = topicModelConfig.getDynamicMinIterations(); + dynamicMaxIterations = topicModelConfig.getDynamicMaxIterations(); + staticIterations = topicModelConfig.getStaticIterations(); + topicAutoNamingWords = topicModelConfig.getTopicAutoNamingWords(); + maxSimilarDocuments = topicModelConfig.getMaxSimilarDocuments(); + documentMinimumLength = topicModelConfig.getDocumentMinimumLength(); + documentMinimumWordFrequency = topicModelConfig.getDocumentMinimumWordFrequency(); + spotlightSupport = topicModelConfig.getSpotlightSupport(); + spotlightConfidence = topicModelConfig.getSpotlightConfidence(); + minRelativeProbability = topicModelConfig.getMinRelativeProbability(); + risingDecayLambda = topicModelConfig.getRisingDecayLambda(); + maxSimilarDocumentsDivergence = topicModelConfig.getMaxSimilarDocumentsDivergence(); + windowResolution = topicModelConfig.getWindowResolution(); + processorMode = topicModelConfig.getProcessorMode(); + } + public String getName() { return name; } @@ -68,6 +98,22 @@ public class ModelConfig { this.topicAutoNamingWords = topicAutoNamingWords; } + public int getDocumentMinimumLength() { + return 
documentMinimumLength; + } + + public void setDocumentMinimumLength(final int documentMinimumLength) { + this.documentMinimumLength = documentMinimumLength; + } + + public int getDocumentMinimumWordFrequency() { + return documentMinimumWordFrequency; + } + + public void setDocumentMinimumWordFrequency(final int documentMinimumWordFrequency) { + this.documentMinimumWordFrequency = documentMinimumWordFrequency; + } + public int getMaxSimilarDocuments() { return maxSimilarDocuments; } @@ -76,6 +122,22 @@ public class ModelConfig { this.maxSimilarDocuments = maxSimilarDocuments; } + public int getSpotlightSupport() { + return spotlightSupport; + } + + public void setSpotlightSupport(final int spotlightSupport) { + this.spotlightSupport = spotlightSupport; + } + + public double getSpotlightConfidence() { + return spotlightConfidence; + } + + public void setSpotlightConfidence(final double spotlightConfidence) { + this.spotlightConfidence = spotlightConfidence; + } + public double getMinRelativeProbability() { return minRelativeProbability; } @@ -120,8 +182,10 @@ public class ModelConfig { return new File(dataDir, name); } - public File getConfigFile(final File dataDir) { - return new File(getModelDir(dataDir), Constants.MODEL_FILE); + @Override + public String toString() { + return "[window=" + windowResolution + ", mode=" + processorMode + ", k=" + kTopics + ", iter=" + staticIterations + "/" + + dynamicMinIterations + "-" + dynamicMaxIterations + "]"; } } diff --git a/vipra-util/src/main/java/de/vipra/util/model/TopicModelFull.java b/vipra-util/src/main/java/de/vipra/util/model/TopicModelFull.java index b82838c7e199e54dcb7e5a28a5025967dfa1be5d..13a0a138faf8c6430beaa85591f5f8bfb8997b92 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/TopicModelFull.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TopicModelFull.java @@ -1,21 +1,37 @@ package de.vipra.util.model; +import java.io.Serializable; + +import org.mongodb.morphia.annotations.Embedded; import 
org.mongodb.morphia.annotations.Entity; import org.mongodb.morphia.annotations.Id; +import de.vipra.util.an.QueryIgnore; + @SuppressWarnings("serial") -@Entity(noClassnameStored = true) -public class TopicModelFull implements Model<String> { +@Entity(value = "topicmodels", noClassnameStored = true) +public class TopicModelFull implements Model<String>, Comparable<TopicModelFull>, Serializable { @Id private String id; + private String description; + + @Embedded + @QueryIgnore(multi = true) + private TopicModelConfig modelConfig; + public TopicModelFull() {} public TopicModelFull(final String id) { this.id = id; } + public TopicModelFull(final String id, final TopicModelConfig modelConfig) { + this.id = id; + this.modelConfig = modelConfig; + } + @Override public String getId() { return id; @@ -26,4 +42,25 @@ public class TopicModelFull implements Model<String> { this.id = id; } + public String getDescription() { + return description; + } + + public void setDescription(final String description) { + this.description = description; + } + + public TopicModelConfig getModelConfig() { + return modelConfig; + } + + public void setModelConfig(final TopicModelConfig modelConfig) { + this.modelConfig = modelConfig; + } + + @Override + public int compareTo(final TopicModelFull o) { + return id.compareTo(o.getId()); + }; + } diff --git a/vipra-util/src/main/java/de/vipra/util/model/Window.java b/vipra-util/src/main/java/de/vipra/util/model/Window.java index 3ab0ba5811484c1c69f39f008a79abf96b2a7969..da4e4f5c9ebe41165b417946bf7fcd646cb7291a 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/Window.java +++ b/vipra-util/src/main/java/de/vipra/util/model/Window.java @@ -3,9 +3,9 @@ package de.vipra.util.model; import java.io.Serializable; import java.util.Date; -import org.mongodb.morphia.annotations.Embedded; import org.mongodb.morphia.annotations.Entity; import org.mongodb.morphia.annotations.Id; +import org.mongodb.morphia.annotations.Reference; import 
de.vipra.util.Constants.WindowResolution; import de.vipra.util.an.QueryIgnore; @@ -17,9 +17,9 @@ public class Window implements Model<Integer>, Serializable, Comparable<Window> @Id private Integer id; - @Embedded + @Reference @QueryIgnore(multi = true) - private TopicModel model; + private TopicModel topicModel; private Date startDate; @@ -37,12 +37,12 @@ public class Window implements Model<Integer>, Serializable, Comparable<Window> this.id = id; } - public TopicModel getModel() { - return model; + public TopicModel getTopicModel() { + return topicModel; } - public void setModel(final TopicModel model) { - this.model = model; + public void setTopicModel(final TopicModel model) { + topicModel = model; } public Date getStartDate() { diff --git a/vipra-util/src/main/java/de/vipra/util/service/MongoService.java b/vipra-util/src/main/java/de/vipra/util/service/MongoService.java index 714cbf64938dc693b5990d35b5038a011be725c5..99cafa807da5b397d6db5fb394ae752e4d5581da 100644 --- a/vipra-util/src/main/java/de/vipra/util/service/MongoService.java +++ b/vipra-util/src/main/java/de/vipra/util/service/MongoService.java @@ -131,6 +131,7 @@ public class MongoService<Type extends Model<IdType>, IdType> implements Service if (t == null) throw new DatabaseException(new NullPointerException("entities are null")); final List<Type> list = ListUtils.toList(t); + datastore.save(list); return list; } @@ -185,7 +186,7 @@ public class MongoService<Type extends Model<IdType>, IdType> implements Service } @Override - public void updateSingle(final Type t, final String... fields) throws DatabaseException { + public void updateSingle(final Type t, final boolean upsert, final String... 
fields) throws DatabaseException { if (t == null) throw new DatabaseException(new NullPointerException("entity is null")); if (t.getId() == null) @@ -215,10 +216,15 @@ public class MongoService<Type extends Model<IdType>, IdType> implements Service } } if (!noChanges) - datastore.update(query, ops); + datastore.update(query, ops, upsert); } } + @Override + public void updateSingle(final Type t, final String... fields) throws DatabaseException { + updateSingle(t, false, fields); + } + @Override public void drop() { datastore.getCollection(clazz).drop(); @@ -247,4 +253,8 @@ public class MongoService<Type extends Model<IdType>, IdType> implements Service return new MongoService<Type, IdType>(mongo, clazz); } + public static void dropDatabase(final Config config) throws ConfigException { + config.getMongo().getClient().dropDatabase(config.getDatabaseName()); + } + } diff --git a/vipra-util/src/main/java/de/vipra/util/service/Service.java b/vipra-util/src/main/java/de/vipra/util/service/Service.java index cc58335c5d652f0ce72f2b60c91529215c365a64..839e6989561dce0b3cf09c93b3f4e77941e8f72f 100644 --- a/vipra-util/src/main/java/de/vipra/util/service/Service.java +++ b/vipra-util/src/main/java/de/vipra/util/service/Service.java @@ -122,6 +122,19 @@ public interface Service<Type extends Model<IdType>, IdType, E extends Exception */ void replaceMultiple(Iterable<Type> ts) throws E; + /** + * Updates a single entity in the database + * + * @param t + * Entity to be updated + * @param upsert + * true to insert if not exists + * @param fields + * Fields to be updated + * @throws E + */ + void updateSingle(Type t, boolean upsert, String... fields) throws E; + /** * Updates a single entity in the database *