diff --git a/vipra-cmd/runcfg/CMD - Clear.launch b/vipra-cmd/runcfg/CMD - Clear.launch new file mode 100644 index 0000000000000000000000000000000000000000..c3bf3fd22b7ad771914ff3db5f914d87cdd36900 --- /dev/null +++ b/vipra-cmd/runcfg/CMD - Clear.launch @@ -0,0 +1,18 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication"> +<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS"> +<listEntry value="/vipra-cmd/src/main/java/de/vipra/cmd/Main.java"/> +</listAttribute> +<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES"> +<listEntry value="1"/> +</listAttribute> +<listAttribute key="org.eclipse.debug.ui.favoriteGroups"> +<listEntry value="org.eclipse.debug.ui.launchGroup.run"/> +</listAttribute> +<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/> +<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="de.vipra.cmd.Main"/> +<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-cn"/> +<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/> +<stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/> +<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Dlog4j.configurationFile=log4j2dev.xml"/> +</launchConfiguration> diff --git a/vipra-cmd/runcfg/CMD - Config.launch b/vipra-cmd/runcfg/CMD - Config.launch new file mode 100644 index 0000000000000000000000000000000000000000..ba73d8cf049b84061ae5e882be1d504bdd14d9af --- /dev/null +++ b/vipra-cmd/runcfg/CMD - Config.launch @@ -0,0 +1,18 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication"> +<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS"> +<listEntry value="/vipra-cmd/src/main/java/de/vipra/cmd/Main.java"/> 
+</listAttribute> +<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES"> +<listEntry value="1"/> +</listAttribute> +<listAttribute key="org.eclipse.debug.ui.favoriteGroups"> +<listEntry value="org.eclipse.debug.ui.launchGroup.run"/> +</listAttribute> +<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/> +<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="de.vipra.cmd.Main"/> +<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-o"/> +<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/> +<stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/> +<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Dlog4j.configurationFile=log4j2dev.xml"/> +</launchConfiguration> diff --git a/vipra-cmd/runcfg/CMD - Help.launch b/vipra-cmd/runcfg/CMD - Help.launch new file mode 100644 index 0000000000000000000000000000000000000000..2528a35c417af8984b402da9e0de8a062449cfa1 --- /dev/null +++ b/vipra-cmd/runcfg/CMD - Help.launch @@ -0,0 +1,18 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication"> +<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS"> +<listEntry value="/vipra-cmd/src/main/java/de/vipra/cmd/Main.java"/> +</listAttribute> +<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES"> +<listEntry value="1"/> +</listAttribute> +<listAttribute key="org.eclipse.debug.ui.favoriteGroups"> +<listEntry value="org.eclipse.debug.ui.launchGroup.run"/> +</listAttribute> +<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/> +<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="de.vipra.cmd.Main"/> +<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" 
value="-h"/> +<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/> +<stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/> +<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Dlog4j.configurationFile=log4j2dev.xml"/> +</launchConfiguration> diff --git a/vipra-cmd/runcfg/CMD - Import 1.launch b/vipra-cmd/runcfg/CMD - Import 1.launch new file mode 100644 index 0000000000000000000000000000000000000000..62653261e0ef063ca9113936fee95a92f65d28be --- /dev/null +++ b/vipra-cmd/runcfg/CMD - Import 1.launch @@ -0,0 +1,18 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication"> +<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS"> +<listEntry value="/vipra-cmd/src/main/java/de/vipra/cmd/Main.java"/> +</listAttribute> +<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES"> +<listEntry value="1"/> +</listAttribute> +<listAttribute key="org.eclipse.debug.ui.favoriteGroups"> +<listEntry value="org.eclipse.debug.ui.launchGroup.run"/> +</listAttribute> +<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/> +<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="de.vipra.cmd.Main"/> +<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-i /home/eike/repos/master/ma-impl/vm/data/test-1.json"/> +<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/> +<stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/> +<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Dlog4j.configurationFile=log4j2dev.xml"/> +</launchConfiguration> diff --git a/vipra-cmd/runcfg/CMD - Import 10.launch b/vipra-cmd/runcfg/CMD - Import 10.launch new file mode 100644 index 
0000000000000000000000000000000000000000..1d6c112bd7d743c6ccf7448fa8f42d853a5c431d --- /dev/null +++ b/vipra-cmd/runcfg/CMD - Import 10.launch @@ -0,0 +1,18 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication"> +<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS"> +<listEntry value="/vipra-cmd/src/main/java/de/vipra/cmd/Main.java"/> +</listAttribute> +<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES"> +<listEntry value="1"/> +</listAttribute> +<listAttribute key="org.eclipse.debug.ui.favoriteGroups"> +<listEntry value="org.eclipse.debug.ui.launchGroup.run"/> +</listAttribute> +<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/> +<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="de.vipra.cmd.Main"/> +<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-i /home/eike/repos/master/ma-impl/vm/data/test-10.json"/> +<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/> +<stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/> +<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Dlog4j.configurationFile=log4j2dev.xml"/> +</launchConfiguration> diff --git a/vipra-cmd/runcfg/CMD - Import All.launch b/vipra-cmd/runcfg/CMD - Import All.launch new file mode 100644 index 0000000000000000000000000000000000000000..750a14c54eeb0b90e20556e6e97e15072123b875 --- /dev/null +++ b/vipra-cmd/runcfg/CMD - Import All.launch @@ -0,0 +1,18 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication"> +<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS"> +<listEntry value="/vipra-cmd/src/main/java/de/vipra/cmd/Main.java"/> +</listAttribute> +<listAttribute 
key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES"> +<listEntry value="1"/> +</listAttribute> +<listAttribute key="org.eclipse.debug.ui.favoriteGroups"> +<listEntry value="org.eclipse.debug.ui.launchGroup.run"/> +</listAttribute> +<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/> +<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="de.vipra.cmd.Main"/> +<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-i /home/eike/repos/master/ma-impl/vm/data/data.json"/> +<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/> +<stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/> +<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Dlog4j.configurationFile=log4j2dev.xml"/> +</launchConfiguration> diff --git a/vipra-cmd/runcfg/CMD - Modeling.launch b/vipra-cmd/runcfg/CMD - Modeling.launch new file mode 100644 index 0000000000000000000000000000000000000000..fcb96a478a92d07d5613a2de4d94c5e26d456879 --- /dev/null +++ b/vipra-cmd/runcfg/CMD - Modeling.launch @@ -0,0 +1,18 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication"> +<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS"> +<listEntry value="/vipra-cmd/src/main/java/de/vipra/cmd/Main.java"/> +</listAttribute> +<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES"> +<listEntry value="1"/> +</listAttribute> +<listAttribute key="org.eclipse.debug.ui.favoriteGroups"> +<listEntry value="org.eclipse.debug.ui.launchGroup.run"/> +</listAttribute> +<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/> +<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="de.vipra.cmd.Main"/> +<stringAttribute 
key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-m"/> +<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/> +<stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/> +<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Dlog4j.configurationFile=log4j2dev.xml"/> +</launchConfiguration> diff --git a/vipra-cmd/runcfg/CMD - Test.launch b/vipra-cmd/runcfg/CMD - Test.launch new file mode 100644 index 0000000000000000000000000000000000000000..2e69b94c298f5848c58e40e2bac55c7763b023f3 --- /dev/null +++ b/vipra-cmd/runcfg/CMD - Test.launch @@ -0,0 +1,18 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication"> +<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS"> +<listEntry value="/vipra-cmd/src/main/java/de/vipra/cmd/Main.java"/> +</listAttribute> +<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES"> +<listEntry value="1"/> +</listAttribute> +<listAttribute key="org.eclipse.debug.ui.favoriteGroups"> +<listEntry value="org.eclipse.debug.ui.launchGroup.run"/> +</listAttribute> +<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/> +<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="de.vipra.cmd.Main"/> +<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-t"/> +<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/> +<stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/> +<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Dlog4j.configurationFile=log4j2dev.xml"/> +</launchConfiguration> diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/Main.java b/vipra-cmd/src/main/java/de/vipra/cmd/Main.java index 
923605d7456daa44a544c0f4cf8d92e9e022d39f..a883948673f8e76a1c787140d3a2467b70ea3214 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/Main.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/Main.java @@ -44,7 +44,6 @@ import de.vipra.cmd.option.TestCommand; public class Main { public static final Logger log = LogManager.getLogger(Main.class); - public static final Logger out = LogManager.getLogger("shellout"); static { // set morphia log level @@ -123,17 +122,17 @@ public class Main { try { c.run(); } catch (MongoTimeoutException e) { - out.error("timeout while trying to connect to the database"); + log.error("timeout while trying to connect to the database"); log.debug(e.getMessage(), e); } catch (NoNodeAvailableException e) { - out.error("could not connect to elasticsearch instance"); + log.error("could not connect to elasticsearch instance"); log.debug(e.getMessage(), e); } catch (Exception e) { Throwable cause = e.getCause(); if (cause != null) - out.error(cause.getMessage()); + log.error(cause.getMessage()); else - out.error(e.getMessage()); + log.error(e.getMessage()); log.debug(e.getMessage(), e); } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java index 30607070e4f5f29d11155f407f23551880d22358..57c51a0dd21682441b35e4a6c98514b4484ee2bb 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java @@ -1,30 +1,109 @@ package de.vipra.cmd.lda; +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; import java.util.List; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + import de.vipra.cmd.ex.AnalyzerException; import de.vipra.util.Config; +import de.vipra.util.Constants; import de.vipra.util.ConvertStream; +import de.vipra.util.StringUtils; import de.vipra.util.WordMap; +import de.vipra.util.ex.ConfigException; 
import de.vipra.util.model.TopicFull; import de.vipra.util.model.TopicRef; public class DTMAnalyzer extends Analyzer { + public static final Logger log = LogManager.getLogger(DTMAnalyzer.class); + public static final String NAME = "dtm"; + + private String command; + private File modelDir; + private File outDir; + protected DTMAnalyzer() { super("Dynamic Topic Model Analyzer"); } @Override public void init(Config config, WordMap wordMap) throws AnalyzerException { - // TODO Auto-generated method stub + try { + File dataDir = config.getDataDirectory(); + this.modelDir = new File(dataDir, NAME); + this.outDir = new File(modelDir, "out"); + } catch (ConfigException e) { + throw new AnalyzerException(e); + } + + // check for binary + File dtmBinary = null; + if (config.dtmPath != null) + dtmBinary = new File(config.dtmPath); + if (dtmBinary == null || !dtmBinary.exists()) + throw new AnalyzerException( + "dtm binary not found at path: " + config.dtmPath + ", check config key 'tm.dtmpath'"); + + String corpusPrefix = this.modelDir.getAbsolutePath() + File.separator + NAME; + String outname = this.outDir.getAbsolutePath(); + String[] parameters = { + // number of topics + "--ntopics=" + Constants.K_TOPICS, + // topic modeling mode + "--mode=fit", + // random seed (0 for pseudo random) + "--rng_seed=0", + // initialize model with lda + "--initialize_lda=true", + // top chain var (default 0.005) + "--top_chain_var=0.005", + // alpha (default -10) + "--alpha=0.01", + // minimum number of iterations + "--lda_sequence_min_iter=5", + // maximum number of iterations + "--lda_sequence_max_iter=10", + // em iter (default 20) + "--lda_max_em_iter=20", + // input file prefix + "--corpus_prefix=" + corpusPrefix, + // output directory + "--outname=" + outname }; + + this.command = dtmBinary.getAbsolutePath() + " " + StringUtils.join(parameters, " "); } @Override public void analyze() throws AnalyzerException { - // TODO Auto-generated method stub + try { + Process p = 
Runtime.getRuntime().exec(command, null); + if (!p.isAlive()) + throw new AnalyzerException("dtm process is dead"); + + BufferedReader in = new BufferedReader(new InputStreamReader(p.getErrorStream())); + + String line; + int iteration = 0; + while ((line = in.readLine()) != null) { + if (line.contains("EM iter")) { + log.info("iteration " + iteration++); + } + } + + in.close(); + p.waitFor(); + log.info("done"); + } catch (IOException | InterruptedException e) { + throw new AnalyzerException(e); + } } @Override diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java index d1c486c15eb3dfe9d76e99c3150a1137b7a32a79..1c4843f487cadc08c4eb194fc344e55e40305598 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java @@ -25,9 +25,7 @@ import de.vipra.util.model.TopicRef; import de.vipra.util.model.TopicWord; import de.vipra.util.model.Word; import jgibblda.Estimator; -import jgibblda.Inferencer; import jgibblda.LDACmdOption; -import jgibblda.Model; public class JGibbAnalyzer extends Analyzer { @@ -69,25 +67,14 @@ public class JGibbAnalyzer extends Analyzer { this.wordMap = wordMap; } - private void estimate() { - Estimator estimator = new Estimator(); - estimator.init(options); - estimator.estimate(); - } - - @SuppressWarnings("unused") - private void inference() { - Inferencer inferencer = new Inferencer(); - inferencer.init(options); - Model newModel = inferencer.inference(); - } - @Override public void analyze() throws AnalyzerException { if (!modelFile.exists()) { throw new AnalyzerException("model file does not exist: " + modelFile.getAbsolutePath()); } - estimate(); + Estimator estimator = new Estimator(); + estimator.init(options); + estimator.estimate(); } @Override diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ClearCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ClearCommand.java 
index 14ecd342aae942990719113ace55be533408d804..d79ddf1a1b81b82d8e3c8a3abecebe8588f4f88b 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ClearCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ClearCommand.java @@ -20,7 +20,6 @@ import de.vipra.util.service.DatabaseService; public class ClearCommand implements Command { public static final Logger log = LogManager.getLogger(ClearCommand.class); - public static final Logger out = LogManager.getLogger("shellout"); private boolean defaults; private Config config; @@ -40,29 +39,29 @@ public class ClearCommand implements Command { dbWords = DatabaseService.getDatabaseService(config, Word.class); elasticClient = ESClient.getClient(config); - out.info("clearing database"); + log.info("clearing database"); dbArticles.drop(); dbTopics.drop(); dbWords.drop(); - out.info("clearing index"); + log.info("clearing index"); elasticClient.admin().indices().prepareDelete("_all").get(); try { - out.info("clearing filebase"); + log.info("clearing filebase"); File dataDir = config.getDataDirectory(); if (dataDir.exists() && dataDir.isDirectory()) { FileUtils.deleteDirectory(dataDir); } } catch (IOException e) { - out.warn("could not delete data directory: " + config.getDataDirectory().getAbsolutePath()); + log.warn("could not delete data directory: " + config.getDataDirectory().getAbsolutePath()); } } @Override public void run() throws Exception { if (!defaults) - out.info("to confirm clearing, type 'clear' and press enter"); + log.info("to confirm clearing, type 'clear' and press enter"); if (defaults || ConsoleUtils.confirm("clear")) { clear(); } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ConfigCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ConfigCommand.java index 124827b2991c39ab3c0c7be6dfc87d8f5e845152..b5d9f4b597608966a451672439a6b0235ca6c33c 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ConfigCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ConfigCommand.java 
@@ -7,7 +7,7 @@ import de.vipra.util.Config; public class ConfigCommand implements Command { - public static final Logger log = LogManager.getLogger("shellout"); + public static final Logger log = LogManager.getLogger(ConfigCommand.class); @Override public void run() throws Exception { diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java index 88a8e4bc97eaa0f67629b8b6407f4d6305ba8521..3050019c150f7ed08015fe972cb5ea62f4e3ff8a 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java @@ -54,7 +54,6 @@ public class ImportCommand implements Command { } public static final Logger log = LogManager.getLogger(ImportCommand.class); - public static final Logger out = LogManager.getLogger("shellout"); private ArrayList<File> files = new ArrayList<>(); private JSONParser parser = new JSONParser(); @@ -114,7 +113,7 @@ public class ImportCommand implements Command { * @throws Exception */ private Article importArticle(JSONObject obj) throws Exception { - out.info("importing \"" + obj.get("title") + "\""); + log.info("importing \"" + obj.get("title") + "\""); ArticleFull article = articleFromJSON(obj); // preprocess text and generate text statistics @@ -192,8 +191,8 @@ public class ImportCommand implements Command { wordMap = new WordMap(dbWords); articleBuffer = new ArticleBuffer(dbArticles); - out.info("using data directory: " + config.getDataDirectory().getAbsolutePath()); - out.info("using preprocessor: " + preprocessor.getName()); + log.info("using data directory: " + config.getDataDirectory().getAbsolutePath()); + log.info("using preprocessor: " + preprocessor.getName()); Timer timer = new Timer(); timer.start(); @@ -201,7 +200,7 @@ public class ImportCommand implements Command { /* * import files into database and filebase */ - out.info("file import"); + log.info("file import"); List<Article> 
importedArticles = importFiles(files); articleBuffer.save(); timer.lap("import"); @@ -209,14 +208,14 @@ public class ImportCommand implements Command { /* * write filebase */ - out.info("writing file index"); + log.info("writing file index"); filebase.close(); timer.lap("filebase write"); /* * save words */ - out.info("saving words"); + log.info("saving words"); Set<Word> importedWords = wordMap.getNewWords(); wordMap.create(); timer.lap("saving words"); @@ -225,11 +224,11 @@ public class ImportCommand implements Command { * run information */ int newArticlesCount = importedArticles.size(); - out.info("imported " + newArticlesCount + " new " + StringUtils.quantity(newArticlesCount, "article")); + log.info("imported " + newArticlesCount + " new " + StringUtils.quantity(newArticlesCount, "article")); int newWordsCount = importedWords.size(); - out.info("imported " + newWordsCount + " new " + StringUtils.quantity(newWordsCount, "word")); - out.info(timer.toString()); - out.info("done in " + StringUtils.timeString(timer.total())); + log.info("imported " + newWordsCount + " new " + StringUtils.quantity(newWordsCount, "word")); + log.info(timer.toString()); + log.info("done in " + StringUtils.timeString(timer.total())); } } \ No newline at end of file diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ModelingCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ModelingCommand.java index 52461fc94f81d73c5fe54c2b30c3f9ea1ca085ea..26a179e8ca45b795cd6e146c5e8c884ce650f6f3 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ModelingCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ModelingCommand.java @@ -36,7 +36,6 @@ import de.vipra.util.service.DatabaseService; public class ModelingCommand implements Command { public static final Logger log = LogManager.getLogger(ModelingCommand.class); - public static final Logger out = LogManager.getLogger("shellout"); private Config config; private DatabaseService<ArticleFull, ObjectId> dbArticles; @@ 
-60,7 +59,7 @@ public class ModelingCommand implements Command { elasticClient = ESClient.getClient(config); elasticSerializer = new ESSerializer<>(ArticleFull.class); - out.info("using analyzer: " + analyzer.getName()); + log.info("using analyzer: " + analyzer.getName()); Timer timer = new Timer(); timer.start(); @@ -68,14 +67,14 @@ public class ModelingCommand implements Command { /* * do topic modeling */ - out.info("topic modeling"); + log.info("topic modeling"); analyzer.analyze(); timer.lap("topic modeling"); /* * save topic model */ - out.info("saving topic definitions"); + log.info("saving topic definitions"); int batchSize = 100; ConvertStream<TopicFull> topicDefs = analyzer.getTopicDefinitions(); Map<String, TopicFull> topicIndexMap = new HashMap<>(); @@ -99,7 +98,7 @@ public class ModelingCommand implements Command { /* * save topic refs and index article */ - out.info("saving document topics"); + log.info("saving document topics"); ConvertStream<List<TopicRef>> topicStream = analyzer.getTopics(); FilebaseIndex index = filebase.getIndex(); Iterator<String> indexIter = index.iterator(); @@ -147,7 +146,7 @@ public class ModelingCommand implements Command { /* * save words */ - out.info("saving words"); + log.info("saving words"); Set<Word> importedWords = wordMap.getNewWords(); timer.lap("saving topic refs and indexing"); wordMap.create(); @@ -159,9 +158,9 @@ public class ModelingCommand implements Command { * run information */ int newWordsCount = importedWords.size(); - out.info("imported " + newWordsCount + " new " + StringUtils.quantity(newWordsCount, "word")); - out.info(timer.toString()); - out.info("done in " + StringUtils.timeString(timer.total())); + log.info("imported " + newWordsCount + " new " + StringUtils.quantity(newWordsCount, "word")); + log.info(timer.toString()); + log.info("done in " + StringUtils.timeString(timer.total())); } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/StatsCommand.java 
b/vipra-cmd/src/main/java/de/vipra/cmd/option/StatsCommand.java index 79fa23e8ca2157ecff358110b129f725f42c91d6..5202375eb7d2b112a961e5931a00f9cfcb65ac1d 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/StatsCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/StatsCommand.java @@ -13,7 +13,6 @@ import de.vipra.util.service.DatabaseService; public class StatsCommand implements Command { public static final Logger log = LogManager.getLogger(StatsCommand.class); - public static final Logger out = LogManager.getLogger("shellout"); private Config config; private DatabaseService<Article, ObjectId> dbArticles; @@ -21,9 +20,9 @@ public class StatsCommand implements Command { private DatabaseService<Word, String> dbWords; private void stats() { - out.info("# of articles: " + dbArticles.count()); - out.info("# of topics : " + dbTopics.count()); - out.info("# of words : " + dbWords.count()); + log.info("# of articles: " + dbArticles.count()); + log.info("# of topics : " + dbTopics.count()); + log.info("# of words : " + dbWords.count()); } @Override diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/TestCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/TestCommand.java index 164c8a5d5d80a39360629ce79eeba16348525c84..dfc57e9a7edf30d92d3bfae6f5c7af6e0e803a5e 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/TestCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/TestCommand.java @@ -13,27 +13,27 @@ import de.vipra.util.service.DatabaseService; public class TestCommand implements Command { - public static final Logger out = LogManager.getLogger("shellout"); + public static final Logger log = LogManager.getLogger(TestCommand.class); @Override public void run() throws Exception { // test if configuration readable - out.info("reading configuration..."); + log.info("reading configuration..."); Config config = Config.getConfig(); // test if database is accessible - out.info("testing mongodb connection..."); + log.info("testing mongodb 
connection..."); DatabaseService<Article, ObjectId> dbArticles = DatabaseService.getDatabaseService(config, Article.class); dbArticles.count(); // test if elasticsearch is accessible - out.info("testing elasticsearch connection..."); + log.info("testing elasticsearch connection..."); TransportClient esclient = ESClient.getClient(config); if (esclient.connectedNodes().isEmpty()) { throw new NoNodeAvailableException("no elasticsearch nodes available"); } - out.info("all tests passed"); + log.info("all tests passed"); } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/plugin/ClassNameRegexFilter.java b/vipra-cmd/src/main/java/de/vipra/cmd/plugin/ClassNameRegexFilter.java new file mode 100644 index 0000000000000000000000000000000000000000..35ab0ec537a492b9eaf0239c867edd0da800d21d --- /dev/null +++ b/vipra-cmd/src/main/java/de/vipra/cmd/plugin/ClassNameRegexFilter.java @@ -0,0 +1,77 @@ +package de.vipra.cmd.plugin; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.logging.log4j.core.LogEvent; +import org.apache.logging.log4j.core.config.plugins.Plugin; +import org.apache.logging.log4j.core.config.plugins.PluginAttribute; +import org.apache.logging.log4j.core.config.plugins.PluginFactory; +import org.apache.logging.log4j.core.filter.AbstractFilter; + +/** + * http://rohithag.blogspot.de/2014/04/log4j2-separate-log-files-by.html + */ +@Plugin(name = "ClassNameRegexFilter", category = "Core", elementType = "filter", printObject = true) +public final class ClassNameRegexFilter extends AbstractFilter { + + private static final long serialVersionUID = -6931373371808638290L; + + private final Pattern pattern; + + private ClassNameRegexFilter(final Pattern pattern, final Result onMatch, final Result onMismatch) { + super(onMatch, onMismatch); + this.pattern = pattern; + } + + @Override + public Result filter(final LogEvent event) { + return filter(event.getLoggerName()); + } + + private Result filter(final String className) { + if 
(className == null) { + return onMismatch; + } + final Matcher m = pattern.matcher(className); + return m.matches() ? onMatch : onMismatch; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append("pattern=").append(pattern.toString()); + return sb.toString(); + } + + /** + * Create a Filter that matches a regular expression. + * + * @param regex + * The regular expression to match. + * @param match + * The action to perform when a match occurs. + * @param mismatch + * The action to perform when a mismatch occurs. + * @return The ClassNameRegexFilter, or null if the regex is missing or invalid. + */ + @PluginFactory + public static ClassNameRegexFilter createFilter(@PluginAttribute("regex") final String regex, + @PluginAttribute("onMatch") final String match, @PluginAttribute("onMismatch") final String mismatch) { + if (regex == null) { + LOGGER.error("A regular expression must be provided for RegexFilter"); + return null; + } + Pattern pattern; + try { + pattern = Pattern.compile(regex); + } catch (final Exception ex) { + LOGGER.error("RegexFilter caught exception compiling pattern: " + regex + " cause: " + ex.getMessage()); + return null; + } + final Result onMatch = Result.toResult(match); + final Result onMismatch = Result.toResult(mismatch); + + return new ClassNameRegexFilter(pattern, onMatch, onMismatch); + } +} \ No newline at end of file diff --git a/vipra-cmd/src/main/resources/config.properties b/vipra-cmd/src/main/resources/config.properties index f7c0fa9e161d41d8ba6a4f4b5b7bca0b9d719770..0e38e80b1c6f0884f2a8e30b5de9fd4ec95999ff 100644 --- a/vipra-cmd/src/main/resources/config.properties +++ b/vipra-cmd/src/main/resources/config.properties @@ -3,4 +3,5 @@ db.port=27017 db.name=test tm.processor=corenlp tm.analyzer=dtm -tm.saveallwords=false \ No newline at end of file +tm.saveallwords=false +tm.dtmpath=/home/eike/Downloads/dtm_release/dtm/main \ No newline at end of file diff --git a/vipra-cmd/src/main/resources/log4j2.out.xml 
b/vipra-cmd/src/main/resources/log4j2.out.xml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/vipra-cmd/src/main/resources/log4j2.xml b/vipra-cmd/src/main/resources/log4j2.xml index 2fc755c19c3d600e8e689f71fb93fc590e4178bb..88658479afa01dfacd915b5208a5f00d3831fe43 100644 --- a/vipra-cmd/src/main/resources/log4j2.xml +++ b/vipra-cmd/src/main/resources/log4j2.xml @@ -1,14 +1,14 @@ <?xml version="1.0" encoding="UTF-8"?> -<Configuration> +<Configuration packages="de.vipra.cmd.plugin"> <Appenders> <Console name="Console" target="SYSTEM_OUT"> <PatternLayout pattern="%highlight{%-5level - %msg%n}{FATAL=red,ERROR=red,WARN=red,INFO=normal,DEBUG=normal,TRACE=normal}" /> + <ClassNameRegexFilter regex="de.vipra.*" onMatch="ACCEPT" onMismatch="DENY"/> </Console> </Appenders> <Loggers> - <Root level="ERROR"> + <Root level="INFO"> <AppenderRef ref="Console" /> </Root> - <Logger name="shellout" level="INFO" /> </Loggers> </Configuration> \ No newline at end of file diff --git a/vipra-cmd/src/main/resources/log4j2dev.xml b/vipra-cmd/src/main/resources/log4j2dev.xml index 58ac9659b3dac15a9121ecd0e3fbf68eef61167b..8c371647c245ecb4f098a716fa981149282111b1 100644 --- a/vipra-cmd/src/main/resources/log4j2dev.xml +++ b/vipra-cmd/src/main/resources/log4j2dev.xml @@ -9,8 +9,7 @@ <Root level="ALL"> <AppenderRef ref="Console" /> </Root> - <Logger name="shellout" level="ALL"/> - <Logger name="org.mongodb" level="ERROR"/> - <Logger name="org.elasticsearch.transport.netty" level="ERROR"/> + <Logger name="org.mongodb" level="ERROR" /> + <Logger name="org.elasticsearch.transport.netty" level="ERROR" /> </Loggers> </Configuration> \ No newline at end of file diff --git a/vipra-util/src/main/java/de/vipra/util/Config.java b/vipra-util/src/main/java/de/vipra/util/Config.java index 9dfc270ef0c9f45d6b0dda691d049b74d5eb726f..a710af84fff7934d37d9297a0c636fdd91dcf4b7 100644 --- a/vipra-util/src/main/java/de/vipra/util/Config.java 
+++ b/vipra-util/src/main/java/de/vipra/util/Config.java @@ -83,6 +83,13 @@ public class Config { @ConfigKey("tm.saveallwords") public boolean saveAllWords = Constants.SAVE_ALL_WORDS; + /** + * Path to the dtm executable. If using dtm as the analyzer, this path must + * be set to the dtm executable. + */ + @ConfigKey("tm.dtmpath") + public String dtmPath = ""; + /* * Configuration reader */ @@ -97,23 +104,25 @@ public class Config { for (Field field : Config.class.getDeclaredFields()) { int modifiers = field.getModifiers(); - if (!Modifier.isStatic(modifiers)) { + if (Modifier.isFinal(modifiers)) + continue; + + if (!field.isAccessible()) field.setAccessible(true); - ConfigKey ck = field.getDeclaredAnnotation(ConfigKey.class); - if (ck == null) - continue; + ConfigKey ck = field.getDeclaredAnnotation(ConfigKey.class); + if (ck == null) + continue; - String name = ck.value(); - if (name == null || name.isEmpty()) { - name = field.getName(); - } + String name = ck.value(); + if (name == null || name.isEmpty()) { + name = field.getName(); + } - if (name.length() > printMaxFieldNameLength) - printMaxFieldNameLength = name.length(); + if (name.length() > printMaxFieldNameLength) + printMaxFieldNameLength = name.length(); - foundFields.put(name, field); - } + foundFields.put(name, field); } fields = foundFields.entrySet(); @@ -163,7 +172,9 @@ public class Config { Object parsedValue = null; try { Class<?> clazz = entry.getValue().getType(); - if (clazz == Boolean.class || clazz == Boolean.TYPE) { + if (clazz == String.class) { + parsedValue = value; + } else if (clazz == Boolean.class || clazz == Boolean.TYPE) { // boolean parsedValue = Boolean.parseBoolean(value); } else if (clazz == Character.class || clazz == Character.TYPE) { diff --git a/vipra-util/src/main/java/de/vipra/util/Constants.java b/vipra-util/src/main/java/de/vipra/util/Constants.java index 9b338d50a421b4578a198fda076e91cdd358a898..691f7ffce11e27aeb970854259990cea5ef3b7b3 100644 --- 
a/vipra-util/src/main/java/de/vipra/util/Constants.java +++ b/vipra-util/src/main/java/de/vipra/util/Constants.java @@ -269,7 +269,7 @@ public class Constants { } public static Analyzer DEFAULT() { - return JGIBB; + return DTM; } public static Analyzer fromString(String text) { diff --git a/vipra-util/src/main/java/de/vipra/util/FileUtils.java b/vipra-util/src/main/java/de/vipra/util/FileUtils.java index 10ab794c6fe67d9b457adaa3f3019cbc79d8bbc5..44aa7bce7584f406808b4ddc716a4031d3858e30 100644 --- a/vipra-util/src/main/java/de/vipra/util/FileUtils.java +++ b/vipra-util/src/main/java/de/vipra/util/FileUtils.java @@ -8,6 +8,7 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.net.URISyntaxException; import java.nio.file.Files; import java.nio.file.Paths; import java.util.Iterator; @@ -22,10 +23,62 @@ public class FileUtils extends org.apache.commons.io.FileUtils { isJAR = classResource.startsWith("jar:"); } + /** + * If this method is run from within a runnable jar, returns the runnable + * jar file, else null + * + * @return jar file or null + */ + public static File getJAR() { + if (isJAR) + try { + return new File(FileUtils.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath()); + } catch (URISyntaxException e) { + e.printStackTrace(); + } + return null; + } + + /** + * returns a File object, relative to this class. This is execution + * sensitive. If this method is called from within a runnable jar file, the + * file path is relative to the jar file. 
+ * + * @param relPath + * the relative path to the file + * @return found file or null + */ + public static File getFile(String relPath) { + try { + File thisFile = new File( + FileUtils.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath()); + return new File(thisFile.getParent(), relPath); + } catch (URISyntaxException e) { + e.printStackTrace(); + } + return null; + } + + /** + * Reads a file and returns a list of lines + * + * @param file + * the file to be read + * @return list of lines + * @throws IOException + */ public static List<String> readFile(File file) throws IOException { return Files.readAllLines(Paths.get(file.getAbsolutePath()), Constants.FILEBASE_ENCODING); } + /** + * Returns a resource file. Resource files are stored in various locations, + * depending on execution schema (direct execution, runnable jar etc.) + * + * @param name + * name of resource + * @return resource or null + */ public static InputStream getResource(String name) { while (name.startsWith("/")) name = name.substring(1); @@ -39,6 +92,14 @@ public class FileUtils extends org.apache.commons.io.FileUtils { return is; } + /** + * Counts the lines in a file + * + * @param file + * the file to be opened + * @return line count of file + * @throws IOException + */ public static int countLines(File file) throws IOException { if (!file.exists()) { return 0; @@ -63,6 +124,15 @@ public class FileUtils extends org.apache.commons.io.FileUtils { } } + /** + * Iterates the lines of a file. Closes the reader automatically when it + * hits the end of the file + * + * @param file + * the file to be read + * @return iterator over file lines + * @throws FileNotFoundException + */ public static Iterator<String> iterateFileLines(File file) throws FileNotFoundException { return (new Iterator<String>() {