diff --git a/vipra b/vipra index 3661a013e1960d9ae73c44383964b4a35e61889b..c93b564d256380af3f9c860a2801398240a9dbd3 100755 --- a/vipra +++ b/vipra @@ -8,7 +8,6 @@ if [ $? -ne 0 ]; then fi # path -SCRIPTFILE="$(basename $0)" JAR="cmd-0.0.1-SNAPSHOT.jar" DIR="./vipra-cmd/target" JARFILE="$DIR/$JAR" @@ -20,5 +19,5 @@ if [ ! -f "$JARFILE" ]; then fi # run -java -jar "$JARFILE" -x "$SCRIPTFILE" "$@" +java -jar "$JARFILE" "$@" exit $? diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/CmdOptions.java b/vipra-cmd/src/main/java/de/vipra/cmd/CmdOptions.java deleted file mode 100644 index 9d7209b33b58399395d0ff66f5dbda692140fb74..0000000000000000000000000000000000000000 --- a/vipra-cmd/src/main/java/de/vipra/cmd/CmdOptions.java +++ /dev/null @@ -1,81 +0,0 @@ -package de.vipra.cmd; - -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.Options; - -public class CmdOptions extends Options { - - private static final long serialVersionUID = 1L; - - public static final String OPT_HELP = "h"; - public static final String OPT_HELP_LONG = "help"; - - public static final String OPT_IMPORT = "i"; - public static final String OPT_IMPORT_LONG = "import"; - - public static final String OPT_SHELL = "x"; - public static final String OPT_SHELL_LONG = "shell"; - - public static final String OPT_CLEAR_LONG = "clear"; - - public static final String OPT_DEBUG_LONG = "debug"; - - public static final String OPT_TEST = "t"; - public static final String OPT_TEST_LONG = "test"; - - public static final String OPT_SILENT = "s"; - public static final String OPT_SILENT_LONG = "silent"; - - public static final String OPT_MODELING = "g"; - public static final String OPT_MODELING_LONG = "gen-model"; - - public static final String OPT_INDEXING = "e"; - public static final String OPT_INDEXING_LONG = "indexing"; - - public static final String OPT_REREAD = "r"; - public static final String OPT_REREAD_LONG = "reread"; - - public static final String OPT_CREATE_MODEL = "c"; - public static final String OPT_CREATE_MODEL_LONG = "create-model"; - - public static final String OPT_DELETE_MODEL = "d"; - public static final String OPT_DELETE_MODEL_LONG = "delete-model"; - - public static final String OPT_CHOOSE_MODEL = "m"; - public static final String OPT_CHOOSE_MODEL_LONG = "model"; - - public static final String OPT_LIST_MODELS = "l"; - public static final String OPT_LIST_MODELS_LONG = "list-models"; - - public static final String OPT_CONFIG_MODEL = "o"; - public static final String OPT_CONFIG_MODEL_LONG = "config-model"; - - public CmdOptions() { - addOption(Option.builder(OPT_HELP).longOpt(OPT_HELP_LONG).desc("print this message").build()); - addOption(Option.builder(OPT_SHELL).longOpt(OPT_SHELL_LONG).hasArg(true).argName("name").desc("run from a shell script").build()); - addOption(Option.builder(OPT_IMPORT).longOpt(OPT_IMPORT_LONG).hasArgs().argName("files/dirs...").desc("import articles into the database") - .build()); - addOption(Option.builder().longOpt(OPT_CLEAR_LONG).desc("clear database and filebase").build()); - addOption(Option.builder().longOpt(OPT_DEBUG_LONG).desc("show debug information").build()); - addOption(Option.builder(OPT_TEST).longOpt(OPT_TEST_LONG).desc("system tests").build()); - addOption(Option.builder(OPT_SILENT).longOpt(OPT_SILENT_LONG).desc("mute all output").build()); - addOption(Option.builder(OPT_MODELING).longOpt(OPT_MODELING_LONG).desc("regenerate topic model").build()); - addOption(Option.builder(OPT_INDEXING).longOpt(OPT_INDEXING_LONG).desc("regenerate search index").build()); - addOption(Option.builder(OPT_REREAD).longOpt(OPT_REREAD_LONG).desc("reread model files").build()); - addOption(Option.builder(OPT_CREATE_MODEL).longOpt(OPT_CREATE_MODEL_LONG).hasArgs().argName("name/s...").desc("create a new topic model") - .build()); - addOption(Option.builder(OPT_DELETE_MODEL).longOpt(OPT_DELETE_MODEL_LONG).hasArgs().argName("name/s...") - .desc("delete an existing topic model").build()); - addOption(Option.builder(OPT_CHOOSE_MODEL).longOpt(OPT_CHOOSE_MODEL_LONG).hasArgs().argName("name/s...") - .desc("choose topic model(s) for further actions").build()); - addOption(Option.builder(OPT_LIST_MODELS).longOpt(OPT_LIST_MODELS_LONG).desc("list existing models").build()); - addOption(Option.builder(OPT_CONFIG_MODEL).longOpt(OPT_CONFIG_MODEL_LONG).hasArg().argName("name").desc("configure a model").build()); - } - - public void printHelp(final String cmd) { - final HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp(cmd, this, true); - } - -} diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/CommandLineOptions.java b/vipra-cmd/src/main/java/de/vipra/cmd/CommandLineOptions.java new file mode 100644 index 0000000000000000000000000000000000000000..0e83857d6bc8a6ab82a224efefbfcb27e5140951 --- /dev/null +++ b/vipra-cmd/src/main/java/de/vipra/cmd/CommandLineOptions.java @@ -0,0 +1,156 @@ +package de.vipra.cmd; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; + +public class CommandLineOptions { + + public static final Option CLEAR = Option.builder("c").longOpt("clear").desc("clear the database and models").build(); + public static final Option DEBUG = Option.builder("d").longOpt("debug").desc("show debug information").build(); + public static final Option EDIT = Option.builder("e").longOpt("edit").desc("edit a specific model").hasArg().argName("model").build(); + public static final Option HELP = Option.builder("h").longOpt("help").desc("show this help").build(); + public static final Option INDEX = Option.builder("i").longOpt("index").desc("create the search index").build(); + public static final Option LIST = Option.builder("l").longOpt("list").desc("list available models").build(); + public static final Option REREAD = Option.builder("r").longOpt("reread").desc("reread generated models").build(); + public static final Option SILENT = Option.builder("s").longOpt("silent").desc("suppress all output").build(); + public static final Option TEST = Option.builder("t").longOpt("test").desc("test database connections").build(); + public static final Option CREATE = Option.builder("C").longOpt("create").desc("create new models").hasArgs().argName("models...").build(); + public static final Option DELETE = Option.builder("d").longOpt("delete").desc("delete existing models").hasArgs().argName("models...").build(); + public static final Option IMPORT = Option.builder("I").longOpt("import").desc("import data from json").hasArgs().argName("models...").build(); + public static final Option MODEL = Option.builder("M").longOpt("model").desc("generate models from database").build(); + public static final Option SELECT = Option.builder("S").longOpt("select").desc("select models").hasArgs().argName("models...").build(); + + private final Options options; + private CommandLine cmd; + private final String cmdName = "vipra"; + + public CommandLineOptions() { + final Option[] optionsArray = { CLEAR, DEBUG, EDIT, HELP, INDEX, LIST, REREAD, SILENT, TEST, CREATE, DELETE, IMPORT, MODEL, SELECT }; + options = new Options(); + for (final Option option : optionsArray) + options.addOption(option); + } + + public void parse(final String[] args) throws ParseException { + cmd = new DefaultParser().parse(options, args); + checkDependencies(); + } + + public boolean hasOption(final Option opt) { + return cmd.hasOption(opt.getOpt()); + } + + public String getOptionValue(final Option opt) { + return cmd.getOptionValue(opt.getOpt()); + } + + public String[] getOptionValues(final Option opt) { + return cmd.getOptionValues(opt.getOpt()); + } + + public boolean isClear() { + return hasOption(CLEAR); + } + + public boolean isDebug() { + return hasOption(DEBUG) && !hasOption(SILENT); + } + + public boolean isEdit() { + return hasOption(EDIT); + } + + public String modelToEdit() { + return getOptionValue(EDIT); + } + + public boolean isHelp() { + return hasOption(HELP); + } + + public void printHelp() { + final HelpFormatter formatter = new HelpFormatter(); + formatter.printHelp(cmdName, options, true); + } + + public boolean isIndex() { + return hasOption(INDEX); + } + + public String[] modelsToIndex() { + return selectedModels(); + } + + public boolean isList() { + return hasOption(LIST); + } + + public boolean isReread() { + return !hasOption(MODEL) && hasOption(REREAD); + } + + public String[] modelsToReread() { + return selectedModels(); + } + + public boolean isSilent() { + return hasOption(SILENT); + } + + public boolean isTest() { + return hasOption(TEST); + } + + public boolean isCreate() { + return hasOption(CREATE); + } + + public String[] modelsToCreate() { + return getOptionValues(CREATE); + } + + public boolean isDelete() { + return hasOption(DELETE); + } + + public String[] modelsToDelete() { + return getOptionValues(DELETE); + } + + public boolean isImport() { + return hasOption(IMPORT); + } + + public String[] filesToImport() { + return getOptionValues(IMPORT); + } + + public boolean isModel() { + return hasOption(MODEL); + } + + public String[] modelsToModel() { + return selectedModels(); + } + + public boolean isSelect() { + return hasOption(SELECT); + } + + public String[] selectedModels() { + return getOptionValues(SELECT); + } + + private void checkDependencies() throws ParseException { + if (isImport() || isModel() || isIndex() || isReread() || isDelete()) { + // these options require at least one selected model + if (!isSelect()) + throw new ParseException("select at least one model"); + } + } + +} diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/Main.java b/vipra-cmd/src/main/java/de/vipra/cmd/Main.java index d004f7620cc00373d99d78ea215433dc4588359b..f03df9e06a1864b2c6700ade50260999da580e23 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/Main.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/Main.java @@ -1,46 +1,23 @@ package de.vipra.cmd; -import static de.vipra.cmd.CmdOptions.OPT_CHOOSE_MODEL; -import static de.vipra.cmd.CmdOptions.OPT_CLEAR_LONG; -import static de.vipra.cmd.CmdOptions.OPT_CONFIG_MODEL; -import static de.vipra.cmd.CmdOptions.OPT_CREATE_MODEL; -import static de.vipra.cmd.CmdOptions.OPT_DEBUG_LONG; -import static de.vipra.cmd.CmdOptions.OPT_DELETE_MODEL; -import static de.vipra.cmd.CmdOptions.OPT_HELP; -import static de.vipra.cmd.CmdOptions.OPT_IMPORT; -import static de.vipra.cmd.CmdOptions.OPT_INDEXING; -import static de.vipra.cmd.CmdOptions.OPT_LIST_MODELS; -import static de.vipra.cmd.CmdOptions.OPT_MODELING; -import static de.vipra.cmd.CmdOptions.OPT_REREAD; -import static de.vipra.cmd.CmdOptions.OPT_SHELL; -import static de.vipra.cmd.CmdOptions.OPT_SILENT; -import static de.vipra.cmd.CmdOptions.OPT_TEST; - import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.ListIterator; import java.util.Map.Entry; -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.DefaultParser; -import org.apache.commons.cli.ParseException; import org.apache.logging.log4j.Level; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.core.LoggerContext; import org.apache.logging.log4j.core.config.Configuration; import org.apache.logging.log4j.core.config.LoggerConfig; -import org.elasticsearch.client.transport.NoNodeAvailableException; import org.mongodb.morphia.logging.MorphiaLoggerFactory; import org.mongodb.morphia.logging.slf4j.SLF4JLoggerImplFactory; -import com.mongodb.MongoTimeoutException; - import de.vipra.cmd.option.ClearCommand; import de.vipra.cmd.option.Command; -import de.vipra.cmd.option.ConfigModelCommand; +import de.vipra.cmd.option.EditModelCommand; import de.vipra.cmd.option.CreateModelCommand; import de.vipra.cmd.option.DeleteModelCommand; import de.vipra.cmd.option.ImportCommand; @@ -48,8 +25,6 @@ import de.vipra.cmd.option.IndexingCommand; import de.vipra.cmd.option.ListModelsCommand; import de.vipra.cmd.option.ModelingCommand; import de.vipra.cmd.option.TestCommand; -import de.vipra.util.Config; -import de.vipra.util.ConfigDtm; import de.vipra.util.ex.ConfigException; public class Main { @@ -64,27 +39,17 @@ public class Main { } public static void main(final String[] args) throws IOException, ConfigException { - final CommandLineParser parser = new DefaultParser(); - final CmdOptions options = new CmdOptions(); - String cmd = "vipra-cmd.jar"; - CommandLine cline; + final CommandLineOptions opts = new CommandLineOptions(); try { - cline = parser.parse(options, args); - } catch (final ParseException e) { + opts.parse(args); + } catch (final Exception e) { log.error(e.getMessage()); - options.printHelp(cmd); + opts.printHelp(); return; } - if (cline.hasOption(OPT_SHELL)) { - cmd = cline.getOptionValue(OPT_SHELL); - if (cmd == null) { - cmd = "vipra.sh"; - } - } - - if (cline.hasOption(OPT_HELP)) { - options.printHelp(cmd); + if (opts.isHelp()) { + opts.printHelp(); return; } @@ -92,82 +57,44 @@ public class Main { final LoggerContext loggerContext = (LoggerContext) LogManager.getContext(false); final Configuration loggerConfigs = loggerContext.getConfiguration(); - if (cline.hasOption(OPT_DEBUG_LONG)) + if (opts.isDebug()) loggerConfigs.getLoggerConfig(LogManager.ROOT_LOGGER_NAME).setLevel(Level.DEBUG); - if (cline.hasOption(OPT_SILENT)) { + if (opts.isSilent()) { for (final Entry<String, LoggerConfig> loggerConfig : loggerConfigs.getLoggers().entrySet()) loggerConfig.getValue().setLevel(Level.OFF); } loggerContext.updateLoggers(); - // get commands final List<Command> commands = new ArrayList<>(); - if (cline.hasOption(OPT_TEST)) + if (opts.isTest()) commands.add(new TestCommand()); - if (cline.hasOption(OPT_CREATE_MODEL)) - commands.add(new CreateModelCommand(cline.getOptionValues(OPT_CREATE_MODEL))); + if (opts.isCreate()) + commands.add(new CreateModelCommand(opts.modelsToCreate())); - if (cline.hasOption(OPT_DELETE_MODEL)) - commands.add(new DeleteModelCommand(cline.getOptionValues(OPT_DELETE_MODEL))); + if (opts.isDelete()) + commands.add(new DeleteModelCommand(opts.modelsToDelete())); - if (cline.hasOption(OPT_LIST_MODELS)) + if (opts.isList()) commands.add(new ListModelsCommand()); - if (cline.hasOption(OPT_CONFIG_MODEL)) - commands.add(new ConfigModelCommand(cline.getOptionValue(OPT_CONFIG_MODEL))); - - final Config config = Config.getConfig(); - final List<ConfigDtm> configDtms = new ArrayList<>(); - if (cline.hasOption(OPT_CHOOSE_MODEL)) { - for (final String model : cline.getOptionValues(OPT_CHOOSE_MODEL)) { - if (model.toLowerCase().equals("all")) { - configDtms.clear(); - for (final Entry<String, ConfigDtm> entry : config.getDtmConfigurations().entrySet()) - configDtms.add(entry.getValue()); - break; - } else { - final ConfigDtm configDtm = config.getDtmConfigurations().get(model); - if (configDtm == null) { - log.error("unknown model: " + model); - return; - } - configDtms.add(configDtm); - } - } - } else if (config.isDefaultAllModels()) { - configDtms.clear(); - for (final Entry<String, ConfigDtm> entry : config.getDtmConfigurations().entrySet()) - configDtms.add(entry.getValue()); - } + if (opts.isEdit()) + commands.add(new EditModelCommand(opts.modelToEdit())); - if (cline.hasOption(OPT_IMPORT) || cline.hasOption(OPT_MODELING) || cline.hasOption(OPT_REREAD) || cline.hasOption(OPT_INDEXING)) { - if (!cline.hasOption(OPT_CHOOSE_MODEL) || configDtms.isEmpty()) { - log.error("no models chosen"); - return; - } - } - - if (cline.hasOption(OPT_CLEAR_LONG)) + if (opts.isClear()) commands.add(new ClearCommand()); - if (cline.hasOption(OPT_IMPORT)) { - for (final ConfigDtm configDtm : configDtms) - commands.add(new ImportCommand(configDtm, cline.getOptionValues(OPT_IMPORT))); - } + if (opts.isImport()) + commands.add(new ImportCommand(opts.selectedModels(), opts.filesToImport())); - if (cline.hasOption(OPT_MODELING) || cline.hasOption(OPT_REREAD)) { - final boolean reread = !cline.hasOption(OPT_MODELING) && cline.hasOption(OPT_REREAD); - for (final ConfigDtm configDtm : configDtms) - commands.add(new ModelingCommand(configDtm, reread)); - } + if (opts.isModel() || opts.isReread()) + commands.add(new ModelingCommand(opts.selectedModels(), opts.isReread())); - if (cline.hasOption(OPT_INDEXING)) - for (final ConfigDtm configDtm : configDtms) - commands.add(new IndexingCommand(configDtm)); + if (opts.isIndex()) + commands.add(new IndexingCommand(opts.selectedModels())); // run commands if (commands.size() > 0) { @@ -175,12 +102,6 @@ public class Main { final Command c = it.next(); try { c.run(); - } catch (final MongoTimeoutException e) { - log.error("timeout while trying to connect to the database"); - log.debug(e.getMessage(), e); - } catch (final NoNodeAvailableException e) { - log.error("could not connect to elasticsearch instance"); - log.debug(e.getMessage(), e); } catch (final Exception e) { final Throwable cause = e.getCause(); if (cause != null) @@ -191,7 +112,7 @@ public class Main { } } } else { - options.printHelp(cmd); + opts.printHelp(); } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java index 1e8cacdcffcaf2ff7de2437f8ef602bb69ce5688..7eb8131920492241a938ca03945a0178f7767a5c 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java @@ -12,8 +12,8 @@ import java.text.ParseException; import java.util.HashMap; import java.util.Map; -import de.vipra.util.ConfigDtm; import de.vipra.util.Constants; +import de.vipra.util.ModelConfig; import de.vipra.util.ex.ConfigException; import de.vipra.util.model.ArticleFull; @@ -28,13 +28,13 @@ public class Filebase { private final FilebaseWordIndex wordIndex; private final FilebaseWindowIndex windowIndex; - public Filebase(final ConfigDtm configDtm, final File dataDir) throws ParseException, IOException { - modelDir = new File(dataDir, configDtm.getName()); + public Filebase(final ModelConfig modelConfig, final File dataDir) throws ParseException, IOException { + modelDir = new File(dataDir, modelConfig.getName()); file = new File(modelDir, FILE_NAME); newArticles = new HashMap<>(); idDateIndex = new FilebaseIDDateIndex(modelDir); wordIndex = new FilebaseWordIndex(modelDir); - windowIndex = new FilebaseWindowIndex(modelDir, configDtm.getWindowResolution()); + windowIndex = new FilebaseWindowIndex(modelDir, modelConfig.getWindowResolution()); } public void add(final ArticleFull article) throws FilebaseException { diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java index 6f8126bef6a1e7f9da17c33f7f3e0cd9e7c6eb20..35eafe21061391bc096f815886d624e823c88375 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java @@ -23,8 +23,8 @@ import de.vipra.cmd.file.FilebaseWindowIndex; import de.vipra.cmd.file.FilebaseWordIndex; import de.vipra.util.ArrayUtils; import de.vipra.util.Config; -import de.vipra.util.ConfigDtm; import de.vipra.util.Constants; +import de.vipra.util.ModelConfig; import de.vipra.util.MongoUtils; import de.vipra.util.StringUtils; import de.vipra.util.ex.ConfigException; @@ -74,10 +74,10 @@ public class Analyzer { throw new AnalyzerException("dtm binary not found at path: " + config.getDtmPath() + ", check config key 'tm.dtmpath'"); } - public void analyze(final ConfigDtm configDtm, final boolean reread) + public void analyze(final ModelConfig modelConfig, final boolean reread) throws AnalyzerException, DatabaseException, ParseException, IOException, InterruptedException { - final File modelDir = new File(dataDir, configDtm.getName()); + final File modelDir = new File(dataDir, modelConfig.getName()); final File outDir = new File(modelDir, "out"); final File outDirSeq = new File(outDir, "lda-seq"); @@ -107,7 +107,7 @@ public class Analyzer { final String command = dtmBinary.getAbsolutePath() + " " + StringUtils.join(parameters, " "); - final FilebaseWindowIndex windowIndex = new FilebaseWindowIndex(modelDir, configDtm.getWindowResolution()); + final FilebaseWindowIndex windowIndex = new FilebaseWindowIndex(modelDir, modelConfig.getWindowResolution()); BufferedReader in; @@ -131,15 +131,15 @@ public class Analyzer { p.waitFor(); } - final FilebaseWordIndex wordIndex = new FilebaseWordIndex(configDtm.getModelDir(dataDir)); - final FilebaseIDDateIndex idDateIndex = new FilebaseIDDateIndex(configDtm.getModelDir(dataDir)); + final FilebaseWordIndex wordIndex = new FilebaseWordIndex(modelConfig.getModelDir(dataDir)); + final FilebaseIDDateIndex idDateIndex = new FilebaseIDDateIndex(modelConfig.getModelDir(dataDir)); - final QueryBuilder builder = QueryBuilder.builder().criteria("model.id", configDtm.getName()); + final QueryBuilder builder = QueryBuilder.builder().criteria("model.id", modelConfig.getName()); dbArticles.deleteMultiple(builder); dbTopics.deleteMultiple(builder); dbSequences.deleteMultiple(builder); dbWindows.deleteMultiple(builder); - dbTopicModels.deleteSingle(configDtm.getName()); + dbTopicModels.deleteSingle(modelConfig.getName()); final int wordCount = wordIndex.size(); final int sequencesCount = windowIndex.size(); @@ -171,7 +171,7 @@ public class Analyzer { // read topic definition files and create topics - final TopicModelFull newTopicModel = new TopicModelFull(configDtm.getName()); + final TopicModelFull newTopicModel = new TopicModelFull(modelConfig.getName()); final List<Window> newWindows = new ArrayList<>(sequencesCount); final List<SequenceFull> newSequences = new ArrayList<>(Constants.K_TOPICS * sequencesCount); final List<TopicFull> newTopics = new ArrayList<>(Constants.K_TOPICS); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ConfigModelCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ConfigModelCommand.java deleted file mode 100644 index df0652672cd0e273ecd6e5f88853d9d71a9961eb..0000000000000000000000000000000000000000 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ConfigModelCommand.java +++ /dev/null @@ -1,33 +0,0 @@ -package de.vipra.cmd.option; - -import java.awt.Desktop; -import java.io.File; - -import de.vipra.util.Config; -import de.vipra.util.ConfigDtm; -import de.vipra.util.ex.ConfigException; - -public class ConfigModelCommand implements Command { - - private final File configFile; - - public ConfigModelCommand(final String model) throws ConfigException { - final Config config = Config.getConfig(); - final ConfigDtm configDtm = config.getDtmConfigurations().get(model); - configFile = configDtm.getConfigFile(config.getDataDirectory()); - } - - @Override - public void run() throws Exception { - if (!configFile.exists()) - throw new Exception("missing model configuration file: " + configFile.getAbsolutePath()); - - final String editor = System.getenv("EDITOR"); - if (editor != null && !editor.isEmpty()) { - Runtime.getRuntime().exec(editor + " " + configFile.getAbsolutePath()); - } else { - Desktop.getDesktop().edit(configFile); - } - } - -} diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/CreateModelCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/CreateModelCommand.java index a945bf4e022a7046c3071d94d21a66dbc0b05a00..8024a5ed43af6560b5023f613333ddb794e261fb 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/CreateModelCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/CreateModelCommand.java @@ -10,9 +10,9 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.SerializationFeature; import de.vipra.util.Config; -import de.vipra.util.ConfigDtm; import de.vipra.util.Constants; import de.vipra.util.FileUtils; +import de.vipra.util.ModelConfig; public class CreateModelCommand implements Command { @@ -32,11 +32,11 @@ public class CreateModelCommand implements Command { final Config config = Config.getConfig(); final ObjectMapper mapper = new ObjectMapper(); - final String modelConfig; + final String modelConfigString; if (config.getModelConfigTemplate() == null) { - modelConfig = IOUtils.toString(FileUtils.getResource(Constants.MODEL_FILE)); + modelConfigString = IOUtils.toString(FileUtils.getResource(Constants.MODEL_FILE)); } else { - modelConfig = mapper.writeValueAsString(config.getModelConfigTemplate()); + modelConfigString = mapper.writeValueAsString(config.getModelConfigTemplate()); } mapper.enable(SerializationFeature.INDENT_OUTPUT); @@ -50,10 +50,10 @@ public class CreateModelCommand implements Command { if (!modelDir.mkdirs()) throw new Exception("could not create model directory: " + modelDir.getAbsolutePath()); final File modelConfigFile = new File(modelDir, Constants.MODEL_FILE); - final ConfigDtm configDtm = mapper.readValue(modelConfig, ConfigDtm.class); - configDtm.setName(name); - org.apache.commons.io.FileUtils.write(modelConfigFile, mapper.writeValueAsString(configDtm)); - config.getDtmConfigurations().put(name, configDtm); + final ModelConfig modelConfig = mapper.readValue(modelConfigString, ModelConfig.class); + modelConfig.setName(name); + org.apache.commons.io.FileUtils.write(modelConfigFile, mapper.writeValueAsString(modelConfig)); + config.getModelConfigs().put(name, modelConfig); log.info("model created: " + name); } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/EditModelCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/EditModelCommand.java new file mode 100644 index 0000000000000000000000000000000000000000..510757b16821cca4b61758db6f0e5676eba3bf68 --- /dev/null +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/EditModelCommand.java @@ -0,0 +1,28 @@ +package de.vipra.cmd.option; + +import java.awt.Desktop; +import java.io.File; + +import de.vipra.util.Config; +import de.vipra.util.ModelConfig; +import de.vipra.util.ex.ConfigException; + +public class EditModelCommand implements Command { + + private final File configFile; + + public EditModelCommand(final String model) throws ConfigException { + final Config config = Config.getConfig(); + final ModelConfig modelConfig = config.getModelConfigs().get(model); + configFile = modelConfig.getConfigFile(config.getDataDirectory()); + } + + @Override + public void run() throws Exception { + if (!configFile.exists()) + throw new Exception("missing model configuration file: " + configFile.getAbsolutePath()); + + Desktop.getDesktop().open(configFile); + } + +} diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java index d24064a9bc5ce1611945603d52bb13edf5a2c937..94211ee14dea95cac03dfa49e7deb11effe76328 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java @@ -25,8 +25,8 @@ import de.vipra.cmd.text.ProcessedText; import de.vipra.cmd.text.Processor; import de.vipra.cmd.text.ProcessorException; import de.vipra.util.Config; -import de.vipra.util.ConfigDtm; import de.vipra.util.Constants; +import de.vipra.util.ModelConfig; import de.vipra.util.StringUtils; import de.vipra.util.Timer; import de.vipra.util.ex.ConfigException; @@ -127,7 +127,8 @@ public class ImportCommand implements Command { public static final Logger log = LogManager.getLogger(ImportCommand.class); - private final ConfigDtm configDtm; + private final String[] models; + private final int threadCount; private final List<File> files = new ArrayList<>(); private final JSONParser parser = new JSONParser(); private Config config; @@ -146,8 +147,9 @@ public class ImportCommand implements Command { * Paths to all *.json files containing articles or folders * containing *.json files. Not recursive. */ - public ImportCommand(final ConfigDtm configDtm, final String[] paths) { - this.configDtm = configDtm; + public ImportCommand(final String[] models, final String[] paths) { + this.models = models; + threadCount = Runtime.getRuntime().availableProcessors() * 10; addPaths(paths); } @@ -226,17 +228,10 @@ public class ImportCommand implements Command { return imported; } - @Override - public void run() throws IOException, ConfigException, FilebaseException, ParseException, DatabaseException, InterruptedException, - java.text.ParseException { - final int threadCount = Runtime.getRuntime().availableProcessors() * 10; - - config = Config.getConfig(); - dbArticles = MongoService.getDatabaseService(config, ArticleFull.class); - processor = new Processor(); + private void importForModel(final ModelConfig modelConfig) + throws java.text.ParseException, IOException, ConfigException, ParseException, InterruptedException, DatabaseException { buffer = new ArticleBuffer(dbArticles); - executor = Executors.newFixedThreadPool(threadCount); - filebase = new Filebase(configDtm, config.getDataDirectory()); + filebase = new Filebase(modelConfig, config.getDataDirectory()); log.info("using data directory: " + config.getDataDirectory().getAbsolutePath()); log.info("using " + threadCount + " " + StringUtils.quantity(threadCount, "thread")); @@ -269,4 +264,15 @@ public class ImportCommand implements Command { log.info("done in " + StringUtils.timeString(timer.total())); } + @Override + public void run() throws java.text.ParseException, IOException, ParseException, InterruptedException, DatabaseException, Exception { + config = Config.getConfig(); + dbArticles = MongoService.getDatabaseService(config, ArticleFull.class); + processor = new Processor(); + executor = Executors.newFixedThreadPool(threadCount); + for (final String model : models) { + importForModel(config.getModelConfig(model)); + } + } + } \ No newline at end of file diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/IndexingCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/IndexingCommand.java index f49c9a970c2db951bb7be863584ff2ce84bd5dd5..e72bed6539bffe99bef3ce77191a2395b5cfc542 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/IndexingCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/IndexingCommand.java @@ -1,5 +1,7 @@ package de.vipra.cmd.option; +import java.io.IOException; +import java.text.ParseException; import java.util.Map; import org.apache.logging.log4j.LogManager; @@ -10,10 +12,12 @@ import org.elasticsearch.client.Client; import de.vipra.cmd.file.FilebaseIDDateIndex; import de.vipra.cmd.file.FilebaseIDDateIndexEntry; import de.vipra.util.Config; -import de.vipra.util.ConfigDtm; import de.vipra.util.ESClient; import de.vipra.util.ESSerializer; +import de.vipra.util.ModelConfig; import de.vipra.util.MongoUtils; +import de.vipra.util.ex.ConfigException; +import de.vipra.util.ex.DatabaseException; import de.vipra.util.model.ArticleFull; import de.vipra.util.service.MongoService; @@ -21,22 +25,20 @@ public class IndexingCommand implements Command { public static final Logger log = LogManager.getLogger(IndexingCommand.class); - private final ConfigDtm configDtm; + private final String[] models; + private Config config; + private MongoService<ArticleFull, ObjectId> dbArticles; + private Client elasticClient; + private ESSerializer<ArticleFull> elasticSerializer; - public IndexingCommand(final ConfigDtm configDtm) { - this.configDtm = configDtm; + public IndexingCommand(final String[] models) { + this.models = models; } - @Override - public void run() throws Exception { - // TODO use configDtm - final Config config = Config.getConfig(); - final MongoService<ArticleFull, ObjectId> dbArticles = MongoService.getDatabaseService(config, ArticleFull.class); - final FilebaseIDDateIndex index = new FilebaseIDDateIndex(configDtm.getModelDir(config.getDataDirectory())); - final Client elasticClient = ESClient.getClient(config); - final ESSerializer<ArticleFull> elasticSerializer = new ESSerializer<>(ArticleFull.class); + private void indexForModel(final ModelConfig modelConfig) throws ParseException, IOException, ConfigException, DatabaseException { + final FilebaseIDDateIndex index = new FilebaseIDDateIndex(modelConfig.getModelDir(config.getDataDirectory())); - final String indexName = configDtm.getName() + "-articles"; + final String indexName = modelConfig.getName() + "-articles"; // clear index // elasticClient.admin().indices().prepareDelete("_all").get(); @@ -58,4 +60,15 @@ public class IndexingCommand implements Command { elasticClient.close(); } + @Override + public void run() throws ParseException, IOException, DatabaseException, Exception { + config = Config.getConfig(); + dbArticles = MongoService.getDatabaseService(config, ArticleFull.class); + elasticClient = ESClient.getClient(config); + elasticSerializer = new ESSerializer<>(ArticleFull.class); + for (final String model : models) { + indexForModel(config.getModelConfig(model)); + } + } + } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ListModelsCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ListModelsCommand.java index eeb95c6459ab7127dca33affd6105cc27d247ec0..d203417ef49cd27a044e86bbcc3fcbd349221153 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ListModelsCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ListModelsCommand.java @@ -6,7 +6,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import de.vipra.util.Config; -import de.vipra.util.ConfigDtm; +import de.vipra.util.ModelConfig; public class ListModelsCommand implements Command { @@ -16,7 +16,7 @@ public class ListModelsCommand implements Command { public void run() throws Exception { log.info("existing models:"); final Config config = Config.getConfig(); - for (final Entry<String, ConfigDtm> entry : config.getDtmConfigurations().entrySet()) + for (final Entry<String, ModelConfig> entry : config.getModelConfigs().entrySet()) log.info(" " + entry.getValue().getName()); } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ModelingCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ModelingCommand.java index 4a0f221985378ce1847356840d7dc07e0593dea0..490ef4cb2d3cacbf1970d0fcf0a3e0b191213e7a 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ModelingCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ModelingCommand.java @@ -1,31 +1,35 @@ package de.vipra.cmd.option; +import java.io.IOException; +import java.text.ParseException; + import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import de.vipra.cmd.lda.Analyzer; -import de.vipra.util.ConfigDtm; +import de.vipra.cmd.lda.AnalyzerException; +import de.vipra.util.Config; +import de.vipra.util.ModelConfig; import de.vipra.util.StringUtils; import de.vipra.util.Timer; +import de.vipra.util.ex.ConfigException; +import de.vipra.util.ex.DatabaseException; public class ModelingCommand implements Command { public static final Logger log = LogManager.getLogger(ModelingCommand.class); - private Analyzer analyzer; - private ConfigDtm configDtm; - private boolean reread; - - public ModelingCommand() {} + private final String[] models; + private final boolean reread; - public ModelingCommand(final ConfigDtm configDtm, final boolean reread) { - this.configDtm = configDtm; + public ModelingCommand(final String[] models, final boolean reread) { + this.models = models; this.reread = reread; } - @Override - public void run() throws Exception { - analyzer = new Analyzer(); + private void modelForModel(final ModelConfig modelConfig) + throws AnalyzerException, ConfigException, DatabaseException, ParseException, IOException, InterruptedException { + final Analyzer analyzer = new Analyzer(); final Timer timer = new Timer(); timer.restart(); @@ -34,7 +38,7 @@ public class ModelingCommand implements Command { * do topic modeling */ log.info("topic modeling"); - analyzer.analyze(configDtm, reread); + analyzer.analyze(modelConfig, reread); timer.lap("topic modeling"); /* @@ -44,4 +48,12 @@ public class ModelingCommand implements Command { log.info("done in " + StringUtils.timeString(timer.total())); } + @Override + public void run() throws Exception { + final Config config = Config.getConfig(); + for (final String model : models) { + modelForModel(config.getModelConfig(model)); + } + } + } diff --git a/vipra-cmd/src/main/resources/config.json b/vipra-cmd/src/main/resources/config.json index 83de6f0965f4377c44b8c130d9d9ece9440dc29a..137c93ce7dd59b74c3370a1a5f7643f0fca25a5f 100644 --- a/vipra-cmd/src/main/resources/config.json +++ b/vipra-cmd/src/main/resources/config.json @@ -7,7 +7,6 @@ "elasticSearchPort": 9300, "spotlightUrl": "", "dtmPath": "", - "defaultAllModels": false, "modelConfigTemplate": { "name": "", "kTopics": 20, diff --git a/vipra-util/src/main/java/de/vipra/util/Config.java b/vipra-util/src/main/java/de/vipra/util/Config.java index 53061973446b459984208f07ed83944a6159cb3c..e190121308f636fbc79564cd6073dad516e0be0a 100644 --- a/vipra-util/src/main/java/de/vipra/util/Config.java +++ b/vipra-util/src/main/java/de/vipra/util/Config.java @@ -29,13 +29,12 @@ public class Config { private String databaseName = Constants.DATABASE_NAME; private String elasticSearchHost = Constants.ES_HOST; private int elasticSearchPort = Constants.ES_PORT; - private boolean defaultAllModels = false; - private ConfigDtm modelConfigTemplate = new ConfigDtm(); + private ModelConfig modelConfigTemplate = new ModelConfig(); private String spotlightUrl; private String dtmPath; @JsonIgnore - private Map<String, ConfigDtm> dtmConfigurations; + private Map<String, ModelConfig> modelConfigs; public String getDatabaseHost() { return databaseHost; @@ -93,32 +92,31 @@ public class Config { this.dtmPath = dtmPath; } - public Map<String, ConfigDtm> getDtmConfigurations() { - return dtmConfigurations; + public Map<String, ModelConfig> getModelConfigs() { + return modelConfigs; } - public void setDtmConfigurations(final Map<String, ConfigDtm> dtmConfigurations) { - this.dtmConfigurations = dtmConfigurations; + public ModelConfig getModelConfig(final String name) throws Exception { + final ModelConfig modelConfig = modelConfigs.get(name); + if (modelConfig == null) + throw new Exception("unknown model: " + name); + return modelConfig; } - public void setDataDirectory(final String dataDirectory) { - this.dataDirectory = dataDirectory; - } - - public boolean isDefaultAllModels() { - return defaultAllModels; + public void setModelConfigs(final Map<String, ModelConfig> modelConfigs) { + this.modelConfigs = modelConfigs; } - public void setDefaultAllModels(final boolean defaultAllModels) { - this.defaultAllModels = defaultAllModels; + public void setDataDirectory(final String dataDirectory) { + this.dataDirectory = dataDirectory; } - public ConfigDtm getModelConfigTemplate() { + public ModelConfig getModelConfigTemplate() { return modelConfigTemplate; } - public void setModelConfigTemplate(final ConfigDtm configDtmTemplate) { - modelConfigTemplate = configDtmTemplate; + public void setModelConfigTemplate(final ModelConfig modelConfigTemplate) { + this.modelConfigTemplate = modelConfigTemplate; } /** @@ -232,19 +230,19 @@ public class Config { // read model configurations final File dataDir = instance.getDataDirectory(); - final Map<String, ConfigDtm> modelConfigs = new HashMap<>(); + final Map<String, ModelConfig> modelConfigs = new HashMap<>(); for (final File file : dataDir.listFiles()) { if (file.isDirectory()) { final File modelConfigFile = new File(file, Constants.MODEL_FILE); if (!modelConfigFile.exists()) throw new ConfigException("missing model configuration file: " + modelConfigFile.getAbsolutePath()); - final ConfigDtm configDtm = mapper.readValue(modelConfigFile, ConfigDtm.class); + final ModelConfig configDtm = mapper.readValue(modelConfigFile, ModelConfig.class); if (configDtm.getName() == null || configDtm.getName().isEmpty()) throw new ConfigException("models must have a name: " + modelConfigFile.getAbsolutePath()); modelConfigs.put(configDtm.getName(), configDtm); } } - instance.dtmConfigurations = modelConfigs; + instance.modelConfigs = modelConfigs; } catch (final IOException e) { throw new ConfigException(e); diff --git a/vipra-util/src/main/java/de/vipra/util/ConfigDtm.java b/vipra-util/src/main/java/de/vipra/util/ModelConfig.java similarity index 77% rename from vipra-util/src/main/java/de/vipra/util/ConfigDtm.java rename to vipra-util/src/main/java/de/vipra/util/ModelConfig.java index 19b14faf93b527a1872a67460c54221f0ca60666..c8e482f99fbf2d890f654eba0f0e541e1e635cd3 100644 --- a/vipra-util/src/main/java/de/vipra/util/ConfigDtm.java +++ b/vipra-util/src/main/java/de/vipra/util/ModelConfig.java @@ -5,7 +5,7 @@ import java.io.File; import de.vipra.util.Constants.ProcessorMode; import de.vipra.util.Constants.WindowResolution; -public class ConfigDtm { +public class ModelConfig { private String name; private final int kTopics = Constants.K_TOPICS; @@ -55,11 +55,4 @@ public class ConfigDtm { return new File(getModelDir(dataDir), Constants.MODEL_FILE); } - @Override - public String toString() { - return "ConfigDtm [name=" + name + ", kTopics=" + kTopics + ", dynamicMinIterations=" + dynamicMinIterations + ", dynamicMaxIterations=" - + dynamicMaxIterations + ", staticIterations=" + staticIterations + ", windowResolution=" + windowResolution + ", processorMode=" - + processorMode + "]"; - } - }